1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a pattern matching instruction selector for PowerPC, 10 // converting from a legalized dag to a PPC dag. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MCTargetDesc/PPCMCTargetDesc.h" 15 #include "MCTargetDesc/PPCPredicates.h" 16 #include "PPC.h" 17 #include "PPCISelLowering.h" 18 #include "PPCMachineFunctionInfo.h" 19 #include "PPCSubtarget.h" 20 #include "PPCTargetMachine.h" 21 #include "llvm/ADT/APInt.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallPtrSet.h" 25 #include "llvm/ADT/SmallVector.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/Analysis/BranchProbabilityInfo.h" 28 #include "llvm/CodeGen/FunctionLoweringInfo.h" 29 #include "llvm/CodeGen/ISDOpcodes.h" 30 #include "llvm/CodeGen/MachineBasicBlock.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/SelectionDAG.h" 35 #include "llvm/CodeGen/SelectionDAGISel.h" 36 #include "llvm/CodeGen/SelectionDAGNodes.h" 37 #include "llvm/CodeGen/TargetInstrInfo.h" 38 #include "llvm/CodeGen/TargetRegisterInfo.h" 39 #include "llvm/CodeGen/ValueTypes.h" 40 #include "llvm/IR/BasicBlock.h" 41 #include "llvm/IR/DebugLoc.h" 42 #include "llvm/IR/Function.h" 43 #include "llvm/IR/GlobalValue.h" 44 #include "llvm/IR/InlineAsm.h" 45 #include "llvm/IR/InstrTypes.h" 46 #include "llvm/IR/Module.h" 47 #include "llvm/Support/Casting.h" 48 #include "llvm/Support/CodeGen.h" 49 #include 
"llvm/Support/CommandLine.h" 50 #include "llvm/Support/Compiler.h" 51 #include "llvm/Support/Debug.h" 52 #include "llvm/Support/ErrorHandling.h" 53 #include "llvm/Support/KnownBits.h" 54 #include "llvm/Support/MachineValueType.h" 55 #include "llvm/Support/MathExtras.h" 56 #include "llvm/Support/raw_ostream.h" 57 #include <algorithm> 58 #include <cassert> 59 #include <cstdint> 60 #include <iterator> 61 #include <limits> 62 #include <memory> 63 #include <new> 64 #include <tuple> 65 #include <utility> 66 67 using namespace llvm; 68 69 #define DEBUG_TYPE "ppc-codegen" 70 71 STATISTIC(NumSextSetcc, 72 "Number of (sext(setcc)) nodes expanded into GPR sequence."); 73 STATISTIC(NumZextSetcc, 74 "Number of (zext(setcc)) nodes expanded into GPR sequence."); 75 STATISTIC(SignExtensionsAdded, 76 "Number of sign extensions for compare inputs added."); 77 STATISTIC(ZeroExtensionsAdded, 78 "Number of zero extensions for compare inputs added."); 79 STATISTIC(NumLogicOpsOnComparison, 80 "Number of logical ops on i1 values calculated in GPR."); 81 STATISTIC(OmittedForNonExtendUses, 82 "Number of compares not eliminated as they have non-extending uses."); 83 STATISTIC(NumP9Setb, 84 "Number of compares lowered to setb."); 85 86 // FIXME: Remove this once the bug has been fixed! 
// Hidden flag whose description reads "expose the ANDI glue bug on PPC".
// Unlike the options below it is not declared `static`.
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
                          cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);

// Hidden flag, default true: use the aggressive bit-permutation selector.
static cl::opt<bool>
    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
                       cl::desc("use aggressive ppc isel for bit permutations"),
                       cl::Hidden);
// Hidden stress-testing flag for rotate selection in the bit-permutation
// selector (no explicit initial value is given here).
static cl::opt<bool> BPermRewriterNoMasking(
    "ppc-bit-perm-rewriter-stress-rotates",
    cl::desc("stress rotate selection in aggressive ppc isel for "
             "bit permutations"),
    cl::Hidden);

// Hidden flag, default true: emit static branch hints.
static cl::opt<bool> EnableBranchHint(
    "ppc-use-branch-hint", cl::init(true),
    cl::desc("Enable static hinting of branches on ppc"),
    cl::Hidden);

// Hidden flag, default true: run the TLS optimization peephole.
static cl::opt<bool> EnableTLSOpt(
    "ppc-tls-opt", cl::init(true),
    cl::desc("Enable tls optimization peephole"),
    cl::Hidden);

// Categories of integer comparisons selectable through -ppc-gpr-icmps; the
// meaning of each enumerator is spelled out in the option's value
// descriptions below.
enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
  ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
  ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };

// Hidden option, default ICGPR_All: which integer comparisons get GPR-only
// code.
static cl::opt<ICmpInGPRType> CmpInGPR(
  "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
  cl::desc("Specify the types of comparisons to emit GPR-only code for."),
  cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
             clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
             clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
             clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
             clEnumValN(ICGPR_NonExtIn, "nonextin",
                        "Only comparisons where inputs don't need [sz]ext."),
             clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
             clEnumValN(ICGPR_ZextI32, "zexti32",
                        "Only i32 comparisons with zext result."),
             clEnumValN(ICGPR_ZextI64, "zexti64",
                        "Only i64 comparisons with zext result."),
             clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
             clEnumValN(ICGPR_SextI32, "sexti32",
                        "Only i32 comparisons with sext result."),
             clEnumValN(ICGPR_SextI64, "sexti64",
                        "Only i64 comparisons with sext result.")));
namespace {

  //===--------------------------------------------------------------------===//
  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
  /// instructions for SelectionDAG operations.
  ///
  class PPCDAGToDAGISel : public SelectionDAGISel {
    const PPCTargetMachine &TM;
    // Both pointers are refreshed for every function in runOnMachineFunction.
    const PPCSubtarget *PPCSubTarget = nullptr;
    const PPCTargetLowering *PPCLowering = nullptr;
    // Register holding the global base address for the current function;
    // 0 means "not materialized yet" (see getGlobalBaseReg).
    unsigned GlobalBaseReg = 0;

  public:
    explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(tm, OptLevel), TM(tm) {}

    /// Runs instruction selection on \p MF, then inserts VRSAVE handling
    /// when the subtarget is not using the SVR4 ABI. The result of the
    /// base-class call is ignored; this override always returns true.
    bool runOnMachineFunction(MachineFunction &MF) override {
      // Make sure we re-emit a set of the global base reg if necessary
      GlobalBaseReg = 0;
      PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
      PPCLowering = PPCSubTarget->getTargetLowering();
      SelectionDAGISel::runOnMachineFunction(MF);

      if (!PPCSubTarget->isSVR4ABI())
        InsertVRSaveCode(MF);

      return true;
    }

    void PreprocessISelDAG() override;
    void PostprocessISelDAG() override;

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
    }

    /// getI64Imm - Return a target constant with the specified value, of type
    /// i64.
    inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
    }

    /// getSmallIPtrImm - Return a target constant of pointer type.
    inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(
          Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
    }

    /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
    /// rotate and mask opcode and mask operation.
    static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                                unsigned &SH, unsigned &MB, unsigned &ME);

    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
    /// base register. Return the virtual register that holds this value.
    SDNode *getGlobalBaseReg();

    void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);

    // Select - Convert the specified operand from a target-independent to a
    // target-specific node if it hasn't already been changed.
    void Select(SDNode *N) override;

    bool tryBitfieldInsert(SDNode *N);
    bool tryBitPermutation(SDNode *N);
    bool tryIntCompareInGPR(SDNode *N);

    // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
    // an X-Form load instruction with the offset being a relocation coming from
    // the PPCISD::ADD_TLS.
    bool tryTLSXFormLoad(LoadSDNode *N);
    // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
    // an X-Form store instruction with the offset being a relocation coming from
    // the PPCISD::ADD_TLS.
    bool tryTLSXFormStore(StoreSDNode *N);
    /// SelectCC - Select a comparison of the specified values with the
    /// specified condition code, returning the CR# of the expression.
    SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                     const SDLoc &dl);

    /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
    /// immediate field. Note that the operand at this point is already the
    /// result of a prior SelectAddressRegImm call.
    bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
      if (N.getOpcode() == ISD::TargetConstant ||
          N.getOpcode() == ISD::TargetGlobalAddress) {
        Out = N;
        return true;
      }

      return false;
    }

    /// SelectAddrIdx - Given the specified address, check to see if it can be
    /// represented as an indexed [r+r] operation.
    /// This is for xform instructions whose associated displacement form is D.
    /// The last parameter \p 0 means associated D form has no requirement for
    /// 16 bit signed displacement.
    /// Returns false if it can be represented by [r+imm], which are preferred.
    bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0);
    }

    /// SelectAddrIdxX4 - Given the specified address, check to see if it can
    /// be represented as an indexed [r+r] operation.
    /// This is for xform instructions whose associated displacement form is DS.
    /// The last parameter \p 4 means associated DS form 16 bit signed
    /// displacement must be a multiple of 4.
    /// Returns false if it can be represented by [r+imm], which are preferred.
    bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4);
    }

    /// SelectAddrIdxX16 - Given the specified address, check to see if it can
    /// be represented as an indexed [r+r] operation.
    /// This is for xform instructions whose associated displacement form is DQ.
    /// The last parameter \p 16 means associated DQ form 16 bit signed
    /// displacement must be a multiple of 16.
    /// Returns false if it can be represented by [r+imm], which are preferred.
    bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16);
    }

    /// SelectAddrIdxOnly - Given the specified address, force it to be
    /// represented as an indexed [r+r] operation.
    bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
    }

    /// SelectAddrImm - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement [r+imm].
    /// The last parameter \p 0 means D form has no requirement for 16 bit
    /// signed displacement.
    bool SelectAddrImm(SDValue N, SDValue &Disp,
                       SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
    }

    /// SelectAddrImmX4 - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement that is a multiple of
    /// 4 (last parameter). Suitable for use by STD and friends.
    bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
    }

    /// SelectAddrImmX16 - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement that is a multiple of
    /// 16 (last parameter). Suitable for use by STXV and friends.
    bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
    }

    // Select an address into a single register.
    bool SelectAddr(SDValue N, SDValue &Base) {
      Base = N;
      return true;
    }

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions. It is always correct to compute the value into
    /// a register. The case of adding a (possibly relocatable) constant to a
    /// register can be improved, but it is wrong to substitute Reg+Reg for
    /// Reg in an asm, because the load or store opcode would have to change.
    ///
    /// For every handled constraint the operand is wrapped in a
    /// COPY_TO_REGCLASS node, appended to \p OutOps, and false is returned.
    /// Any other constraint ID is reported on errs() and treated as
    /// unreachable.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override {
      switch(ConstraintID) {
      default:
        errs() << "ConstraintID: " << ConstraintID << "\n";
        llvm_unreachable("Unexpected asm memory constraint");
      case InlineAsm::Constraint_es:
      case InlineAsm::Constraint_m:
      case InlineAsm::Constraint_o:
      case InlineAsm::Constraint_Q:
      case InlineAsm::Constraint_Z:
      case InlineAsm::Constraint_Zy:
        // We need to make sure that this one operand does not end up in r0
        // (because we might end up lowering this as 0(%op)).
        const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
        const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
        SDLoc dl(Op);
        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
        SDValue NewOp =
          SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                         dl, Op.getValueType(),
                                         Op, RC), 0);

        OutOps.push_back(NewOp);
        return false;
      }
      return true;
    }

    void InsertVRSaveCode(MachineFunction &MF);

    StringRef getPassName() const override {
      return "PowerPC DAG->DAG Pattern Instruction Selection";
    }

// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"

  private:
    // Selection helpers; only their declarations appear in this class.
    bool trySETCC(SDNode *N);
    bool tryAsSingleRLDICL(SDNode *N);
    bool tryAsSingleRLDICR(SDNode *N);
    bool tryAsSingleRLWINM(SDNode *N);
    bool tryAsSingleRLWINM8(SDNode *N);
    bool tryAsSingleRLWIMI(SDNode *N);

    void PeepholePPC64();
    void PeepholePPC64ZExt();
    void PeepholeCROps();

    SDValue combineToCMPB(SDNode *N);
    void foldBoolExts(SDValue &Res, SDNode *&N);

    bool AllUsersSelectZero(SDNode *N);
    void SwapAllSelectUsers(SDNode *N);

    bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
    void transferMemOperands(SDNode *N, SDNode *Result);
  };

} // end anonymous namespace

/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,
/// check to see if we need to save/restore VRSAVE. If so, do it.
void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
  // Check to see if this function uses vector registers, which means we have to
  // save and restore the VRSAVE register and update it with the regs we use.
  //
  // In this case, there will be virtual registers of vector type created
  // by the scheduler. Detect them now.
  bool HasVectorVReg = false;
  for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
    unsigned Reg = Register::index2VirtReg(i);
    // A single virtual register of class VRRC is enough to need the code.
    if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
      HasVectorVReg = true;
      break;
    }
  }
  if (!HasVectorVReg) return; // nothing to do.

  // If we have a vector register, we want to emit code into the entry and exit
  // blocks to save and restore the VRSAVE register. We do this here (instead
  // of marking all vector instructions as clobbering VRSAVE) for two reasons:
  //
  // 1. This (trivially) reduces the load on the register allocator, by not
  //    having to represent the live range of the VRSAVE register.
  // 2. This (more significantly) allows us to create a temporary virtual
  //    register to hold the saved VRSAVE value, allowing this temporary to be
  //    register allocated, instead of forcing it to be spilled to the stack.

  // Create two vregs - one to hold the VRSAVE register that is live-in to the
  // function and one for the value after having bits or'd into it.
  Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
  Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);

  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
  MachineBasicBlock &EntryBB = *Fn.begin();
  DebugLoc dl;
  // Emit the following code into the entry block:
  // InVRSAVE = MFVRSAVE
  // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
  // MTVRSAVE UpdatedVRSAVE
  MachineBasicBlock::iterator IP = EntryBB.begin();  // Insert Point
  BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
  BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
          UpdatedVRSAVE).addReg(InVRSAVE);
  BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);

  // Find all return blocks, outputting a restore in each epilog.
  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
    if (BB->isReturnBlock()) {
      // Start from the last instruction of the block.
      IP = BB->end(); --IP;

      // Skip over all terminator instructions, which are part of the return
      // sequence.
      // The loop walks backwards and moves IP onto each terminator it finds,
      // so the restore below is inserted ahead of that run of terminators.
      MachineBasicBlock::iterator I2 = IP;
      while (I2 != BB->begin() && (--I2)->isTerminator())
        IP = I2;

      // Emit: MTVRSAVE InVRSave
      BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
    }
  }
}

/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
433 /// 434 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { 435 if (!GlobalBaseReg) { 436 const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); 437 // Insert the set of GlobalBaseReg into the first MBB of the function 438 MachineBasicBlock &FirstMBB = MF->front(); 439 MachineBasicBlock::iterator MBBI = FirstMBB.begin(); 440 const Module *M = MF->getFunction().getParent(); 441 DebugLoc dl; 442 443 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { 444 if (PPCSubTarget->isTargetELF()) { 445 GlobalBaseReg = PPC::R30; 446 if (!PPCSubTarget->isSecurePlt() && 447 M->getPICLevel() == PICLevel::SmallPIC) { 448 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR)); 449 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); 450 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); 451 } else { 452 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); 453 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); 454 Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); 455 BuildMI(FirstMBB, MBBI, dl, 456 TII.get(PPC::UpdateGBR), GlobalBaseReg) 457 .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg); 458 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); 459 } 460 } else { 461 GlobalBaseReg = 462 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass); 463 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); 464 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); 465 } 466 } else { 467 // We must ensure that this sequence is dominated by the prologue. 468 // FIXME: This is a bit of a big hammer since we don't get the benefits 469 // of shrink-wrapping whenever we emit this instruction. Considering 470 // this is used in any function where we emit a jump table, this may be 471 // a significant limitation. We should consider inserting this in the 472 // block where it is used and then commoning this sequence up if it 473 // appears in multiple places. 
474 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of 475 // MovePCtoLR8. 476 MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true); 477 GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); 478 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); 479 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); 480 } 481 } 482 return CurDAG->getRegister(GlobalBaseReg, 483 PPCLowering->getPointerTy(CurDAG->getDataLayout())) 484 .getNode(); 485 } 486 487 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant 488 /// operand. If so Imm will receive the 32-bit value. 489 static bool isInt32Immediate(SDNode *N, unsigned &Imm) { 490 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { 491 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 492 return true; 493 } 494 return false; 495 } 496 497 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant 498 /// operand. If so Imm will receive the 64-bit value. 499 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) { 500 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) { 501 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 502 return true; 503 } 504 return false; 505 } 506 507 // isInt32Immediate - This method tests to see if a constant operand. 508 // If so Imm will receive the 32 bit value. 509 static bool isInt32Immediate(SDValue N, unsigned &Imm) { 510 return isInt32Immediate(N.getNode(), Imm); 511 } 512 513 /// isInt64Immediate - This method tests to see if the value is a 64-bit 514 /// constant operand. If so Imm will receive the 64-bit value. 
515 static bool isInt64Immediate(SDValue N, uint64_t &Imm) { 516 return isInt64Immediate(N.getNode(), Imm); 517 } 518 519 static unsigned getBranchHint(unsigned PCC, 520 const FunctionLoweringInfo &FuncInfo, 521 const SDValue &DestMBB) { 522 assert(isa<BasicBlockSDNode>(DestMBB)); 523 524 if (!FuncInfo.BPI) return PPC::BR_NO_HINT; 525 526 const BasicBlock *BB = FuncInfo.MBB->getBasicBlock(); 527 const Instruction *BBTerm = BB->getTerminator(); 528 529 if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT; 530 531 const BasicBlock *TBB = BBTerm->getSuccessor(0); 532 const BasicBlock *FBB = BBTerm->getSuccessor(1); 533 534 auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB); 535 auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB); 536 537 // We only want to handle cases which are easy to predict at static time, e.g. 538 // C++ throw statement, that is very likely not taken, or calling never 539 // returned function, e.g. stdlib exit(). So we set Threshold to filter 540 // unwanted cases. 541 // 542 // Below is LLVM branch weight table, we only want to handle case 1, 2 543 // 544 // Case Taken:Nontaken Example 545 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(), 546 // 2. Invoke-terminating 1:1048575 547 // 3. Coldblock 4:64 __builtin_expect 548 // 4. Loop Branch 124:4 For loop 549 // 5. 
PH/ZH/FPH 20:12 550 const uint32_t Threshold = 10000; 551 552 if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb)) 553 return PPC::BR_NO_HINT; 554 555 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName() 556 << "::" << BB->getName() << "'\n" 557 << " -> " << TBB->getName() << ": " << TProb << "\n" 558 << " -> " << FBB->getName() << ": " << FProb << "\n"); 559 560 const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB); 561 562 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities, 563 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock 564 if (BBDN->getBasicBlock()->getBasicBlock() != TBB) 565 std::swap(TProb, FProb); 566 567 return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT; 568 } 569 570 // isOpcWithIntImmediate - This method tests to see if the node is a specific 571 // opcode and that it has a immediate integer right operand. 572 // If so Imm will receive the 32 bit value. 573 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 574 return N->getOpcode() == Opc 575 && isInt32Immediate(N->getOperand(1).getNode(), Imm); 576 } 577 578 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { 579 SDLoc dl(SN); 580 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 581 SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); 582 unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8; 583 if (SN->hasOneUse()) 584 CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI, 585 getSmallIPtrImm(Offset, dl)); 586 else 587 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, 588 getSmallIPtrImm(Offset, dl))); 589 } 590 591 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, 592 bool isShiftMask, unsigned &SH, 593 unsigned &MB, unsigned &ME) { 594 // Don't even go down this path for i64, since different logic will be 595 // necessary for rldicl/rldicr/rldimi. 
596 if (N->getValueType(0) != MVT::i32) 597 return false; 598 599 unsigned Shift = 32; 600 unsigned Indeterminant = ~0; // bit mask marking indeterminant results 601 unsigned Opcode = N->getOpcode(); 602 if (N->getNumOperands() != 2 || 603 !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31)) 604 return false; 605 606 if (Opcode == ISD::SHL) { 607 // apply shift left to mask if it comes first 608 if (isShiftMask) Mask = Mask << Shift; 609 // determine which bits are made indeterminant by shift 610 Indeterminant = ~(0xFFFFFFFFu << Shift); 611 } else if (Opcode == ISD::SRL) { 612 // apply shift right to mask if it comes first 613 if (isShiftMask) Mask = Mask >> Shift; 614 // determine which bits are made indeterminant by shift 615 Indeterminant = ~(0xFFFFFFFFu >> Shift); 616 // adjust for the left rotate 617 Shift = 32 - Shift; 618 } else if (Opcode == ISD::ROTL) { 619 Indeterminant = 0; 620 } else { 621 return false; 622 } 623 624 // if the mask doesn't intersect any Indeterminant bits 625 if (Mask && !(Mask & Indeterminant)) { 626 SH = Shift & 31; 627 // make sure the mask is still a mask (wrap arounds may not be) 628 return isRunOfOnes(Mask, MB, ME); 629 } 630 return false; 631 } 632 633 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) { 634 SDValue Base = ST->getBasePtr(); 635 if (Base.getOpcode() != PPCISD::ADD_TLS) 636 return false; 637 SDValue Offset = ST->getOffset(); 638 if (!Offset.isUndef()) 639 return false; 640 641 SDLoc dl(ST); 642 EVT MemVT = ST->getMemoryVT(); 643 EVT RegVT = ST->getValue().getValueType(); 644 645 unsigned Opcode; 646 switch (MemVT.getSimpleVT().SimpleTy) { 647 default: 648 return false; 649 case MVT::i8: { 650 Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS; 651 break; 652 } 653 case MVT::i16: { 654 Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS; 655 break; 656 } 657 case MVT::i32: { 658 Opcode = (RegVT == MVT::i32) ? 
PPC::STWXTLS_32 : PPC::STWXTLS; 659 break; 660 } 661 case MVT::i64: { 662 Opcode = PPC::STDXTLS; 663 break; 664 } 665 } 666 SDValue Chain = ST->getChain(); 667 SDVTList VTs = ST->getVTList(); 668 SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1), 669 Chain}; 670 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); 671 transferMemOperands(ST, MN); 672 ReplaceNode(ST, MN); 673 return true; 674 } 675 676 bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) { 677 SDValue Base = LD->getBasePtr(); 678 if (Base.getOpcode() != PPCISD::ADD_TLS) 679 return false; 680 SDValue Offset = LD->getOffset(); 681 if (!Offset.isUndef()) 682 return false; 683 684 SDLoc dl(LD); 685 EVT MemVT = LD->getMemoryVT(); 686 EVT RegVT = LD->getValueType(0); 687 unsigned Opcode; 688 switch (MemVT.getSimpleVT().SimpleTy) { 689 default: 690 return false; 691 case MVT::i8: { 692 Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS; 693 break; 694 } 695 case MVT::i16: { 696 Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS; 697 break; 698 } 699 case MVT::i32: { 700 Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS; 701 break; 702 } 703 case MVT::i64: { 704 Opcode = PPC::LDXTLS; 705 break; 706 } 707 } 708 SDValue Chain = LD->getChain(); 709 SDVTList VTs = LD->getVTList(); 710 SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain}; 711 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops); 712 transferMemOperands(LD, MN); 713 ReplaceNode(LD, MN); 714 return true; 715 } 716 717 /// Turn an or of two masked values into the rotate left word immediate then 718 /// mask insert (rlwimi) instruction. 
719 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { 720 SDValue Op0 = N->getOperand(0); 721 SDValue Op1 = N->getOperand(1); 722 SDLoc dl(N); 723 724 KnownBits LKnown = CurDAG->computeKnownBits(Op0); 725 KnownBits RKnown = CurDAG->computeKnownBits(Op1); 726 727 unsigned TargetMask = LKnown.Zero.getZExtValue(); 728 unsigned InsertMask = RKnown.Zero.getZExtValue(); 729 730 if ((TargetMask | InsertMask) == 0xFFFFFFFF) { 731 unsigned Op0Opc = Op0.getOpcode(); 732 unsigned Op1Opc = Op1.getOpcode(); 733 unsigned Value, SH = 0; 734 TargetMask = ~TargetMask; 735 InsertMask = ~InsertMask; 736 737 // If the LHS has a foldable shift and the RHS does not, then swap it to the 738 // RHS so that we can fold the shift into the insert. 739 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) { 740 if (Op0.getOperand(0).getOpcode() == ISD::SHL || 741 Op0.getOperand(0).getOpcode() == ISD::SRL) { 742 if (Op1.getOperand(0).getOpcode() != ISD::SHL && 743 Op1.getOperand(0).getOpcode() != ISD::SRL) { 744 std::swap(Op0, Op1); 745 std::swap(Op0Opc, Op1Opc); 746 std::swap(TargetMask, InsertMask); 747 } 748 } 749 } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) { 750 if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL && 751 Op1.getOperand(0).getOpcode() != ISD::SRL) { 752 std::swap(Op0, Op1); 753 std::swap(Op0Opc, Op1Opc); 754 std::swap(TargetMask, InsertMask); 755 } 756 } 757 758 unsigned MB, ME; 759 if (isRunOfOnes(InsertMask, MB, ME)) { 760 if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && 761 isInt32Immediate(Op1.getOperand(1), Value)) { 762 Op1 = Op1.getOperand(0); 763 SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value; 764 } 765 if (Op1Opc == ISD::AND) { 766 // The AND mask might not be a constant, and we need to make sure that 767 // if we're going to fold the masking with the insert, all bits not 768 // know to be zero in the mask are known to be one. 
769 KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1)); 770 bool CanFoldMask = InsertMask == MKnown.One.getZExtValue(); 771 772 unsigned SHOpc = Op1.getOperand(0).getOpcode(); 773 if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask && 774 isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) { 775 // Note that Value must be in range here (less than 32) because 776 // otherwise there would not be any bits set in InsertMask. 777 Op1 = Op1.getOperand(0).getOperand(0); 778 SH = (SHOpc == ISD::SHL) ? Value : 32 - Value; 779 } 780 } 781 782 SH &= 31; 783 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl), 784 getI32Imm(ME, dl) }; 785 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); 786 return true; 787 } 788 } 789 return false; 790 } 791 792 // Predict the number of instructions that would be generated by calling 793 // selectI64Imm(N). 794 static unsigned selectI64ImmInstrCountDirect(int64_t Imm) { 795 // Assume no remaining bits. 796 unsigned Remainder = 0; 797 // Assume no shift required. 798 unsigned Shift = 0; 799 800 // If it can't be represented as a 32 bit value. 801 if (!isInt<32>(Imm)) { 802 Shift = countTrailingZeros<uint64_t>(Imm); 803 int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; 804 805 // If the shifted value fits 32 bits. 806 if (isInt<32>(ImmSh)) { 807 // Go with the shifted value. 808 Imm = ImmSh; 809 } else { 810 // Still stuck with a 64 bit value. 811 Remainder = Imm; 812 Shift = 32; 813 Imm >>= 32; 814 } 815 } 816 817 // Intermediate operand. 818 unsigned Result = 0; 819 820 // Handle first 32 bits. 821 unsigned Lo = Imm & 0xFFFF; 822 823 // Simple value. 824 if (isInt<16>(Imm)) { 825 // Just the Lo bits. 826 ++Result; 827 } else if (Lo) { 828 // Handle the Hi bits and Lo bits. 829 Result += 2; 830 } else { 831 // Just the Hi bits. 832 ++Result; 833 } 834 835 // If no shift, we're done. 
836 if (!Shift) return Result; 837 838 // If Hi word == Lo word, 839 // we can use rldimi to insert the Lo word into Hi word. 840 if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { 841 ++Result; 842 return Result; 843 } 844 845 // Shift for next step if the upper 32-bits were not zero. 846 if (Imm) 847 ++Result; 848 849 // Add in the last bits as required. 850 if ((Remainder >> 16) & 0xFFFF) 851 ++Result; 852 if (Remainder & 0xFFFF) 853 ++Result; 854 855 return Result; 856 } 857 858 static uint64_t Rot64(uint64_t Imm, unsigned R) { 859 return (Imm << R) | (Imm >> (64 - R)); 860 } 861 862 static unsigned selectI64ImmInstrCount(int64_t Imm) { 863 unsigned Count = selectI64ImmInstrCountDirect(Imm); 864 865 // If the instruction count is 1 or 2, we do not need further analysis 866 // since rotate + load constant requires at least 2 instructions. 867 if (Count <= 2) 868 return Count; 869 870 for (unsigned r = 1; r < 63; ++r) { 871 uint64_t RImm = Rot64(Imm, r); 872 unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1; 873 Count = std::min(Count, RCount); 874 875 // See comments in selectI64Imm for an explanation of the logic below. 876 unsigned LS = findLastSet(RImm); 877 if (LS != r-1) 878 continue; 879 880 uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); 881 uint64_t RImmWithOnes = RImm | OnesMask; 882 883 RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1; 884 Count = std::min(Count, RCount); 885 } 886 887 return Count; 888 } 889 890 // Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount 891 // (above) needs to be kept in sync with this function. 892 static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, 893 int64_t Imm) { 894 // Assume no remaining bits. 895 unsigned Remainder = 0; 896 // Assume no shift required. 897 unsigned Shift = 0; 898 899 // If it can't be represented as a 32 bit value. 
900 if (!isInt<32>(Imm)) { 901 Shift = countTrailingZeros<uint64_t>(Imm); 902 int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; 903 904 // If the shifted value fits 32 bits. 905 if (isInt<32>(ImmSh)) { 906 // Go with the shifted value. 907 Imm = ImmSh; 908 } else { 909 // Still stuck with a 64 bit value. 910 Remainder = Imm; 911 Shift = 32; 912 Imm >>= 32; 913 } 914 } 915 916 // Intermediate operand. 917 SDNode *Result; 918 919 // Handle first 32 bits. 920 unsigned Lo = Imm & 0xFFFF; 921 unsigned Hi = (Imm >> 16) & 0xFFFF; 922 923 auto getI32Imm = [CurDAG, dl](unsigned Imm) { 924 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 925 }; 926 927 // Simple value. 928 if (isInt<16>(Imm)) { 929 uint64_t SextImm = SignExtend64(Lo, 16); 930 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); 931 // Just the Lo bits. 932 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); 933 } else if (Lo) { 934 // Handle the Hi bits. 935 unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8; 936 Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi)); 937 // And Lo bits. 938 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, 939 SDValue(Result, 0), getI32Imm(Lo)); 940 } else { 941 // Just the Hi bits. 942 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); 943 } 944 945 // If no shift, we're done. 946 if (!Shift) return Result; 947 948 // If Hi word == Lo word, 949 // we can use rldimi to insert the Lo word into Hi word. 950 if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { 951 SDValue Ops[] = 952 { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)}; 953 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); 954 } 955 956 // Shift for next step if the upper 32-bits were not zero. 957 if (Imm) { 958 Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, 959 SDValue(Result, 0), 960 getI32Imm(Shift), 961 getI32Imm(63 - Shift)); 962 } 963 964 // Add in the last bits as required. 
965 if ((Hi = (Remainder >> 16) & 0xFFFF)) { 966 Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, 967 SDValue(Result, 0), getI32Imm(Hi)); 968 } 969 if ((Lo = Remainder & 0xFFFF)) { 970 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, 971 SDValue(Result, 0), getI32Imm(Lo)); 972 } 973 974 return Result; 975 } 976 977 static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, 978 int64_t Imm) { 979 unsigned Count = selectI64ImmInstrCountDirect(Imm); 980 981 // If the instruction count is 1 or 2, we do not need further analysis 982 // since rotate + load constant requires at least 2 instructions. 983 if (Count <= 2) 984 return selectI64ImmDirect(CurDAG, dl, Imm); 985 986 unsigned RMin = 0; 987 988 int64_t MatImm; 989 unsigned MaskEnd; 990 991 for (unsigned r = 1; r < 63; ++r) { 992 uint64_t RImm = Rot64(Imm, r); 993 unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1; 994 if (RCount < Count) { 995 Count = RCount; 996 RMin = r; 997 MatImm = RImm; 998 MaskEnd = 63; 999 } 1000 1001 // If the immediate to generate has many trailing zeros, it might be 1002 // worthwhile to generate a rotated value with too many leading ones 1003 // (because that's free with li/lis's sign-extension semantics), and then 1004 // mask them off after rotation. 1005 1006 unsigned LS = findLastSet(RImm); 1007 // We're adding (63-LS) higher-order ones, and we expect to mask them off 1008 // after performing the inverse rotation by (64-r). 
So we need that: 1009 // 63-LS == 64-r => LS == r-1 1010 if (LS != r-1) 1011 continue; 1012 1013 uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); 1014 uint64_t RImmWithOnes = RImm | OnesMask; 1015 1016 RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1; 1017 if (RCount < Count) { 1018 Count = RCount; 1019 RMin = r; 1020 MatImm = RImmWithOnes; 1021 MaskEnd = LS; 1022 } 1023 } 1024 1025 if (!RMin) 1026 return selectI64ImmDirect(CurDAG, dl, Imm); 1027 1028 auto getI32Imm = [CurDAG, dl](unsigned Imm) { 1029 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 1030 }; 1031 1032 SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0); 1033 return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val, 1034 getI32Imm(64 - RMin), getI32Imm(MaskEnd)); 1035 } 1036 1037 static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { 1038 unsigned MaxTruncation = 0; 1039 // Cannot use range-based for loop here as we need the actual use (i.e. we 1040 // need the operand number corresponding to the use). A range-based for 1041 // will unbox the use and provide an SDNode*. 1042 for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end(); 1043 Use != UseEnd; ++Use) { 1044 unsigned Opc = 1045 Use->isMachineOpcode() ? 
Use->getMachineOpcode() : Use->getOpcode(); 1046 switch (Opc) { 1047 default: return 0; 1048 case ISD::TRUNCATE: 1049 if (Use->isMachineOpcode()) 1050 return 0; 1051 MaxTruncation = 1052 std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits()); 1053 continue; 1054 case ISD::STORE: { 1055 if (Use->isMachineOpcode()) 1056 return 0; 1057 StoreSDNode *STN = cast<StoreSDNode>(*Use); 1058 unsigned MemVTSize = STN->getMemoryVT().getSizeInBits(); 1059 if (MemVTSize == 64 || Use.getOperandNo() != 0) 1060 return 0; 1061 MaxTruncation = std::max(MaxTruncation, MemVTSize); 1062 continue; 1063 } 1064 case PPC::STW8: 1065 case PPC::STWX8: 1066 case PPC::STWU8: 1067 case PPC::STWUX8: 1068 if (Use.getOperandNo() != 0) 1069 return 0; 1070 MaxTruncation = std::max(MaxTruncation, 32u); 1071 continue; 1072 case PPC::STH8: 1073 case PPC::STHX8: 1074 case PPC::STHU8: 1075 case PPC::STHUX8: 1076 if (Use.getOperandNo() != 0) 1077 return 0; 1078 MaxTruncation = std::max(MaxTruncation, 16u); 1079 continue; 1080 case PPC::STB8: 1081 case PPC::STBX8: 1082 case PPC::STBU8: 1083 case PPC::STBUX8: 1084 if (Use.getOperandNo() != 0) 1085 return 0; 1086 MaxTruncation = std::max(MaxTruncation, 8u); 1087 continue; 1088 } 1089 } 1090 return MaxTruncation; 1091 } 1092 1093 // Select a 64-bit constant. 1094 static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { 1095 SDLoc dl(N); 1096 1097 // Get 64 bit value. 
1098 int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue(); 1099 if (unsigned MinSize = allUsesTruncate(CurDAG, N)) { 1100 uint64_t SextImm = SignExtend64(Imm, MinSize); 1101 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); 1102 if (isInt<16>(SextImm)) 1103 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); 1104 } 1105 return selectI64Imm(CurDAG, dl, Imm); 1106 } 1107 1108 namespace { 1109 1110 class BitPermutationSelector { 1111 struct ValueBit { 1112 SDValue V; 1113 1114 // The bit number in the value, using a convention where bit 0 is the 1115 // lowest-order bit. 1116 unsigned Idx; 1117 1118 // ConstZero means a bit we need to mask off. 1119 // Variable is a bit comes from an input variable. 1120 // VariableKnownToBeZero is also a bit comes from an input variable, 1121 // but it is known to be already zero. So we do not need to mask them. 1122 enum Kind { 1123 ConstZero, 1124 Variable, 1125 VariableKnownToBeZero 1126 } K; 1127 1128 ValueBit(SDValue V, unsigned I, Kind K = Variable) 1129 : V(V), Idx(I), K(K) {} 1130 ValueBit(Kind K = Variable) 1131 : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {} 1132 1133 bool isZero() const { 1134 return K == ConstZero || K == VariableKnownToBeZero; 1135 } 1136 1137 bool hasValue() const { 1138 return K == Variable || K == VariableKnownToBeZero; 1139 } 1140 1141 SDValue getValue() const { 1142 assert(hasValue() && "Cannot get the value of a constant bit"); 1143 return V; 1144 } 1145 1146 unsigned getValueBitIndex() const { 1147 assert(hasValue() && "Cannot get the value bit index of a constant bit"); 1148 return Idx; 1149 } 1150 }; 1151 1152 // A bit group has the same underlying value and the same rotate factor. 
1153 struct BitGroup { 1154 SDValue V; 1155 unsigned RLAmt; 1156 unsigned StartIdx, EndIdx; 1157 1158 // This rotation amount assumes that the lower 32 bits of the quantity are 1159 // replicated in the high 32 bits by the rotation operator (which is done 1160 // by rlwinm and friends in 64-bit mode). 1161 bool Repl32; 1162 // Did converting to Repl32 == true change the rotation factor? If it did, 1163 // it decreased it by 32. 1164 bool Repl32CR; 1165 // Was this group coalesced after setting Repl32 to true? 1166 bool Repl32Coalesced; 1167 1168 BitGroup(SDValue V, unsigned R, unsigned S, unsigned E) 1169 : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false), 1170 Repl32Coalesced(false) { 1171 LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R 1172 << " [" << S << ", " << E << "]\n"); 1173 } 1174 }; 1175 1176 // Information on each (Value, RLAmt) pair (like the number of groups 1177 // associated with each) used to choose the lowering method. 1178 struct ValueRotInfo { 1179 SDValue V; 1180 unsigned RLAmt = std::numeric_limits<unsigned>::max(); 1181 unsigned NumGroups = 0; 1182 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max(); 1183 bool Repl32 = false; 1184 1185 ValueRotInfo() = default; 1186 1187 // For sorting (in reverse order) by NumGroups, and then by 1188 // FirstGroupStartIdx. 1189 bool operator < (const ValueRotInfo &Other) const { 1190 // We need to sort so that the non-Repl32 come first because, when we're 1191 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit 1192 // masking operation. 
1193 if (Repl32 < Other.Repl32) 1194 return true; 1195 else if (Repl32 > Other.Repl32) 1196 return false; 1197 else if (NumGroups > Other.NumGroups) 1198 return true; 1199 else if (NumGroups < Other.NumGroups) 1200 return false; 1201 else if (RLAmt == 0 && Other.RLAmt != 0) 1202 return true; 1203 else if (RLAmt != 0 && Other.RLAmt == 0) 1204 return false; 1205 else if (FirstGroupStartIdx < Other.FirstGroupStartIdx) 1206 return true; 1207 return false; 1208 } 1209 }; 1210 1211 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>; 1212 using ValueBitsMemoizer = 1213 DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>; 1214 ValueBitsMemoizer Memoizer; 1215 1216 // Return a pair of bool and a SmallVector pointer to a memoization entry. 1217 // The bool is true if something interesting was deduced, otherwise if we're 1218 // providing only a generic representation of V (or something else likewise 1219 // uninteresting for instruction selection) through the SmallVector. 1220 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V, 1221 unsigned NumBits) { 1222 auto &ValueEntry = Memoizer[V]; 1223 if (ValueEntry) 1224 return std::make_pair(ValueEntry->first, &ValueEntry->second); 1225 ValueEntry.reset(new ValueBitsMemoizedValue()); 1226 bool &Interesting = ValueEntry->first; 1227 SmallVector<ValueBit, 64> &Bits = ValueEntry->second; 1228 Bits.resize(NumBits); 1229 1230 switch (V.getOpcode()) { 1231 default: break; 1232 case ISD::ROTL: 1233 if (isa<ConstantSDNode>(V.getOperand(1))) { 1234 unsigned RotAmt = V.getConstantOperandVal(1); 1235 1236 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1237 1238 for (unsigned i = 0; i < NumBits; ++i) 1239 Bits[i] = LHSBits[i < RotAmt ? 
i + (NumBits - RotAmt) : i - RotAmt]; 1240 1241 return std::make_pair(Interesting = true, &Bits); 1242 } 1243 break; 1244 case ISD::SHL: 1245 if (isa<ConstantSDNode>(V.getOperand(1))) { 1246 unsigned ShiftAmt = V.getConstantOperandVal(1); 1247 1248 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1249 1250 for (unsigned i = ShiftAmt; i < NumBits; ++i) 1251 Bits[i] = LHSBits[i - ShiftAmt]; 1252 1253 for (unsigned i = 0; i < ShiftAmt; ++i) 1254 Bits[i] = ValueBit(ValueBit::ConstZero); 1255 1256 return std::make_pair(Interesting = true, &Bits); 1257 } 1258 break; 1259 case ISD::SRL: 1260 if (isa<ConstantSDNode>(V.getOperand(1))) { 1261 unsigned ShiftAmt = V.getConstantOperandVal(1); 1262 1263 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1264 1265 for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) 1266 Bits[i] = LHSBits[i + ShiftAmt]; 1267 1268 for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) 1269 Bits[i] = ValueBit(ValueBit::ConstZero); 1270 1271 return std::make_pair(Interesting = true, &Bits); 1272 } 1273 break; 1274 case ISD::AND: 1275 if (isa<ConstantSDNode>(V.getOperand(1))) { 1276 uint64_t Mask = V.getConstantOperandVal(1); 1277 1278 const SmallVector<ValueBit, 64> *LHSBits; 1279 // Mark this as interesting, only if the LHS was also interesting. This 1280 // prevents the overall procedure from matching a single immediate 'and' 1281 // (which is non-optimal because such an and might be folded with other 1282 // things if we don't select it here). 1283 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits); 1284 1285 for (unsigned i = 0; i < NumBits; ++i) 1286 if (((Mask >> i) & 1) == 1) 1287 Bits[i] = (*LHSBits)[i]; 1288 else { 1289 // AND instruction masks this bit. If the input is already zero, 1290 // we have nothing to do here. Otherwise, make the bit ConstZero. 
1291 if ((*LHSBits)[i].isZero()) 1292 Bits[i] = (*LHSBits)[i]; 1293 else 1294 Bits[i] = ValueBit(ValueBit::ConstZero); 1295 } 1296 1297 return std::make_pair(Interesting, &Bits); 1298 } 1299 break; 1300 case ISD::OR: { 1301 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1302 const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second; 1303 1304 bool AllDisjoint = true; 1305 SDValue LastVal = SDValue(); 1306 unsigned LastIdx = 0; 1307 for (unsigned i = 0; i < NumBits; ++i) { 1308 if (LHSBits[i].isZero() && RHSBits[i].isZero()) { 1309 // If both inputs are known to be zero and one is ConstZero and 1310 // another is VariableKnownToBeZero, we can select whichever 1311 // we like. To minimize the number of bit groups, we select 1312 // VariableKnownToBeZero if this bit is the next bit of the same 1313 // input variable from the previous bit. Otherwise, we select 1314 // ConstZero. 1315 if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal && 1316 LHSBits[i].getValueBitIndex() == LastIdx + 1) 1317 Bits[i] = LHSBits[i]; 1318 else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal && 1319 RHSBits[i].getValueBitIndex() == LastIdx + 1) 1320 Bits[i] = RHSBits[i]; 1321 else 1322 Bits[i] = ValueBit(ValueBit::ConstZero); 1323 } 1324 else if (LHSBits[i].isZero()) 1325 Bits[i] = RHSBits[i]; 1326 else if (RHSBits[i].isZero()) 1327 Bits[i] = LHSBits[i]; 1328 else { 1329 AllDisjoint = false; 1330 break; 1331 } 1332 // We remember the value and bit index of this bit. 1333 if (Bits[i].hasValue()) { 1334 LastVal = Bits[i].getValue(); 1335 LastIdx = Bits[i].getValueBitIndex(); 1336 } 1337 else { 1338 if (LastVal) LastVal = SDValue(); 1339 LastIdx = 0; 1340 } 1341 } 1342 1343 if (!AllDisjoint) 1344 break; 1345 1346 return std::make_pair(Interesting = true, &Bits); 1347 } 1348 case ISD::ZERO_EXTEND: { 1349 // We support only the case with zero extension from i32 to i64 so far. 
1350 if (V.getValueType() != MVT::i64 || 1351 V.getOperand(0).getValueType() != MVT::i32) 1352 break; 1353 1354 const SmallVector<ValueBit, 64> *LHSBits; 1355 const unsigned NumOperandBits = 32; 1356 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), 1357 NumOperandBits); 1358 1359 for (unsigned i = 0; i < NumOperandBits; ++i) 1360 Bits[i] = (*LHSBits)[i]; 1361 1362 for (unsigned i = NumOperandBits; i < NumBits; ++i) 1363 Bits[i] = ValueBit(ValueBit::ConstZero); 1364 1365 return std::make_pair(Interesting, &Bits); 1366 } 1367 case ISD::TRUNCATE: { 1368 EVT FromType = V.getOperand(0).getValueType(); 1369 EVT ToType = V.getValueType(); 1370 // We support only the case with truncate from i64 to i32. 1371 if (FromType != MVT::i64 || ToType != MVT::i32) 1372 break; 1373 const unsigned NumAllBits = FromType.getSizeInBits(); 1374 SmallVector<ValueBit, 64> *InBits; 1375 std::tie(Interesting, InBits) = getValueBits(V.getOperand(0), 1376 NumAllBits); 1377 const unsigned NumValidBits = ToType.getSizeInBits(); 1378 1379 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value. 1380 // So, we cannot include this truncate. 1381 bool UseUpper32bit = false; 1382 for (unsigned i = 0; i < NumValidBits; ++i) 1383 if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) { 1384 UseUpper32bit = true; 1385 break; 1386 } 1387 if (UseUpper32bit) 1388 break; 1389 1390 for (unsigned i = 0; i < NumValidBits; ++i) 1391 Bits[i] = (*InBits)[i]; 1392 1393 return std::make_pair(Interesting, &Bits); 1394 } 1395 case ISD::AssertZext: { 1396 // For AssertZext, we look through the operand and 1397 // mark the bits known to be zero. 
1398 const SmallVector<ValueBit, 64> *LHSBits; 1399 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), 1400 NumBits); 1401 1402 EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT(); 1403 const unsigned NumValidBits = FromType.getSizeInBits(); 1404 for (unsigned i = 0; i < NumValidBits; ++i) 1405 Bits[i] = (*LHSBits)[i]; 1406 1407 // These bits are known to be zero but the AssertZext may be from a value 1408 // that already has some constant zero bits (i.e. from a masking and). 1409 for (unsigned i = NumValidBits; i < NumBits; ++i) 1410 Bits[i] = (*LHSBits)[i].hasValue() 1411 ? ValueBit((*LHSBits)[i].getValue(), 1412 (*LHSBits)[i].getValueBitIndex(), 1413 ValueBit::VariableKnownToBeZero) 1414 : ValueBit(ValueBit::ConstZero); 1415 1416 return std::make_pair(Interesting, &Bits); 1417 } 1418 case ISD::LOAD: 1419 LoadSDNode *LD = cast<LoadSDNode>(V); 1420 if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) { 1421 EVT VT = LD->getMemoryVT(); 1422 const unsigned NumValidBits = VT.getSizeInBits(); 1423 1424 for (unsigned i = 0; i < NumValidBits; ++i) 1425 Bits[i] = ValueBit(V, i); 1426 1427 // These bits are known to be zero. 1428 for (unsigned i = NumValidBits; i < NumBits; ++i) 1429 Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero); 1430 1431 // Zero-extending load itself cannot be optimized. So, it is not 1432 // interesting by itself though it gives useful information. 1433 return std::make_pair(Interesting = false, &Bits); 1434 } 1435 break; 1436 } 1437 1438 for (unsigned i = 0; i < NumBits; ++i) 1439 Bits[i] = ValueBit(V, i); 1440 1441 return std::make_pair(Interesting = false, &Bits); 1442 } 1443 1444 // For each value (except the constant ones), compute the left-rotate amount 1445 // to get it from its original to final position. 
1446 void computeRotationAmounts() { 1447 NeedMask = false; 1448 RLAmt.resize(Bits.size()); 1449 for (unsigned i = 0; i < Bits.size(); ++i) 1450 if (Bits[i].hasValue()) { 1451 unsigned VBI = Bits[i].getValueBitIndex(); 1452 if (i >= VBI) 1453 RLAmt[i] = i - VBI; 1454 else 1455 RLAmt[i] = Bits.size() - (VBI - i); 1456 } else if (Bits[i].isZero()) { 1457 NeedMask = true; 1458 RLAmt[i] = UINT32_MAX; 1459 } else { 1460 llvm_unreachable("Unknown value bit type"); 1461 } 1462 } 1463 1464 // Collect groups of consecutive bits with the same underlying value and 1465 // rotation factor. If we're doing late masking, we ignore zeros, otherwise 1466 // they break up groups. 1467 void collectBitGroups(bool LateMask) { 1468 BitGroups.clear(); 1469 1470 unsigned LastRLAmt = RLAmt[0]; 1471 SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue(); 1472 unsigned LastGroupStartIdx = 0; 1473 bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); 1474 for (unsigned i = 1; i < Bits.size(); ++i) { 1475 unsigned ThisRLAmt = RLAmt[i]; 1476 SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue(); 1477 if (LateMask && !ThisValue) { 1478 ThisValue = LastValue; 1479 ThisRLAmt = LastRLAmt; 1480 // If we're doing late masking, then the first bit group always starts 1481 // at zero (even if the first bits were zero). 1482 if (BitGroups.empty()) 1483 LastGroupStartIdx = 0; 1484 } 1485 1486 // If this bit is known to be zero and the current group is a bit group 1487 // of zeros, we do not need to terminate the current bit group even the 1488 // Value or RLAmt does not match here. Instead, we terminate this group 1489 // when the first non-zero bit appears later. 1490 if (IsGroupOfZeros && Bits[i].isZero()) 1491 continue; 1492 1493 // If this bit has the same underlying value and the same rotate factor as 1494 // the last one, then they're part of the same group. 
1495 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue) 1496 // We cannot continue the current group if this bits is not known to 1497 // be zero in a bit group of zeros. 1498 if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero())) 1499 continue; 1500 1501 if (LastValue.getNode()) 1502 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, 1503 i-1)); 1504 LastRLAmt = ThisRLAmt; 1505 LastValue = ThisValue; 1506 LastGroupStartIdx = i; 1507 IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue(); 1508 } 1509 if (LastValue.getNode()) 1510 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, 1511 Bits.size()-1)); 1512 1513 if (BitGroups.empty()) 1514 return; 1515 1516 // We might be able to combine the first and last groups. 1517 if (BitGroups.size() > 1) { 1518 // If the first and last groups are the same, then remove the first group 1519 // in favor of the last group, making the ending index of the last group 1520 // equal to the ending index of the to-be-removed first group. 1521 if (BitGroups[0].StartIdx == 0 && 1522 BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 && 1523 BitGroups[0].V == BitGroups[BitGroups.size()-1].V && 1524 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) { 1525 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n"); 1526 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx; 1527 BitGroups.erase(BitGroups.begin()); 1528 } 1529 } 1530 } 1531 1532 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups 1533 // associated with each. If the number of groups are same, we prefer a group 1534 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate 1535 // instruction. If there is a degeneracy, pick the one that occurs 1536 // first (in the final value). 1537 void collectValueRotInfo() { 1538 ValueRots.clear(); 1539 1540 for (auto &BG : BitGroups) { 1541 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 
64 : 0); 1542 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)]; 1543 VRI.V = BG.V; 1544 VRI.RLAmt = BG.RLAmt; 1545 VRI.Repl32 = BG.Repl32; 1546 VRI.NumGroups += 1; 1547 VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx); 1548 } 1549 1550 // Now that we've collected the various ValueRotInfo instances, we need to 1551 // sort them. 1552 ValueRotsVec.clear(); 1553 for (auto &I : ValueRots) { 1554 ValueRotsVec.push_back(I.second); 1555 } 1556 llvm::sort(ValueRotsVec); 1557 } 1558 1559 // In 64-bit mode, rlwinm and friends have a rotation operator that 1560 // replicates the low-order 32 bits into the high-order 32-bits. The mask 1561 // indices of these instructions can only be in the lower 32 bits, so they 1562 // can only represent some 64-bit bit groups. However, when they can be used, 1563 // the 32-bit replication can be used to represent, as a single bit group, 1564 // otherwise separate bit groups. We'll convert to replicated-32-bit bit 1565 // groups when possible. Returns true if any of the bit groups were 1566 // converted. 1567 void assignRepl32BitGroups() { 1568 // If we have bits like this: 1569 // 1570 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 1571 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24 1572 // Groups: | RLAmt = 8 | RLAmt = 40 | 1573 // 1574 // But, making use of a 32-bit operation that replicates the low-order 32 1575 // bits into the high-order 32 bits, this can be one bit group with a RLAmt 1576 // of 8. 
1577 1578 auto IsAllLow32 = [this](BitGroup & BG) { 1579 if (BG.StartIdx <= BG.EndIdx) { 1580 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) { 1581 if (!Bits[i].hasValue()) 1582 continue; 1583 if (Bits[i].getValueBitIndex() >= 32) 1584 return false; 1585 } 1586 } else { 1587 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) { 1588 if (!Bits[i].hasValue()) 1589 continue; 1590 if (Bits[i].getValueBitIndex() >= 32) 1591 return false; 1592 } 1593 for (unsigned i = 0; i <= BG.EndIdx; ++i) { 1594 if (!Bits[i].hasValue()) 1595 continue; 1596 if (Bits[i].getValueBitIndex() >= 32) 1597 return false; 1598 } 1599 } 1600 1601 return true; 1602 }; 1603 1604 for (auto &BG : BitGroups) { 1605 // If this bit group has RLAmt of 0 and will not be merged with 1606 // another bit group, we don't benefit from Repl32. We don't mark 1607 // such group to give more freedom for later instruction selection. 1608 if (BG.RLAmt == 0) { 1609 auto PotentiallyMerged = [this](BitGroup & BG) { 1610 for (auto &BG2 : BitGroups) 1611 if (&BG != &BG2 && BG.V == BG2.V && 1612 (BG2.RLAmt == 0 || BG2.RLAmt == 32)) 1613 return true; 1614 return false; 1615 }; 1616 if (!PotentiallyMerged(BG)) 1617 continue; 1618 } 1619 if (BG.StartIdx < 32 && BG.EndIdx < 32) { 1620 if (IsAllLow32(BG)) { 1621 if (BG.RLAmt >= 32) { 1622 BG.RLAmt -= 32; 1623 BG.Repl32CR = true; 1624 } 1625 1626 BG.Repl32 = true; 1627 1628 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for " 1629 << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " [" 1630 << BG.StartIdx << ", " << BG.EndIdx << "]\n"); 1631 } 1632 } 1633 } 1634 1635 // Now walk through the bit groups, consolidating where possible. 1636 for (auto I = BitGroups.begin(); I != BitGroups.end();) { 1637 // We might want to remove this bit group by merging it with the previous 1638 // group (which might be the ending group). 1639 auto IP = (I == BitGroups.begin()) ? 
1640 std::prev(BitGroups.end()) : std::prev(I); 1641 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt && 1642 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) { 1643 1644 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " 1645 << I->V.getNode() << " RLAmt = " << I->RLAmt << " [" 1646 << I->StartIdx << ", " << I->EndIdx 1647 << "] with group with range [" << IP->StartIdx << ", " 1648 << IP->EndIdx << "]\n"); 1649 1650 IP->EndIdx = I->EndIdx; 1651 IP->Repl32CR = IP->Repl32CR || I->Repl32CR; 1652 IP->Repl32Coalesced = true; 1653 I = BitGroups.erase(I); 1654 continue; 1655 } else { 1656 // There is a special case worth handling: If there is a single group 1657 // covering the entire upper 32 bits, and it can be merged with both 1658 // the next and previous groups (which might be the same group), then 1659 // do so. If it is the same group (so there will be only one group in 1660 // total), then we need to reverse the order of the range so that it 1661 // covers the entire 64 bits. 1662 if (I->StartIdx == 32 && I->EndIdx == 63) { 1663 assert(std::next(I) == BitGroups.end() && 1664 "bit group ends at index 63 but there is another?"); 1665 auto IN = BitGroups.begin(); 1666 1667 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V && 1668 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt && 1669 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP && 1670 IsAllLow32(*I)) { 1671 1672 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode() 1673 << " RLAmt = " << I->RLAmt << " [" << I->StartIdx 1674 << ", " << I->EndIdx 1675 << "] with 32-bit replicated groups with ranges [" 1676 << IP->StartIdx << ", " << IP->EndIdx << "] and [" 1677 << IN->StartIdx << ", " << IN->EndIdx << "]\n"); 1678 1679 if (IP == IN) { 1680 // There is only one other group; change it to cover the whole 1681 // range (backward, so that it can still be Repl32 but cover the 1682 // whole 64-bit range). 
1683 IP->StartIdx = 31; 1684 IP->EndIdx = 30; 1685 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32; 1686 IP->Repl32Coalesced = true; 1687 I = BitGroups.erase(I); 1688 } else { 1689 // There are two separate groups, one before this group and one 1690 // after us (at the beginning). We're going to remove this group, 1691 // but also the group at the very beginning. 1692 IP->EndIdx = IN->EndIdx; 1693 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32; 1694 IP->Repl32Coalesced = true; 1695 I = BitGroups.erase(I); 1696 BitGroups.erase(BitGroups.begin()); 1697 } 1698 1699 // This must be the last group in the vector (and we might have 1700 // just invalidated the iterator above), so break here. 1701 break; 1702 } 1703 } 1704 } 1705 1706 ++I; 1707 } 1708 } 1709 1710 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 1711 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 1712 } 1713 1714 uint64_t getZerosMask() { 1715 uint64_t Mask = 0; 1716 for (unsigned i = 0; i < Bits.size(); ++i) { 1717 if (Bits[i].hasValue()) 1718 continue; 1719 Mask |= (UINT64_C(1) << i); 1720 } 1721 1722 return ~Mask; 1723 } 1724 1725 // This method extends an input value to 64 bit if input is 32-bit integer. 1726 // While selecting instructions in BitPermutationSelector in 64-bit mode, 1727 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included. 1728 // In such case, we extend it to 64 bit to be consistent with other values. 
1729 SDValue ExtendToInt64(SDValue V, const SDLoc &dl) { 1730 if (V.getValueSizeInBits() == 64) 1731 return V; 1732 1733 assert(V.getValueSizeInBits() == 32); 1734 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); 1735 SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, 1736 MVT::i64), 0); 1737 SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, 1738 MVT::i64, ImDef, V, 1739 SubRegIdx), 0); 1740 return ExtVal; 1741 } 1742 1743 SDValue TruncateToInt32(SDValue V, const SDLoc &dl) { 1744 if (V.getValueSizeInBits() == 32) 1745 return V; 1746 1747 assert(V.getValueSizeInBits() == 64); 1748 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); 1749 SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, 1750 MVT::i32, V, SubRegIdx), 0); 1751 return SubVal; 1752 } 1753 1754 // Depending on the number of groups for a particular value, it might be 1755 // better to rotate, mask explicitly (using andi/andis), and then or the 1756 // result. Select this part of the result first. 1757 void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { 1758 if (BPermRewriterNoMasking) 1759 return; 1760 1761 for (ValueRotInfo &VRI : ValueRotsVec) { 1762 unsigned Mask = 0; 1763 for (unsigned i = 0; i < Bits.size(); ++i) { 1764 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V) 1765 continue; 1766 if (RLAmt[i] != VRI.RLAmt) 1767 continue; 1768 Mask |= (1u << i); 1769 } 1770 1771 // Compute the masks for andi/andis that would be necessary. 1772 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16; 1773 assert((ANDIMask != 0 || ANDISMask != 0) && 1774 "No set bits in mask for value bit groups"); 1775 bool NeedsRotate = VRI.RLAmt != 0; 1776 1777 // We're trying to minimize the number of instructions. If we have one 1778 // group, using one of andi/andis can break even. 
If we have three 1779 // groups, we can use both andi and andis and break even (to use both 1780 // andi and andis we also need to or the results together). We need four 1781 // groups if we also need to rotate. To use andi/andis we need to do more 1782 // than break even because rotate-and-mask instructions tend to be easier 1783 // to schedule. 1784 1785 // FIXME: We've biased here against using andi/andis, which is right for 1786 // POWER cores, but not optimal everywhere. For example, on the A2, 1787 // andi/andis have single-cycle latency whereas the rotate-and-mask 1788 // instructions take two cycles, and it would be better to bias toward 1789 // andi/andis in break-even cases. 1790 1791 unsigned NumAndInsts = (unsigned) NeedsRotate + 1792 (unsigned) (ANDIMask != 0) + 1793 (unsigned) (ANDISMask != 0) + 1794 (unsigned) (ANDIMask != 0 && ANDISMask != 0) + 1795 (unsigned) (bool) Res; 1796 1797 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() 1798 << " RL: " << VRI.RLAmt << ":" 1799 << "\n\t\t\tisel using masking: " << NumAndInsts 1800 << " using rotates: " << VRI.NumGroups << "\n"); 1801 1802 if (NumAndInsts >= VRI.NumGroups) 1803 continue; 1804 1805 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n"); 1806 1807 if (InstCnt) *InstCnt += NumAndInsts; 1808 1809 SDValue VRot; 1810 if (VRI.RLAmt) { 1811 SDValue Ops[] = 1812 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), 1813 getI32Imm(0, dl), getI32Imm(31, dl) }; 1814 VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, 1815 Ops), 0); 1816 } else { 1817 VRot = TruncateToInt32(VRI.V, dl); 1818 } 1819 1820 SDValue ANDIVal, ANDISVal; 1821 if (ANDIMask != 0) 1822 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32, 1823 VRot, getI32Imm(ANDIMask, dl)), 1824 0); 1825 if (ANDISMask != 0) 1826 ANDISVal = 1827 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot, 1828 getI32Imm(ANDISMask, dl)), 1829 0); 1830 1831 SDValue TotalVal; 1832 if (!ANDIVal) 1833 
TotalVal = ANDISVal; 1834 else if (!ANDISVal) 1835 TotalVal = ANDIVal; 1836 else 1837 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, 1838 ANDIVal, ANDISVal), 0); 1839 1840 if (!Res) 1841 Res = TotalVal; 1842 else 1843 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, 1844 Res, TotalVal), 0); 1845 1846 // Now, remove all groups with this underlying value and rotation 1847 // factor. 1848 eraseMatchingBitGroups([VRI](const BitGroup &BG) { 1849 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; 1850 }); 1851 } 1852 } 1853 1854 // Instruction selection for the 32-bit case. 1855 SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) { 1856 SDLoc dl(N); 1857 SDValue Res; 1858 1859 if (InstCnt) *InstCnt = 0; 1860 1861 // Take care of cases that should use andi/andis first. 1862 SelectAndParts32(dl, Res, InstCnt); 1863 1864 // If we've not yet selected a 'starting' instruction, and we have no zeros 1865 // to fill in, select the (Value, RLAmt) with the highest priority (largest 1866 // number of groups), and start with this rotated value. 1867 if ((!NeedMask || LateMask) && !Res) { 1868 ValueRotInfo &VRI = ValueRotsVec[0]; 1869 if (VRI.RLAmt) { 1870 if (InstCnt) *InstCnt += 1; 1871 SDValue Ops[] = 1872 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), 1873 getI32Imm(0, dl), getI32Imm(31, dl) }; 1874 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 1875 0); 1876 } else { 1877 Res = TruncateToInt32(VRI.V, dl); 1878 } 1879 1880 // Now, remove all groups with this underlying value and rotation factor. 1881 eraseMatchingBitGroups([VRI](const BitGroup &BG) { 1882 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; 1883 }); 1884 } 1885 1886 if (InstCnt) *InstCnt += BitGroups.size(); 1887 1888 // Insert the other groups (one at a time). 
1889 for (auto &BG : BitGroups) { 1890 if (!Res) { 1891 SDValue Ops[] = 1892 { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), 1893 getI32Imm(Bits.size() - BG.EndIdx - 1, dl), 1894 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; 1895 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 1896 } else { 1897 SDValue Ops[] = 1898 { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), 1899 getI32Imm(Bits.size() - BG.EndIdx - 1, dl), 1900 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; 1901 Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0); 1902 } 1903 } 1904 1905 if (LateMask) { 1906 unsigned Mask = (unsigned) getZerosMask(); 1907 1908 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16; 1909 assert((ANDIMask != 0 || ANDISMask != 0) && 1910 "No set bits in zeros mask?"); 1911 1912 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + 1913 (unsigned) (ANDISMask != 0) + 1914 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 1915 1916 SDValue ANDIVal, ANDISVal; 1917 if (ANDIMask != 0) 1918 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32, 1919 Res, getI32Imm(ANDIMask, dl)), 1920 0); 1921 if (ANDISMask != 0) 1922 ANDISVal = 1923 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res, 1924 getI32Imm(ANDISMask, dl)), 1925 0); 1926 1927 if (!ANDIVal) 1928 Res = ANDISVal; 1929 else if (!ANDISVal) 1930 Res = ANDIVal; 1931 else 1932 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, 1933 ANDIVal, ANDISVal), 0); 1934 } 1935 1936 return Res.getNode(); 1937 } 1938 1939 unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32, 1940 unsigned MaskStart, unsigned MaskEnd, 1941 bool IsIns) { 1942 // In the notation used by the instructions, 'start' and 'end' are reversed 1943 // because bits are counted from high to low order. 
1944 unsigned InstMaskStart = 64 - MaskEnd - 1, 1945 InstMaskEnd = 64 - MaskStart - 1; 1946 1947 if (Repl32) 1948 return 1; 1949 1950 if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) || 1951 InstMaskEnd == 63 - RLAmt) 1952 return 1; 1953 1954 return 2; 1955 } 1956 1957 // For 64-bit values, not all combinations of rotates and masks are 1958 // available. Produce one if it is available. 1959 SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt, 1960 bool Repl32, unsigned MaskStart, unsigned MaskEnd, 1961 unsigned *InstCnt = nullptr) { 1962 // In the notation used by the instructions, 'start' and 'end' are reversed 1963 // because bits are counted from high to low order. 1964 unsigned InstMaskStart = 64 - MaskEnd - 1, 1965 InstMaskEnd = 64 - MaskStart - 1; 1966 1967 if (InstCnt) *InstCnt += 1; 1968 1969 if (Repl32) { 1970 // This rotation amount assumes that the lower 32 bits of the quantity 1971 // are replicated in the high 32 bits by the rotation operator (which is 1972 // done by rlwinm and friends). 
1973 assert(InstMaskStart >= 32 && "Mask cannot start out of range"); 1974 assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); 1975 SDValue Ops[] = 1976 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), 1977 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; 1978 return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64, 1979 Ops), 0); 1980 } 1981 1982 if (InstMaskEnd == 63) { 1983 SDValue Ops[] = 1984 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), 1985 getI32Imm(InstMaskStart, dl) }; 1986 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0); 1987 } 1988 1989 if (InstMaskStart == 0) { 1990 SDValue Ops[] = 1991 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), 1992 getI32Imm(InstMaskEnd, dl) }; 1993 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0); 1994 } 1995 1996 if (InstMaskEnd == 63 - RLAmt) { 1997 SDValue Ops[] = 1998 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), 1999 getI32Imm(InstMaskStart, dl) }; 2000 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0); 2001 } 2002 2003 // We cannot do this with a single instruction, so we'll use two. The 2004 // problem is that we're not free to choose both a rotation amount and mask 2005 // start and end independently. We can choose an arbitrary mask start and 2006 // end, but then the rotation amount is fixed. Rotation, however, can be 2007 // inverted, and so by applying an "inverse" rotation first, we can get the 2008 // desired result. 2009 if (InstCnt) *InstCnt += 1; 2010 2011 // The rotation mask for the second instruction must be MaskStart. 2012 unsigned RLAmt2 = MaskStart; 2013 // The first instruction must rotate V so that the overall rotation amount 2014 // is RLAmt. 
2015 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; 2016 if (RLAmt1) 2017 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); 2018 return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd); 2019 } 2020 2021 // For 64-bit values, not all combinations of rotates and masks are 2022 // available. Produce a rotate-mask-and-insert if one is available. 2023 SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl, 2024 unsigned RLAmt, bool Repl32, unsigned MaskStart, 2025 unsigned MaskEnd, unsigned *InstCnt = nullptr) { 2026 // In the notation used by the instructions, 'start' and 'end' are reversed 2027 // because bits are counted from high to low order. 2028 unsigned InstMaskStart = 64 - MaskEnd - 1, 2029 InstMaskEnd = 64 - MaskStart - 1; 2030 2031 if (InstCnt) *InstCnt += 1; 2032 2033 if (Repl32) { 2034 // This rotation amount assumes that the lower 32 bits of the quantity 2035 // are replicated in the high 32 bits by the rotation operator (which is 2036 // done by rlwinm and friends). 2037 assert(InstMaskStart >= 32 && "Mask cannot start out of range"); 2038 assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); 2039 SDValue Ops[] = 2040 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), 2041 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; 2042 return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, 2043 Ops), 0); 2044 } 2045 2046 if (InstMaskEnd == 63 - RLAmt) { 2047 SDValue Ops[] = 2048 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), 2049 getI32Imm(InstMaskStart, dl) }; 2050 return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0); 2051 } 2052 2053 // We cannot do this with a single instruction, so we'll use two. The 2054 // problem is that we're not free to choose both a rotation amount and mask 2055 // start and end independently. We can choose an arbitrary mask start and 2056 // end, but then the rotation amount is fixed. 
Rotation, however, can be 2057 // inverted, and so by applying an "inverse" rotation first, we can get the 2058 // desired result. 2059 if (InstCnt) *InstCnt += 1; 2060 2061 // The rotation mask for the second instruction must be MaskStart. 2062 unsigned RLAmt2 = MaskStart; 2063 // The first instruction must rotate V so that the overall rotation amount 2064 // is RLAmt. 2065 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; 2066 if (RLAmt1) 2067 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); 2068 return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd); 2069 } 2070 2071 void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { 2072 if (BPermRewriterNoMasking) 2073 return; 2074 2075 // The idea here is the same as in the 32-bit version, but with additional 2076 // complications from the fact that Repl32 might be true. Because we 2077 // aggressively convert bit groups to Repl32 form (which, for small 2078 // rotation factors, involves no other change), and then coalesce, it might 2079 // be the case that a single 64-bit masking operation could handle both 2080 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32 2081 // form allowed coalescing, then we must use a 32-bit rotaton in order to 2082 // completely capture the new combined bit group. 2083 2084 for (ValueRotInfo &VRI : ValueRotsVec) { 2085 uint64_t Mask = 0; 2086 2087 // We need to add to the mask all bits from the associated bit groups. 2088 // If Repl32 is false, we need to add bits from bit groups that have 2089 // Repl32 true, but are trivially convertable to Repl32 false. Such a 2090 // group is trivially convertable if it overlaps only with the lower 32 2091 // bits, and the group has not been coalesced. 
2092 auto MatchingBG = [VRI](const BitGroup &BG) { 2093 if (VRI.V != BG.V) 2094 return false; 2095 2096 unsigned EffRLAmt = BG.RLAmt; 2097 if (!VRI.Repl32 && BG.Repl32) { 2098 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx && 2099 !BG.Repl32Coalesced) { 2100 if (BG.Repl32CR) 2101 EffRLAmt += 32; 2102 } else { 2103 return false; 2104 } 2105 } else if (VRI.Repl32 != BG.Repl32) { 2106 return false; 2107 } 2108 2109 return VRI.RLAmt == EffRLAmt; 2110 }; 2111 2112 for (auto &BG : BitGroups) { 2113 if (!MatchingBG(BG)) 2114 continue; 2115 2116 if (BG.StartIdx <= BG.EndIdx) { 2117 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) 2118 Mask |= (UINT64_C(1) << i); 2119 } else { 2120 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) 2121 Mask |= (UINT64_C(1) << i); 2122 for (unsigned i = 0; i <= BG.EndIdx; ++i) 2123 Mask |= (UINT64_C(1) << i); 2124 } 2125 } 2126 2127 // We can use the 32-bit andi/andis technique if the mask does not 2128 // require any higher-order bits. This can save an instruction compared 2129 // to always using the general 64-bit technique. 2130 bool Use32BitInsts = isUInt<32>(Mask); 2131 // Compute the masks for andi/andis that would be necessary. 
2132 unsigned ANDIMask = (Mask & UINT16_MAX), 2133 ANDISMask = (Mask >> 16) & UINT16_MAX; 2134 2135 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)); 2136 2137 unsigned NumAndInsts = (unsigned) NeedsRotate + 2138 (unsigned) (bool) Res; 2139 if (Use32BitInsts) 2140 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + 2141 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 2142 else 2143 NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1; 2144 2145 unsigned NumRLInsts = 0; 2146 bool FirstBG = true; 2147 bool MoreBG = false; 2148 for (auto &BG : BitGroups) { 2149 if (!MatchingBG(BG)) { 2150 MoreBG = true; 2151 continue; 2152 } 2153 NumRLInsts += 2154 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, 2155 !FirstBG); 2156 FirstBG = false; 2157 } 2158 2159 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() 2160 << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") 2161 << "\n\t\t\tisel using masking: " << NumAndInsts 2162 << " using rotates: " << NumRLInsts << "\n"); 2163 2164 // When we'd use andi/andis, we bias toward using the rotates (andi only 2165 // has a record form, and is cracked on POWER cores). However, when using 2166 // general 64-bit constant formation, bias toward the constant form, 2167 // because that exposes more opportunities for CSE. 2168 if (NumAndInsts > NumRLInsts) 2169 continue; 2170 // When merging multiple bit groups, instruction or is used. 2171 // But when rotate is used, rldimi can inert the rotated value into any 2172 // register, so instruction or can be avoided. 2173 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts) 2174 continue; 2175 2176 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n"); 2177 2178 if (InstCnt) *InstCnt += NumAndInsts; 2179 2180 SDValue VRot; 2181 // We actually need to generate a rotation if we have a non-zero rotation 2182 // factor or, in the Repl32 case, if we care about any of the 2183 // higher-order replicated bits. 
In the latter case, we generate a mask 2184 // backward so that it actually includes the entire 64 bits. 2185 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask))) 2186 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, 2187 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63); 2188 else 2189 VRot = VRI.V; 2190 2191 SDValue TotalVal; 2192 if (Use32BitInsts) { 2193 assert((ANDIMask != 0 || ANDISMask != 0) && 2194 "No set bits in mask when using 32-bit ands for 64-bit value"); 2195 2196 SDValue ANDIVal, ANDISVal; 2197 if (ANDIMask != 0) 2198 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64, 2199 ExtendToInt64(VRot, dl), 2200 getI32Imm(ANDIMask, dl)), 2201 0); 2202 if (ANDISMask != 0) 2203 ANDISVal = 2204 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64, 2205 ExtendToInt64(VRot, dl), 2206 getI32Imm(ANDISMask, dl)), 2207 0); 2208 2209 if (!ANDIVal) 2210 TotalVal = ANDISVal; 2211 else if (!ANDISVal) 2212 TotalVal = ANDIVal; 2213 else 2214 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 2215 ExtendToInt64(ANDIVal, dl), ANDISVal), 0); 2216 } else { 2217 TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); 2218 TotalVal = 2219 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, 2220 ExtendToInt64(VRot, dl), TotalVal), 2221 0); 2222 } 2223 2224 if (!Res) 2225 Res = TotalVal; 2226 else 2227 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 2228 ExtendToInt64(Res, dl), TotalVal), 2229 0); 2230 2231 // Now, remove all groups with this underlying value and rotation 2232 // factor. 2233 eraseMatchingBitGroups(MatchingBG); 2234 } 2235 } 2236 2237 // Instruction selection for the 64-bit case. 2238 SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) { 2239 SDLoc dl(N); 2240 SDValue Res; 2241 2242 if (InstCnt) *InstCnt = 0; 2243 2244 // Take care of cases that should use andi/andis first. 
2245 SelectAndParts64(dl, Res, InstCnt); 2246 2247 // If we've not yet selected a 'starting' instruction, and we have no zeros 2248 // to fill in, select the (Value, RLAmt) with the highest priority (largest 2249 // number of groups), and start with this rotated value. 2250 if ((!NeedMask || LateMask) && !Res) { 2251 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32 2252 // groups will come first, and so the VRI representing the largest number 2253 // of groups might not be first (it might be the first Repl32 groups). 2254 unsigned MaxGroupsIdx = 0; 2255 if (!ValueRotsVec[0].Repl32) { 2256 for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i) 2257 if (ValueRotsVec[i].Repl32) { 2258 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups) 2259 MaxGroupsIdx = i; 2260 break; 2261 } 2262 } 2263 2264 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx]; 2265 bool NeedsRotate = false; 2266 if (VRI.RLAmt) { 2267 NeedsRotate = true; 2268 } else if (VRI.Repl32) { 2269 for (auto &BG : BitGroups) { 2270 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt || 2271 BG.Repl32 != VRI.Repl32) 2272 continue; 2273 2274 // We don't need a rotate if the bit group is confined to the lower 2275 // 32 bits. 2276 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx) 2277 continue; 2278 2279 NeedsRotate = true; 2280 break; 2281 } 2282 } 2283 2284 if (NeedsRotate) 2285 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, 2286 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63, 2287 InstCnt); 2288 else 2289 Res = VRI.V; 2290 2291 // Now, remove all groups with this underlying value and rotation factor. 2292 if (Res) 2293 eraseMatchingBitGroups([VRI](const BitGroup &BG) { 2294 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt && 2295 BG.Repl32 == VRI.Repl32; 2296 }); 2297 } 2298 2299 // Because 64-bit rotates are more flexible than inserts, we might have a 2300 // preference regarding which one we do first (to save one instruction). 
2301 if (!Res) 2302 for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) { 2303 if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, 2304 false) < 2305 SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, 2306 true)) { 2307 if (I != BitGroups.begin()) { 2308 BitGroup BG = *I; 2309 BitGroups.erase(I); 2310 BitGroups.insert(BitGroups.begin(), BG); 2311 } 2312 2313 break; 2314 } 2315 } 2316 2317 // Insert the other groups (one at a time). 2318 for (auto &BG : BitGroups) { 2319 if (!Res) 2320 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx, 2321 BG.EndIdx, InstCnt); 2322 else 2323 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32, 2324 BG.StartIdx, BG.EndIdx, InstCnt); 2325 } 2326 2327 if (LateMask) { 2328 uint64_t Mask = getZerosMask(); 2329 2330 // We can use the 32-bit andi/andis technique if the mask does not 2331 // require any higher-order bits. This can save an instruction compared 2332 // to always using the general 64-bit technique. 2333 bool Use32BitInsts = isUInt<32>(Mask); 2334 // Compute the masks for andi/andis that would be necessary. 
2335 unsigned ANDIMask = (Mask & UINT16_MAX), 2336 ANDISMask = (Mask >> 16) & UINT16_MAX; 2337 2338 if (Use32BitInsts) { 2339 assert((ANDIMask != 0 || ANDISMask != 0) && 2340 "No set bits in mask when using 32-bit ands for 64-bit value"); 2341 2342 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + 2343 (unsigned) (ANDISMask != 0) + 2344 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 2345 2346 SDValue ANDIVal, ANDISVal; 2347 if (ANDIMask != 0) 2348 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64, 2349 ExtendToInt64(Res, dl), 2350 getI32Imm(ANDIMask, dl)), 2351 0); 2352 if (ANDISMask != 0) 2353 ANDISVal = 2354 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64, 2355 ExtendToInt64(Res, dl), 2356 getI32Imm(ANDISMask, dl)), 2357 0); 2358 2359 if (!ANDIVal) 2360 Res = ANDISVal; 2361 else if (!ANDISVal) 2362 Res = ANDIVal; 2363 else 2364 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 2365 ExtendToInt64(ANDIVal, dl), ANDISVal), 0); 2366 } else { 2367 if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1; 2368 2369 SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); 2370 Res = 2371 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, 2372 ExtendToInt64(Res, dl), MaskVal), 0); 2373 } 2374 } 2375 2376 return Res.getNode(); 2377 } 2378 2379 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) { 2380 // Fill in BitGroups. 2381 collectBitGroups(LateMask); 2382 if (BitGroups.empty()) 2383 return nullptr; 2384 2385 // For 64-bit values, figure out when we can use 32-bit instructions. 2386 if (Bits.size() == 64) 2387 assignRepl32BitGroups(); 2388 2389 // Fill in ValueRotsVec. 
2390 collectValueRotInfo(); 2391 2392 if (Bits.size() == 32) { 2393 return Select32(N, LateMask, InstCnt); 2394 } else { 2395 assert(Bits.size() == 64 && "Not 64 bits here?"); 2396 return Select64(N, LateMask, InstCnt); 2397 } 2398 2399 return nullptr; 2400 } 2401 2402 void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) { 2403 BitGroups.erase(remove_if(BitGroups, F), BitGroups.end()); 2404 } 2405 2406 SmallVector<ValueBit, 64> Bits; 2407 2408 bool NeedMask = false; 2409 SmallVector<unsigned, 64> RLAmt; 2410 2411 SmallVector<BitGroup, 16> BitGroups; 2412 2413 DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots; 2414 SmallVector<ValueRotInfo, 16> ValueRotsVec; 2415 2416 SelectionDAG *CurDAG = nullptr; 2417 2418 public: 2419 BitPermutationSelector(SelectionDAG *DAG) 2420 : CurDAG(DAG) {} 2421 2422 // Here we try to match complex bit permutations into a set of 2423 // rotate-and-shift/shift/and/or instructions, using a set of heuristics 2424 // known to produce optimal code for common cases (like i32 byte swapping). 2425 SDNode *Select(SDNode *N) { 2426 Memoizer.clear(); 2427 auto Result = 2428 getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits()); 2429 if (!Result.first) 2430 return nullptr; 2431 Bits = std::move(*Result.second); 2432 2433 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction" 2434 " selection for: "); 2435 LLVM_DEBUG(N->dump(CurDAG)); 2436 2437 // Fill it RLAmt and set NeedMask. 2438 computeRotationAmounts(); 2439 2440 if (!NeedMask) 2441 return Select(N, false); 2442 2443 // We currently have two techniques for handling results with zeros: early 2444 // masking (the default) and late masking. Late masking is sometimes more 2445 // efficient, but because the structure of the bit groups is different, it 2446 // is hard to tell without generating both and comparing the results. 
With 2447 // late masking, we ignore zeros in the resulting value when inserting each 2448 // set of bit groups, and then mask in the zeros at the end. With early 2449 // masking, we only insert the non-zero parts of the result at every step. 2450 2451 unsigned InstCnt = 0, InstCntLateMask = 0; 2452 LLVM_DEBUG(dbgs() << "\tEarly masking:\n"); 2453 SDNode *RN = Select(N, false, &InstCnt); 2454 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n"); 2455 2456 LLVM_DEBUG(dbgs() << "\tLate masking:\n"); 2457 SDNode *RNLM = Select(N, true, &InstCntLateMask); 2458 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask 2459 << " instructions\n"); 2460 2461 if (InstCnt <= InstCntLateMask) { 2462 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n"); 2463 return RN; 2464 } 2465 2466 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n"); 2467 return RNLM; 2468 } 2469 }; 2470 2471 class IntegerCompareEliminator { 2472 SelectionDAG *CurDAG; 2473 PPCDAGToDAGISel *S; 2474 // Conversion type for interpreting results of a 32-bit instruction as 2475 // a 64-bit value or vice versa. 2476 enum ExtOrTruncConversion { Ext, Trunc }; 2477 2478 // Modifiers to guide how an ISD::SETCC node's result is to be computed 2479 // in a GPR. 2480 // ZExtOrig - use the original condition code, zero-extend value 2481 // ZExtInvert - invert the condition code, zero-extend value 2482 // SExtOrig - use the original condition code, sign-extend value 2483 // SExtInvert - invert the condition code, sign-extend value 2484 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert }; 2485 2486 // Comparisons against zero to emit GPR code sequences for. Each of these 2487 // sequences may need to be emitted for two or more equivalent patterns. 2488 // For example (a >= 0) == (a > -1). The direction of the comparison (</>) 2489 // matters as well as the extension type: sext (-1/0), zext (1/0). 
2490 // GEZExt - (zext (LHS >= 0)) 2491 // GESExt - (sext (LHS >= 0)) 2492 // LEZExt - (zext (LHS <= 0)) 2493 // LESExt - (sext (LHS <= 0)) 2494 enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt }; 2495 2496 SDNode *tryEXTEND(SDNode *N); 2497 SDNode *tryLogicOpOfCompares(SDNode *N); 2498 SDValue computeLogicOpInGPR(SDValue LogicOp); 2499 SDValue signExtendInputIfNeeded(SDValue Input); 2500 SDValue zeroExtendInputIfNeeded(SDValue Input); 2501 SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); 2502 SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, 2503 ZeroCompare CmpTy); 2504 SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2505 int64_t RHSValue, SDLoc dl); 2506 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2507 int64_t RHSValue, SDLoc dl); 2508 SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2509 int64_t RHSValue, SDLoc dl); 2510 SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2511 int64_t RHSValue, SDLoc dl); 2512 SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); 2513 2514 public: 2515 IntegerCompareEliminator(SelectionDAG *DAG, 2516 PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) { 2517 assert(CurDAG->getTargetLoweringInfo() 2518 .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && 2519 "Only expecting to use this on 64 bit targets."); 2520 } 2521 SDNode *Select(SDNode *N) { 2522 if (CmpInGPR == ICGPR_None) 2523 return nullptr; 2524 switch (N->getOpcode()) { 2525 default: break; 2526 case ISD::ZERO_EXTEND: 2527 if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 || 2528 CmpInGPR == ICGPR_SextI64) 2529 return nullptr; 2530 LLVM_FALLTHROUGH; 2531 case ISD::SIGN_EXTEND: 2532 if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 || 2533 CmpInGPR == ICGPR_ZextI64) 2534 return nullptr; 2535 return tryEXTEND(N); 2536 case ISD::AND: 2537 case ISD::OR: 2538 case ISD::XOR: 2539 return tryLogicOpOfCompares(N); 2540 } 
2541 return nullptr; 2542 } 2543 }; 2544 2545 static bool isLogicOp(unsigned Opc) { 2546 return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; 2547 } 2548 // The obvious case for wanting to keep the value in a GPR. Namely, the 2549 // result of the comparison is actually needed in a GPR. 2550 SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) { 2551 assert((N->getOpcode() == ISD::ZERO_EXTEND || 2552 N->getOpcode() == ISD::SIGN_EXTEND) && 2553 "Expecting a zero/sign extend node!"); 2554 SDValue WideRes; 2555 // If we are zero-extending the result of a logical operation on i1 2556 // values, we can keep the values in GPRs. 2557 if (isLogicOp(N->getOperand(0).getOpcode()) && 2558 N->getOperand(0).getValueType() == MVT::i1 && 2559 N->getOpcode() == ISD::ZERO_EXTEND) 2560 WideRes = computeLogicOpInGPR(N->getOperand(0)); 2561 else if (N->getOperand(0).getOpcode() != ISD::SETCC) 2562 return nullptr; 2563 else 2564 WideRes = 2565 getSETCCInGPR(N->getOperand(0), 2566 N->getOpcode() == ISD::SIGN_EXTEND ? 2567 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); 2568 2569 if (!WideRes) 2570 return nullptr; 2571 2572 SDLoc dl(N); 2573 bool Input32Bit = WideRes.getValueType() == MVT::i32; 2574 bool Output32Bit = N->getValueType(0) == MVT::i32; 2575 2576 NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; 2577 NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; 2578 2579 SDValue ConvOp = WideRes; 2580 if (Input32Bit != Output32Bit) 2581 ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext : 2582 ExtOrTruncConversion::Trunc); 2583 return ConvOp.getNode(); 2584 } 2585 2586 // Attempt to perform logical operations on the results of comparisons while 2587 // keeping the values in GPRs. Without doing so, these would end up being 2588 // lowered to CR-logical operations which suffer from significant latency and 2589 // low ILP. 
2590 SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) { 2591 if (N->getValueType(0) != MVT::i1) 2592 return nullptr; 2593 assert(isLogicOp(N->getOpcode()) && 2594 "Expected a logic operation on setcc results."); 2595 SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); 2596 if (!LoweredLogical) 2597 return nullptr; 2598 2599 SDLoc dl(N); 2600 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; 2601 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; 2602 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); 2603 SDValue LHS = LoweredLogical.getOperand(0); 2604 SDValue RHS = LoweredLogical.getOperand(1); 2605 SDValue WideOp; 2606 SDValue OpToConvToRecForm; 2607 2608 // Look through any 32-bit to 64-bit implicit extend nodes to find the 2609 // opcode that is input to the XORI. 2610 if (IsBitwiseNegate && 2611 LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) 2612 OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); 2613 else if (IsBitwiseNegate) 2614 // If the input to the XORI isn't an extension, that's what we're after. 2615 OpToConvToRecForm = LoweredLogical.getOperand(0); 2616 else 2617 // If this is not an XORI, it is a reg-reg logical op and we can convert 2618 // it to record-form. 2619 OpToConvToRecForm = LoweredLogical; 2620 2621 // Get the record-form version of the node we're looking to use to get the 2622 // CR result from. 2623 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); 2624 int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); 2625 2626 // Convert the right node to record-form. This is either the logical we're 2627 // looking at or it is the input node to the negation (if we're looking at 2628 // a bitwise negation). 2629 if (NewOpc != -1 && IsBitwiseNegate) { 2630 // The input to the XORI has a record-form. Use it. 
2631 assert(LoweredLogical.getConstantOperandVal(1) == 1 && 2632 "Expected a PPC::XORI8 only for bitwise negation."); 2633 // Emit the record-form instruction. 2634 std::vector<SDValue> Ops; 2635 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) 2636 Ops.push_back(OpToConvToRecForm.getOperand(i)); 2637 2638 WideOp = 2639 SDValue(CurDAG->getMachineNode(NewOpc, dl, 2640 OpToConvToRecForm.getValueType(), 2641 MVT::Glue, Ops), 0); 2642 } else { 2643 assert((NewOpc != -1 || !IsBitwiseNegate) && 2644 "No record form available for AND8/OR8/XOR8?"); 2645 WideOp = 2646 SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc, 2647 dl, MVT::i64, MVT::Glue, LHS, RHS), 2648 0); 2649 } 2650 2651 // Select this node to a single bit from CR0 set by the record-form node 2652 // just created. For bitwise negation, use the EQ bit which is the equivalent 2653 // of negating the result (i.e. it is a bit set when the result of the 2654 // operation is zero). 2655 SDValue SRIdxVal = 2656 CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); 2657 SDValue CRBit = 2658 SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, 2659 MVT::i1, CR0Reg, SRIdxVal, 2660 WideOp.getValue(1)), 0); 2661 return CRBit.getNode(); 2662 } 2663 2664 // Lower a logical operation on i1 values into a GPR sequence if possible. 2665 // The result can be kept in a GPR if requested. 2666 // Three types of inputs can be handled: 2667 // - SETCC 2668 // - TRUNCATE 2669 // - Logical operation (AND/OR/XOR) 2670 // There is also a special case that is handled (namely a complement operation 2671 // achieved with xor %a, -1). 
SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
  // Returns the i64 machine node computing LogicOp in a GPR, or an empty
  // SDValue if a GPR sequence cannot be produced for a required operand.
  assert(isLogicOp(LogicOp.getOpcode()) &&
        "Can only handle logic operations here.");
  assert(LogicOp.getValueType() == MVT::i1 &&
         "Can only handle logic operations on i1 values here.");
  SDLoc dl(LogicOp);
  SDValue LHS, RHS;

  // Special case: xor %a, -1
  bool IsBitwiseNegation = isBitwiseNot(LogicOp);

  // Produces a GPR sequence for each operand of the binary logic operation.
  // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
  // the value in a GPR and for logic operations, it will recursively produce
  // a GPR sequence for the operation.
  // Any other kind of operand yields an empty SDValue.
  auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
    unsigned OperandOpcode = Operand.getOpcode();
    if (OperandOpcode == ISD::SETCC)
      return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
    else if (OperandOpcode == ISD::TRUNCATE) {
      SDValue InputOp = Operand.getOperand(0);
      EVT InVT = InputOp.getValueType();
      // The result keeps the type of the truncate's input (i32 or i64).
      return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
                                            PPC::RLDICL, dl, InVT, InputOp,
                                            S->getI64Imm(0, dl),
                                            S->getI64Imm(63, dl)), 0);
    } else if (isLogicOp(OperandOpcode))
      return computeLogicOpInGPR(Operand);
    return SDValue();
  };
  LHS = getLogicOperand(LogicOp.getOperand(0));
  RHS = getLogicOperand(LogicOp.getOperand(1));

  // If a GPR sequence can't be produced for the LHS we can't proceed.
  // Not producing a GPR sequence for the RHS is only a problem if this isn't
  // a bitwise negation operation.
  if (!LHS || (!RHS && !IsBitwiseNegation))
    return SDValue();

  NumLogicOpsOnComparison++;

  // We will use the inputs as 64-bit values.
  if (LHS.getValueType() == MVT::i32)
    LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
  if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
    RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);

  // Map the generic logical opcode to its 64-bit reg-reg machine opcode.
  unsigned NewOpc;
  switch (LogicOp.getOpcode()) {
  default: llvm_unreachable("Unknown logic operation.");
  case ISD::AND: NewOpc = PPC::AND8; break;
  case ISD::OR:  NewOpc = PPC::OR8;  break;
  case ISD::XOR: NewOpc = PPC::XOR8; break;
  }

  // A bitwise negation is emitted as an XOR of the LHS with the immediate 1;
  // the original RHS is discarded.
  if (IsBitwiseNegation) {
    RHS = S->getI64Imm(1, dl);
    NewOpc = PPC::XORI8;
  }

  return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);

}

/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
/// Otherwise just reinterpret it as a 64-bit value.
/// Useful when emitting comparison code for 32-bit values without using
/// the compare instruction (which only considers the lower 32-bits).
///
/// \param Input an i32 value (asserted).
/// \returns an i64 value: either \p Input reinterpreted through addExtOrTrunc
/// or a new EXTSW_32_64 node, in which case SignExtensionsAdded is bumped.
SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
  assert(Input.getValueType() == MVT::i32 &&
         "Can only sign-extend 32-bit values here.");
  unsigned Opc = Input.getOpcode();

  // The value was sign extended and then truncated to 32-bits. No need to
  // sign extend it again.
  if (Opc == ISD::TRUNCATE &&
      (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
       Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
  // The input is a sign-extending load. All ppc sign-extending loads
  // sign-extend to the full 64-bits.
  if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  // We don't sign-extend constants.
  if (InputConst)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  // None of the above, an explicit sign-extension is needed.
  SDLoc dl(Input);
  SignExtensionsAdded++;
  return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
                                        MVT::i64, Input), 0);
}

/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
/// Otherwise just reinterpret it as a 64-bit value.
/// Useful when emitting comparison code for 32-bit values without using
/// the compare instruction (which only considers the lower 32-bits).
///
/// \param Input an i32 value (asserted).
/// \returns an i64 value: either \p Input reinterpreted through addExtOrTrunc
/// or a new RLDICL_32_64 node, in which case ZeroExtensionsAdded is bumped.
SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
  assert(Input.getValueType() == MVT::i32 &&
         "Can only zero-extend 32-bit values here.");
  unsigned Opc = Input.getOpcode();

  // The only conditions under which we can omit the actual extend instruction:
  // - The value is a non-negative constant
  // - The value comes from a load that isn't a sign-extending load
  // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
  bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
    (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
     Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
  if (IsTruncateOfZExt)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  if (InputConst && InputConst->getSExtValue() >= 0)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
  // The input is a load that doesn't sign-extend (it will be zero-extended).
  if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  // None of the above, need to zero-extend.
  SDLoc dl(Input);
  ZeroExtensionsAdded++;
  return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
                                        S->getI64Imm(0, dl),
                                        S->getI64Imm(32, dl)), 0);
}

// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
// course not actual zero/sign extensions that will generate machine code,
// they're just a way to reinterpret a 32 bit value in a register as a
// 64 bit value and vice-versa.
// Conv selects the direction: Ext yields an i64 from an i32 value and Trunc
// yields an i32 from an i64 value.
SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
                                                ExtOrTruncConversion Conv) {
  SDLoc dl(NatWidthRes);

  // For reinterpreting 32-bit values as 64 bit values, we generate
  // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<sub_32>
  if (Conv == ExtOrTruncConversion::Ext) {
    SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
    SDValue SubRegIdx =
      CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
    return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
                                          ImDef, NatWidthRes, SubRegIdx), 0);
  }

  assert(Conv == ExtOrTruncConversion::Trunc &&
         "Unknown convertion between 32 and 64 bit values.");
  // For reinterpreting 64-bit values as 32-bit values, we just need to
  // EXTRACT_SUBREG (i.e. extract the low word).
  SDValue SubRegIdx =
    CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
                                        NatWidthRes, SubRegIdx), 0);
}

// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
// Handle both zero-extensions and sign-extensions.
// LHS is the value compared against zero (i32 or i64) and CmpTy selects both
// the comparison (GE or LE) and the kind of extension of the result.
SDValue
IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
                                                         ZeroCompare CmpTy) {
  EVT InVT = LHS.getValueType();
  bool Is32Bit = InVT == MVT::i32;
  SDValue ToExtend;

  // Produce the value that needs to be either zero or sign extended.
  switch (CmpTy) {
  case ZeroCompare::GEZExt:
  case ZeroCompare::GESExt:
    // (nor %a, %a), i.e. the bitwise complement of the input, in the input's
    // own width.
    ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
                                              dl, InVT, LHS, LHS), 0);
    break;
  case ZeroCompare::LEZExt:
  case ZeroCompare::LESExt: {
    if (Is32Bit) {
      // Upper 32 bits cannot be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
      // Note that for 32-bit inputs the value to extend is an i64 here.
      ToExtend =
        SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                       Neg, S->getI64Imm(1, dl),
                                       S->getI64Imm(63, dl)), 0);
    } else {
      // (or (add %a, -1), %a)
      SDValue Addi =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(~0ULL, dl)), 0);
      ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                                Addi, LHS), 0);
    }
    break;
  }
  }

  // For 64-bit sequences, the extensions are the same for the GE/LE cases.
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                          ToExtend, S->getI64Imm(1, dl),
                                          S->getI64Imm(63, dl)), 0);
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
                                          S->getI64Imm(63, dl)), 0);

  assert(Is32Bit && "Should have handled the 32-bit sequences above.");
  // For 32-bit sequences, the extensions differ between GE/LE cases.
  // The GE cases produce an i32 result whereas the LE cases produce an i64
  // result (their ToExtend was computed as an i64 above).
  switch (CmpTy) {
  case ZeroCompare::GEZExt: {
    SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
                           S->getI32Imm(31, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                          ShiftOps), 0);
  }
  case ZeroCompare::GESExt:
    return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
                                          S->getI32Imm(31, dl)), 0);
  case ZeroCompare::LEZExt:
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(1, dl)), 0);
  case ZeroCompare::LESExt:
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(-1, dl)), 0);
  }

  // The above case covers all the enumerators so it can't have a default clause
  // to avoid compiler warnings.
  llvm_unreachable("Unknown zero-comparison type.");
}

/// Produces a zero-extended result of comparing two 32-bit values according to
/// the passed condition code.
///
/// \param LHS, RHS the operands of the comparison.
/// \param CC the condition code of the comparison.
/// \param RHSValue the value used to detect the special cases RHS == 0,
/// RHS == 1 and RHS == -1.
/// \returns the node computing the result, or an empty SDValue if the current
/// CmpInGPR setting rules this sequence out or \p CC is not handled. Depending
/// on the sequence chosen the result is either an i32 or an i64 node.
SDValue
IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
      CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
    // (zext (setcc %a, 0, seteq))  -> (lshr (cntlzw %a), 5)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
    SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
      S->getI32Imm(31, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                          ShiftOps), 0);
  }
  case ISD::SETNE: {
    // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
    // (zext (setcc %a, 0, setne))  -> (xor (lshr (cntlzw %a), 5), 1)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
    SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
      S->getI32Imm(31, dl) };
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
    return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
                                          S->getI32Imm(1, dl)), 0);
  }
  case ISD::SETGE: {
    // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
    // (zext (setcc %a, 0, setge))  -> (lshr (~ %a), 31)
    if(IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

    // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
    // by swapping inputs and falling through.
    std::swap(LHS, RHS);
    // After the swap the special-case flag must describe the new RHS.
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
    // (zext (setcc %a, 0, setle))  -> (xor (lshr (- %a), 63), 1)
    if(IsRHSZero) {
      // NOTE(review): this repeats the check made on entry to this case.
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    }

    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue Sub =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
                                     S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
              0);
    return
      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
                                     MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
  }
  case ISD::SETGT: {
    // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
    // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
    // (zext (setcc %a, 0, setgt))  -> (lshr (- %a), 63)
    // Handle SETGT -1 (which is equivalent to SETGE 0).
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

    if (IsRHSZero) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      // The upper 32-bits of the register can't be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      // NOTE(review): the extended RHS is not used by the sequence below.
      RHS = signExtendInputIfNeeded(RHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                     Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
    }
    // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
    // (%b < %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    // After the swap the special-case flags must describe the new RHS.
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
    // (zext (setcc %a, 1, setlt))  -> (xor (lshr (- %a), 63), 1)
    // (zext (setcc %a, 0, setlt))  -> (lshr %a, 31)
    // Handle SETLT 1 (which is equivalent to SETLE 0).
    if (IsRHSOne) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    }

    if (IsRHSZero) {
      SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
                             S->getI32Imm(31, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                            ShiftOps), 0);
    }

    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue SUBFNode =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                    SUBFNode, S->getI64Imm(1, dl),
                                    S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETUGE:
    // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %a, %b), 63), 1)
    // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %b, %a), 63), 1)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
    SDValue SrdiNode =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                          Subtract, S->getI64Imm(1, dl),
                                          S->getI64Imm(63, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
                                            S->getI32Imm(1, dl)), 0);
  }
  case ISD::SETUGT:
    // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
    // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                          Subtract, S->getI64Imm(1, dl),
                                          S->getI64Imm(63, dl)), 0);
  }
  }
}

/// Produces a sign-extended result of comparing two 32-bit values according to
/// the passed condition code.
///
/// \param LHS, RHS the operands of the comparison.
/// \param CC the condition code of the comparison.
/// \param RHSValue the value used to detect the special cases RHS == 0,
/// RHS == 1 and RHS == -1.
/// \returns the node computing the result, or an empty SDValue if the current
/// CmpInGPR setting rules this sequence out or \p CC is not handled. Depending
/// on the sequence chosen the result is either an i32 or an i64 node.
SDValue
IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
      CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;

  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // (sext (setcc %a, %b, seteq)) ->
    //   (ashr (shl (ctlz (xor %a, %b)), 58), 63)
    // (sext (setcc %a, 0, seteq)) ->
    //   (ashr (shl (ctlz %a), 58), 63)
    // NOTE(review): the code below emits CNTLZW, RLWINM and NEG rather than
    // the shl/ashr pair the formulas above describe.
    SDValue CountInput = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Cntlzw =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
    SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
                         S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
    SDValue Slwi =
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
  }
  case ISD::SETNE: {
    // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
    // flip the bit, finally take 2's complement.
    // (sext (setcc %a, %b, setne)) ->
    //   (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
    // Same as above, but the first xor is not needed.
    // (sext (setcc %a, 0, setne)) ->
    //   (neg (xor (lshr (ctlz %a), 5), 1))
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
    SDValue ShiftOps[] =
      { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
    SDValue Xori =
      SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
                                     S->getI32Imm(1, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
  }
  case ISD::SETGE: {
    // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
    // (sext (setcc %a, 0, setge))  -> (ashr (~ %a), 31)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

    // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
    // by swapping inputs and falling through.
    std::swap(LHS, RHS);
    // After the swap the special-case flag must describe the new RHS.
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
    // (sext (setcc %a, 0, setle))  -> (add (lshr (- %a), 63), -1)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);

    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    // NOTE(review): unlike the other SUBF8 nodes in these functions, this one
    // is created with an additional MVT::Glue result that is not used here.
    SDValue SUBFNode =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 0);
    SDValue Srdi =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     SUBFNode, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
                                          S->getI32Imm(-1, dl)), 0);
  }
  case ISD::SETGT: {
    // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
    // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
    // (sext (setcc %a, 0, setgt))  -> (ashr (- %a), 63)
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    if (IsRHSZero) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      // The upper 32-bits of the register can't be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      // NOTE(review): the extended RHS is not used by the sequence below.
      RHS = signExtendInputIfNeeded(RHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
        return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
                                              S->getI64Imm(63, dl)), 0);
    }
    // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
    // (%b < %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    // After the swap the special-case flags must describe the new RHS.
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
    // (sext (setcc %a, 1, setlt))  -> (add (lshr (- %a), 63), -1)
    // (sext (setcc %a, 0, setlt))  -> (ashr %a, 31)
    if (IsRHSOne) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    }
    if (IsRHSZero)
      return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
                                            S->getI32Imm(31, dl)), 0);

    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue SUBFNode =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                          SUBFNode, S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETUGE:
    // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
    // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
                                     S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
              0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
                                          S->getI32Imm(-1, dl)), 0);
  }
  case ISD::SETUGT:
    // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
    // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                          Subtract, S->getI64Imm(63, dl)), 0);
  }
  }
}

/// Produces a zero-extended result of comparing two 64-bit values according to
/// the passed condition code.
///
/// \param LHS, RHS the operands of the comparison.
/// \param CC the condition code of the comparison.
/// \param RHSValue the value used to detect the special cases RHS == 0,
/// RHS == 1 and RHS == -1.
/// \returns the i64 node computing the result, or an empty SDValue if the
/// current CmpInGPR setting rules this sequence out or \p CC is not handled.
SDValue
IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
    // (zext (setcc %a, 0, seteq)) ->  (lshr (ctlz %a), 6)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
                                          S->getI64Imm(58, dl),
                                          S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETNE: {
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
    // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
    // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
    // NOTE(review): the SUBFE8 below takes (AC, Xor), i.e. the ADDIC8 result
    // and its input, not the same register twice as the formulas above read.
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue AC =
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
                                     Xor, S->getI32Imm(~0U, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
                                          Xor, AC.getValue(1)), 0);
  }
  case ISD::SETGE: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setge)) ->
    //   (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
    // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
    // Handle (%a >= %b) as (%b <= %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    // After the swap the special-case flag must describe the new RHS.
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setle)) ->
    //   (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
    // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    SDValue ShiftL =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                     S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    SDValue ShiftR =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
                                     S->getI64Imm(63, dl)), 0);
    // Only the glue result (value #1) of the SUBFC8 is used: it feeds the
    // ADDE8 below.
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                          ShiftR, ShiftL, SubtractCarry), 0);
  }
  case ISD::SETGT: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setgt)) ->
    //   (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
    // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
    // SETGT -1 is equivalent to SETGE 0.
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
    if (IsRHSZero) {
      SDValue Addi =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(~0ULL, dl)), 0);
      SDValue Nor =
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
                                            S->getI64Imm(1, dl),
                                            S->getI64Imm(63, dl)), 0);
    }
    // Handle (%a > %b) as (%b < %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    // After the swap the special-case flags must describe the new RHS.
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setlt)) ->
    //   (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
    // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
    // SETLT 1 is equivalent to SETLE 0.
    if (IsRHSOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    if (IsRHSZero)
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                            S->getI64Imm(1, dl),
                                            S->getI64Imm(63, dl)), 0);
    SDValue SRADINode =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                     LHS, S->getI64Imm(63, dl)), 0);
    SDValue SRDINode =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     RHS, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    // Only the glue result (value #1) of the SUBFC8 is used: it feeds the
    // ADDE8 below.
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ADDE8Node =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                     SRDINode, SRADINode, SUBFC8Carry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
                                          ADDE8Node, S->getI64Imm(1, dl)), 0);
  }
  case ISD::SETUGE:
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue SUBFE8Node =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
                                     LHS, LHS, SUBFC8Carry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
                                          SUBFE8Node, S->getI64Imm(1, dl)), 0);
  }
  case ISD::SETUGT:
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ExtSub =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
                                     LHS, LHS, SubtractCarry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
                                          ExtSub), 0);
  }
  }
}

/// Produces a sign-extended result of comparing two 64-bit values according to
/// the passed condition code.
3411 SDValue 3412 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS, 3413 ISD::CondCode CC, 3414 int64_t RHSValue, SDLoc dl) { 3415 if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 || 3416 CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext) 3417 return SDValue(); 3418 bool IsRHSZero = RHSValue == 0; 3419 bool IsRHSOne = RHSValue == 1; 3420 bool IsRHSNegOne = RHSValue == -1LL; 3421 switch (CC) { 3422 default: return SDValue(); 3423 case ISD::SETEQ: { 3424 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) 3425 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) 3426 // {addcz.reg, addcz.CA} = (addcarry %a, -1) 3427 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) 3428 SDValue AddInput = IsRHSZero ? LHS : 3429 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); 3430 SDValue Addic = 3431 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, 3432 AddInput, S->getI32Imm(~0U, dl)), 0); 3433 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, 3434 Addic, Addic.getValue(1)), 0); 3435 } 3436 case ISD::SETNE: { 3437 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b)) 3438 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA) 3439 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a) 3440 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA) 3441 SDValue Xor = IsRHSZero ? 
LHS : 3442 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); 3443 SDValue SC = 3444 SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue, 3445 Xor, S->getI32Imm(0, dl)), 0); 3446 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC, 3447 SC, SC.getValue(1)), 0); 3448 } 3449 case ISD::SETGE: { 3450 // {subc.reg, subc.CA} = (subcarry %a, %b) 3451 // (zext (setcc %a, %b, setge)) -> 3452 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA)) 3453 // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63)) 3454 if (IsRHSZero) 3455 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); 3456 std::swap(LHS, RHS); 3457 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3458 IsRHSZero = RHSConst && RHSConst->isNullValue(); 3459 LLVM_FALLTHROUGH; 3460 } 3461 case ISD::SETLE: { 3462 // {subc.reg, subc.CA} = (subcarry %b, %a) 3463 // (zext (setcc %a, %b, setge)) -> 3464 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA)) 3465 // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63) 3466 if (IsRHSZero) 3467 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); 3468 SDValue ShiftR = 3469 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS, 3470 S->getI64Imm(63, dl)), 0); 3471 SDValue ShiftL = 3472 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, 3473 S->getI64Imm(1, dl), 3474 S->getI64Imm(63, dl)), 0); 3475 SDValue SubtractCarry = 3476 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3477 LHS, RHS), 1); 3478 SDValue Adde = 3479 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, 3480 ShiftR, ShiftL, SubtractCarry), 0); 3481 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0); 3482 } 3483 case ISD::SETGT: { 3484 // {subc.reg, subc.CA} = (subcarry %b, %a) 3485 // (zext (setcc %a, %b, setgt)) -> 3486 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1) 3487 // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, 
-1), %a), 63) 3488 if (IsRHSNegOne) 3489 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt); 3490 if (IsRHSZero) { 3491 SDValue Add = 3492 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, 3493 S->getI64Imm(-1, dl)), 0); 3494 SDValue Nor = 3495 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0); 3496 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor, 3497 S->getI64Imm(63, dl)), 0); 3498 } 3499 std::swap(LHS, RHS); 3500 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3501 IsRHSZero = RHSConst && RHSConst->isNullValue(); 3502 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; 3503 LLVM_FALLTHROUGH; 3504 } 3505 case ISD::SETLT: { 3506 // {subc.reg, subc.CA} = (subcarry %a, %b) 3507 // (zext (setcc %a, %b, setlt)) -> 3508 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) 3509 // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63) 3510 if (IsRHSOne) 3511 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt); 3512 if (IsRHSZero) { 3513 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS, 3514 S->getI64Imm(63, dl)), 0); 3515 } 3516 SDValue SRADINode = 3517 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, 3518 LHS, S->getI64Imm(63, dl)), 0); 3519 SDValue SRDINode = 3520 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 3521 RHS, S->getI64Imm(1, dl), 3522 S->getI64Imm(63, dl)), 0); 3523 SDValue SUBFC8Carry = 3524 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3525 RHS, LHS), 1); 3526 SDValue ADDE8Node = 3527 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, 3528 SRDINode, SRADINode, SUBFC8Carry), 0); 3529 SDValue XORI8Node = 3530 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, 3531 ADDE8Node, S->getI64Imm(1, dl)), 0); 3532 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, 3533 XORI8Node), 0); 3534 } 3535 case ISD::SETUGE: 3536 // {subc.reg, subc.CA} = (subcarry %a, %b) 3537 // (sext (setcc %a, %b, 
setuge)) -> ~(sube %b, %b, subc.CA) 3538 std::swap(LHS, RHS); 3539 LLVM_FALLTHROUGH; 3540 case ISD::SETULE: { 3541 // {subc.reg, subc.CA} = (subcarry %b, %a) 3542 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA) 3543 SDValue SubtractCarry = 3544 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3545 LHS, RHS), 1); 3546 SDValue ExtSub = 3547 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS, 3548 LHS, SubtractCarry), 0); 3549 return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, 3550 ExtSub, ExtSub), 0); 3551 } 3552 case ISD::SETUGT: 3553 // {subc.reg, subc.CA} = (subcarry %b, %a) 3554 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA) 3555 std::swap(LHS, RHS); 3556 LLVM_FALLTHROUGH; 3557 case ISD::SETULT: { 3558 // {subc.reg, subc.CA} = (subcarry %a, %b) 3559 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA) 3560 SDValue SubCarry = 3561 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3562 RHS, LHS), 1); 3563 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, 3564 LHS, LHS, SubCarry), 0); 3565 } 3566 } 3567 } 3568 3569 /// Do all uses of this SDValue need the result in a GPR? 3570 /// This is meant to be used on values that have type i1 since 3571 /// it is somewhat meaningless to ask if values of other types 3572 /// should be kept in GPR's. 3573 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { 3574 assert(Compare.getOpcode() == ISD::SETCC && 3575 "An ISD::SETCC node required here."); 3576 3577 // For values that have a single use, the caller should obviously already have 3578 // checked if that use is an extending use. We check the other uses here. 3579 if (Compare.hasOneUse()) 3580 return true; 3581 // We want the value in a GPR if it is being extended, used for a select, or 3582 // used in logical operations. 
3583 for (auto CompareUse : Compare.getNode()->uses()) 3584 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && 3585 CompareUse->getOpcode() != ISD::ZERO_EXTEND && 3586 CompareUse->getOpcode() != ISD::SELECT && 3587 !isLogicOp(CompareUse->getOpcode())) { 3588 OmittedForNonExtendUses++; 3589 return false; 3590 } 3591 return true; 3592 } 3593 3594 /// Returns an equivalent of a SETCC node but with the result the same width as 3595 /// the inputs. This can also be used for SELECT_CC if either the true or false 3596 /// values is a power of two while the other is zero. 3597 SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare, 3598 SetccInGPROpts ConvOpts) { 3599 assert((Compare.getOpcode() == ISD::SETCC || 3600 Compare.getOpcode() == ISD::SELECT_CC) && 3601 "An ISD::SETCC node required here."); 3602 3603 // Don't convert this comparison to a GPR sequence because there are uses 3604 // of the i1 result (i.e. uses that require the result in the CR). 3605 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) 3606 return SDValue(); 3607 3608 SDValue LHS = Compare.getOperand(0); 3609 SDValue RHS = Compare.getOperand(1); 3610 3611 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. 3612 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2; 3613 ISD::CondCode CC = 3614 cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get(); 3615 EVT InputVT = LHS.getValueType(); 3616 if (InputVT != MVT::i32 && InputVT != MVT::i64) 3617 return SDValue(); 3618 3619 if (ConvOpts == SetccInGPROpts::ZExtInvert || 3620 ConvOpts == SetccInGPROpts::SExtInvert) 3621 CC = ISD::getSetCCInverse(CC, InputVT); 3622 3623 bool Inputs32Bit = InputVT == MVT::i32; 3624 3625 SDLoc dl(Compare); 3626 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3627 int64_t RHSValue = RHSConst ? 
RHSConst->getSExtValue() : INT64_MAX; 3628 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || 3629 ConvOpts == SetccInGPROpts::SExtInvert; 3630 3631 if (IsSext && Inputs32Bit) 3632 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); 3633 else if (Inputs32Bit) 3634 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); 3635 else if (IsSext) 3636 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); 3637 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); 3638 } 3639 3640 } // end anonymous namespace 3641 3642 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) { 3643 if (N->getValueType(0) != MVT::i32 && 3644 N->getValueType(0) != MVT::i64) 3645 return false; 3646 3647 // This optimization will emit code that assumes 64-bit registers 3648 // so we don't want to run it in 32-bit mode. Also don't run it 3649 // on functions that are not to be optimized. 3650 if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) 3651 return false; 3652 3653 switch (N->getOpcode()) { 3654 default: break; 3655 case ISD::ZERO_EXTEND: 3656 case ISD::SIGN_EXTEND: 3657 case ISD::AND: 3658 case ISD::OR: 3659 case ISD::XOR: { 3660 IntegerCompareEliminator ICmpElim(CurDAG, this); 3661 if (SDNode *New = ICmpElim.Select(N)) { 3662 ReplaceNode(N, New); 3663 return true; 3664 } 3665 } 3666 } 3667 return false; 3668 } 3669 3670 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { 3671 if (N->getValueType(0) != MVT::i32 && 3672 N->getValueType(0) != MVT::i64) 3673 return false; 3674 3675 if (!UseBitPermRewriter) 3676 return false; 3677 3678 switch (N->getOpcode()) { 3679 default: break; 3680 case ISD::ROTL: 3681 case ISD::SHL: 3682 case ISD::SRL: 3683 case ISD::AND: 3684 case ISD::OR: { 3685 BitPermutationSelector BPS(CurDAG); 3686 if (SDNode *New = BPS.Select(N)) { 3687 ReplaceNode(N, New); 3688 return true; 3689 } 3690 return false; 3691 } 3692 } 3693 3694 return false; 3695 } 3696 3697 /// SelectCC - Select a comparison of the specified values with the specified 3698 /// 
condition code, returning the CR# of the expression. 3699 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, 3700 const SDLoc &dl) { 3701 // Always select the LHS. 3702 unsigned Opc; 3703 3704 if (LHS.getValueType() == MVT::i32) { 3705 unsigned Imm; 3706 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 3707 if (isInt32Immediate(RHS, Imm)) { 3708 // SETEQ/SETNE comparison with 16-bit immediate, fold it. 3709 if (isUInt<16>(Imm)) 3710 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, 3711 getI32Imm(Imm & 0xFFFF, dl)), 3712 0); 3713 // If this is a 16-bit signed immediate, fold it. 3714 if (isInt<16>((int)Imm)) 3715 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, 3716 getI32Imm(Imm & 0xFFFF, dl)), 3717 0); 3718 3719 // For non-equality comparisons, the default code would materialize the 3720 // constant, then compare against it, like this: 3721 // lis r2, 4660 3722 // ori r2, r2, 22136 3723 // cmpw cr0, r3, r2 3724 // Since we are just comparing for equality, we can emit this instead: 3725 // xoris r0,r3,0x1234 3726 // cmplwi cr0,r0,0x5678 3727 // beq cr0,L6 3728 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS, 3729 getI32Imm(Imm >> 16, dl)), 0); 3730 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor, 3731 getI32Imm(Imm & 0xFFFF, dl)), 0); 3732 } 3733 Opc = PPC::CMPLW; 3734 } else if (ISD::isUnsignedIntSetCC(CC)) { 3735 if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm)) 3736 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, 3737 getI32Imm(Imm & 0xFFFF, dl)), 0); 3738 Opc = PPC::CMPLW; 3739 } else { 3740 int16_t SImm; 3741 if (isIntS16Immediate(RHS, SImm)) 3742 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, 3743 getI32Imm((int)SImm & 0xFFFF, 3744 dl)), 3745 0); 3746 Opc = PPC::CMPW; 3747 } 3748 } else if (LHS.getValueType() == MVT::i64) { 3749 uint64_t Imm; 3750 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 3751 if 
(isInt64Immediate(RHS.getNode(), Imm)) { 3752 // SETEQ/SETNE comparison with 16-bit immediate, fold it. 3753 if (isUInt<16>(Imm)) 3754 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, 3755 getI32Imm(Imm & 0xFFFF, dl)), 3756 0); 3757 // If this is a 16-bit signed immediate, fold it. 3758 if (isInt<16>(Imm)) 3759 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, 3760 getI32Imm(Imm & 0xFFFF, dl)), 3761 0); 3762 3763 // For non-equality comparisons, the default code would materialize the 3764 // constant, then compare against it, like this: 3765 // lis r2, 4660 3766 // ori r2, r2, 22136 3767 // cmpd cr0, r3, r2 3768 // Since we are just comparing for equality, we can emit this instead: 3769 // xoris r0,r3,0x1234 3770 // cmpldi cr0,r0,0x5678 3771 // beq cr0,L6 3772 if (isUInt<32>(Imm)) { 3773 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS, 3774 getI64Imm(Imm >> 16, dl)), 0); 3775 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor, 3776 getI64Imm(Imm & 0xFFFF, dl)), 3777 0); 3778 } 3779 } 3780 Opc = PPC::CMPLD; 3781 } else if (ISD::isUnsignedIntSetCC(CC)) { 3782 if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm)) 3783 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, 3784 getI64Imm(Imm & 0xFFFF, dl)), 0); 3785 Opc = PPC::CMPLD; 3786 } else { 3787 int16_t SImm; 3788 if (isIntS16Immediate(RHS, SImm)) 3789 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, 3790 getI64Imm(SImm & 0xFFFF, dl)), 3791 0); 3792 Opc = PPC::CMPD; 3793 } 3794 } else if (LHS.getValueType() == MVT::f32) { 3795 if (PPCSubTarget->hasSPE()) { 3796 switch (CC) { 3797 default: 3798 case ISD::SETEQ: 3799 case ISD::SETNE: 3800 Opc = PPC::EFSCMPEQ; 3801 break; 3802 case ISD::SETLT: 3803 case ISD::SETGE: 3804 case ISD::SETOLT: 3805 case ISD::SETOGE: 3806 case ISD::SETULT: 3807 case ISD::SETUGE: 3808 Opc = PPC::EFSCMPLT; 3809 break; 3810 case ISD::SETGT: 3811 case ISD::SETLE: 3812 case 
ISD::SETOGT: 3813 case ISD::SETOLE: 3814 case ISD::SETUGT: 3815 case ISD::SETULE: 3816 Opc = PPC::EFSCMPGT; 3817 break; 3818 } 3819 } else 3820 Opc = PPC::FCMPUS; 3821 } else if (LHS.getValueType() == MVT::f64) { 3822 if (PPCSubTarget->hasSPE()) { 3823 switch (CC) { 3824 default: 3825 case ISD::SETEQ: 3826 case ISD::SETNE: 3827 Opc = PPC::EFDCMPEQ; 3828 break; 3829 case ISD::SETLT: 3830 case ISD::SETGE: 3831 case ISD::SETOLT: 3832 case ISD::SETOGE: 3833 case ISD::SETULT: 3834 case ISD::SETUGE: 3835 Opc = PPC::EFDCMPLT; 3836 break; 3837 case ISD::SETGT: 3838 case ISD::SETLE: 3839 case ISD::SETOGT: 3840 case ISD::SETOLE: 3841 case ISD::SETUGT: 3842 case ISD::SETULE: 3843 Opc = PPC::EFDCMPGT; 3844 break; 3845 } 3846 } else 3847 Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; 3848 } else { 3849 assert(LHS.getValueType() == MVT::f128 && "Unknown vt!"); 3850 assert(PPCSubTarget->hasVSX() && "__float128 requires VSX"); 3851 Opc = PPC::XSCMPUQP; 3852 } 3853 return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); 3854 } 3855 3856 static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, 3857 const PPCSubtarget *Subtarget) { 3858 // For SPE instructions, the result is in GT bit of the CR 3859 bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint(); 3860 3861 switch (CC) { 3862 case ISD::SETUEQ: 3863 case ISD::SETONE: 3864 case ISD::SETOLE: 3865 case ISD::SETOGE: 3866 llvm_unreachable("Should be lowered by legalize!"); 3867 default: llvm_unreachable("Unknown condition!"); 3868 case ISD::SETOEQ: 3869 case ISD::SETEQ: 3870 return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ; 3871 case ISD::SETUNE: 3872 case ISD::SETNE: 3873 return UseSPE ? PPC::PRED_LE : PPC::PRED_NE; 3874 case ISD::SETOLT: 3875 case ISD::SETLT: 3876 return UseSPE ? 
PPC::PRED_GT : PPC::PRED_LT; 3877 case ISD::SETULE: 3878 case ISD::SETLE: 3879 return PPC::PRED_LE; 3880 case ISD::SETOGT: 3881 case ISD::SETGT: 3882 return PPC::PRED_GT; 3883 case ISD::SETUGE: 3884 case ISD::SETGE: 3885 return UseSPE ? PPC::PRED_LE : PPC::PRED_GE; 3886 case ISD::SETO: return PPC::PRED_NU; 3887 case ISD::SETUO: return PPC::PRED_UN; 3888 // These two are invalid for floating point. Assume we have int. 3889 case ISD::SETULT: return PPC::PRED_LT; 3890 case ISD::SETUGT: return PPC::PRED_GT; 3891 } 3892 } 3893 3894 /// getCRIdxForSetCC - Return the index of the condition register field 3895 /// associated with the SetCC condition, and whether or not the field is 3896 /// treated as inverted. That is, lt = 0; ge = 0 inverted. 3897 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { 3898 Invert = false; 3899 switch (CC) { 3900 default: llvm_unreachable("Unknown condition!"); 3901 case ISD::SETOLT: 3902 case ISD::SETLT: return 0; // Bit #0 = SETOLT 3903 case ISD::SETOGT: 3904 case ISD::SETGT: return 1; // Bit #1 = SETOGT 3905 case ISD::SETOEQ: 3906 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ 3907 case ISD::SETUO: return 3; // Bit #3 = SETUO 3908 case ISD::SETUGE: 3909 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE 3910 case ISD::SETULE: 3911 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE 3912 case ISD::SETUNE: 3913 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE 3914 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO 3915 case ISD::SETUEQ: 3916 case ISD::SETOGE: 3917 case ISD::SETOLE: 3918 case ISD::SETONE: 3919 llvm_unreachable("Invalid branch code: should be expanded by legalize"); 3920 // These are invalid for floating point. Assume integer. 3921 case ISD::SETULT: return 0; 3922 case ISD::SETUGT: return 1; 3923 } 3924 } 3925 3926 // getVCmpInst: return the vector compare instruction for the specified 3927 // vector type and condition code. 
Since this is for altivec specific code, 3928 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32). 3929 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, 3930 bool HasVSX, bool &Swap, bool &Negate) { 3931 Swap = false; 3932 Negate = false; 3933 3934 if (VecVT.isFloatingPoint()) { 3935 /* Handle some cases by swapping input operands. */ 3936 switch (CC) { 3937 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break; 3938 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; 3939 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break; 3940 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break; 3941 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; 3942 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break; 3943 default: break; 3944 } 3945 /* Handle some cases by negating the result. */ 3946 switch (CC) { 3947 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; 3948 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break; 3949 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break; 3950 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break; 3951 default: break; 3952 } 3953 /* We have instructions implementing the remaining cases. */ 3954 switch (CC) { 3955 case ISD::SETEQ: 3956 case ISD::SETOEQ: 3957 if (VecVT == MVT::v4f32) 3958 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; 3959 else if (VecVT == MVT::v2f64) 3960 return PPC::XVCMPEQDP; 3961 break; 3962 case ISD::SETGT: 3963 case ISD::SETOGT: 3964 if (VecVT == MVT::v4f32) 3965 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; 3966 else if (VecVT == MVT::v2f64) 3967 return PPC::XVCMPGTDP; 3968 break; 3969 case ISD::SETGE: 3970 case ISD::SETOGE: 3971 if (VecVT == MVT::v4f32) 3972 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP; 3973 else if (VecVT == MVT::v2f64) 3974 return PPC::XVCMPGEDP; 3975 break; 3976 default: 3977 break; 3978 } 3979 llvm_unreachable("Invalid floating-point vector compare condition"); 3980 } else { 3981 /* Handle some cases by swapping input operands. 
*/ 3982 switch (CC) { 3983 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break; 3984 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; 3985 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; 3986 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break; 3987 default: break; 3988 } 3989 /* Handle some cases by negating the result. */ 3990 switch (CC) { 3991 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; 3992 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break; 3993 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break; 3994 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break; 3995 default: break; 3996 } 3997 /* We have instructions implementing the remaining cases. */ 3998 switch (CC) { 3999 case ISD::SETEQ: 4000 case ISD::SETUEQ: 4001 if (VecVT == MVT::v16i8) 4002 return PPC::VCMPEQUB; 4003 else if (VecVT == MVT::v8i16) 4004 return PPC::VCMPEQUH; 4005 else if (VecVT == MVT::v4i32) 4006 return PPC::VCMPEQUW; 4007 else if (VecVT == MVT::v2i64) 4008 return PPC::VCMPEQUD; 4009 break; 4010 case ISD::SETGT: 4011 if (VecVT == MVT::v16i8) 4012 return PPC::VCMPGTSB; 4013 else if (VecVT == MVT::v8i16) 4014 return PPC::VCMPGTSH; 4015 else if (VecVT == MVT::v4i32) 4016 return PPC::VCMPGTSW; 4017 else if (VecVT == MVT::v2i64) 4018 return PPC::VCMPGTSD; 4019 break; 4020 case ISD::SETUGT: 4021 if (VecVT == MVT::v16i8) 4022 return PPC::VCMPGTUB; 4023 else if (VecVT == MVT::v8i16) 4024 return PPC::VCMPGTUH; 4025 else if (VecVT == MVT::v4i32) 4026 return PPC::VCMPGTUW; 4027 else if (VecVT == MVT::v2i64) 4028 return PPC::VCMPGTUD; 4029 break; 4030 default: 4031 break; 4032 } 4033 llvm_unreachable("Invalid integer vector compare condition"); 4034 } 4035 } 4036 4037 bool PPCDAGToDAGISel::trySETCC(SDNode *N) { 4038 SDLoc dl(N); 4039 unsigned Imm; 4040 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 4041 EVT PtrVT = 4042 CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); 4043 bool isPPC64 = (PtrVT == MVT::i64); 4044 4045 
if (!PPCSubTarget->useCRBits() && 4046 isInt32Immediate(N->getOperand(1), Imm)) { 4047 // We can codegen setcc op, imm very efficiently compared to a brcond. 4048 // Check for those cases here. 4049 // setcc op, 0 4050 if (Imm == 0) { 4051 SDValue Op = N->getOperand(0); 4052 switch (CC) { 4053 default: break; 4054 case ISD::SETEQ: { 4055 Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); 4056 SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl), 4057 getI32Imm(31, dl) }; 4058 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4059 return true; 4060 } 4061 case ISD::SETNE: { 4062 if (isPPC64) break; 4063 SDValue AD = 4064 SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 4065 Op, getI32Imm(~0U, dl)), 0); 4066 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); 4067 return true; 4068 } 4069 case ISD::SETLT: { 4070 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), 4071 getI32Imm(31, dl) }; 4072 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4073 return true; 4074 } 4075 case ISD::SETGT: { 4076 SDValue T = 4077 SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0); 4078 T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); 4079 SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl), 4080 getI32Imm(31, dl) }; 4081 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4082 return true; 4083 } 4084 } 4085 } else if (Imm == ~0U) { // setcc op, -1 4086 SDValue Op = N->getOperand(0); 4087 switch (CC) { 4088 default: break; 4089 case ISD::SETEQ: 4090 if (isPPC64) break; 4091 Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 4092 Op, getI32Imm(1, dl)), 0); 4093 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, 4094 SDValue(CurDAG->getMachineNode(PPC::LI, dl, 4095 MVT::i32, 4096 getI32Imm(0, dl)), 4097 0), Op.getValue(1)); 4098 return true; 4099 case ISD::SETNE: { 4100 if (isPPC64) break; 4101 Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, 
Op, Op), 0); 4102 SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 4103 Op, getI32Imm(~0U, dl)); 4104 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op, 4105 SDValue(AD, 1)); 4106 return true; 4107 } 4108 case ISD::SETLT: { 4109 SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op, 4110 getI32Imm(1, dl)), 0); 4111 SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD, 4112 Op), 0); 4113 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl), 4114 getI32Imm(31, dl) }; 4115 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4116 return true; 4117 } 4118 case ISD::SETGT: { 4119 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), 4120 getI32Imm(31, dl) }; 4121 Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 4122 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl)); 4123 return true; 4124 } 4125 } 4126 } 4127 } 4128 4129 SDValue LHS = N->getOperand(0); 4130 SDValue RHS = N->getOperand(1); 4131 4132 // Altivec Vector compare instructions do not set any CR register by default and 4133 // vector compare operations return the same type as the operands. 4134 if (LHS.getValueType().isVector()) { 4135 if (PPCSubTarget->hasQPX() || PPCSubTarget->hasSPE()) 4136 return false; 4137 4138 EVT VecVT = LHS.getValueType(); 4139 bool Swap, Negate; 4140 unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, 4141 PPCSubTarget->hasVSX(), Swap, Negate); 4142 if (Swap) 4143 std::swap(LHS, RHS); 4144 4145 EVT ResVT = VecVT.changeVectorElementTypeToInteger(); 4146 if (Negate) { 4147 SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0); 4148 CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? 
PPC::XXLNOR : PPC::VNOR, 4149 ResVT, VCmp, VCmp); 4150 return true; 4151 } 4152 4153 CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS); 4154 return true; 4155 } 4156 4157 if (PPCSubTarget->useCRBits()) 4158 return false; 4159 4160 bool Inv; 4161 unsigned Idx = getCRIdxForSetCC(CC, Inv); 4162 SDValue CCReg = SelectCC(LHS, RHS, CC, dl); 4163 SDValue IntCR; 4164 4165 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that 4166 // The correct compare instruction is already set by SelectCC() 4167 if (PPCSubTarget->hasSPE() && LHS.getValueType().isFloatingPoint()) { 4168 Idx = 1; 4169 } 4170 4171 // Force the ccreg into CR7. 4172 SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); 4173 4174 SDValue InFlag(nullptr, 0); // Null incoming flag value. 4175 CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, 4176 InFlag).getValue(1); 4177 4178 IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, 4179 CCReg), 0); 4180 4181 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl), 4182 getI32Imm(31, dl), getI32Imm(31, dl) }; 4183 if (!Inv) { 4184 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4185 return true; 4186 } 4187 4188 // Get the specified bit. 
4189 SDValue Tmp = 4190 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 4191 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); 4192 return true; 4193 } 4194 4195 /// Does this node represent a load/store node whose address can be represented 4196 /// with a register plus an immediate that's a multiple of \p Val: 4197 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { 4198 LoadSDNode *LDN = dyn_cast<LoadSDNode>(N); 4199 StoreSDNode *STN = dyn_cast<StoreSDNode>(N); 4200 SDValue AddrOp; 4201 if (LDN) 4202 AddrOp = LDN->getOperand(1); 4203 else if (STN) 4204 AddrOp = STN->getOperand(2); 4205 4206 // If the address points a frame object or a frame object with an offset, 4207 // we need to check the object alignment. 4208 short Imm = 0; 4209 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>( 4210 AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) : 4211 AddrOp)) { 4212 // If op0 is a frame index that is under aligned, we can't do it either, 4213 // because it is translated to r31 or r1 + slot + offset. We won't know the 4214 // slot number until the stack frame is finalized. 4215 const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); 4216 unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value(); 4217 if ((SlotAlign % Val) != 0) 4218 return false; 4219 4220 // If we have an offset, we need further check on the offset. 4221 if (AddrOp.getOpcode() != ISD::ADD) 4222 return true; 4223 } 4224 4225 if (AddrOp.getOpcode() == ISD::ADD) 4226 return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val); 4227 4228 // If the address comes from the outside, the offset will be zero. 4229 return AddrOp.getOpcode() == ISD::CopyFromReg; 4230 } 4231 4232 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 4233 // Transfer memoperands. 
4234 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 4235 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 4236 } 4237 4238 static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, 4239 bool &NeedSwapOps, bool &IsUnCmp) { 4240 4241 assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here."); 4242 4243 SDValue LHS = N->getOperand(0); 4244 SDValue RHS = N->getOperand(1); 4245 SDValue TrueRes = N->getOperand(2); 4246 SDValue FalseRes = N->getOperand(3); 4247 ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes); 4248 if (!TrueConst) 4249 return false; 4250 4251 assert((N->getSimpleValueType(0) == MVT::i64 || 4252 N->getSimpleValueType(0) == MVT::i32) && 4253 "Expecting either i64 or i32 here."); 4254 4255 // We are looking for any of: 4256 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1) 4257 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1) 4258 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq) 4259 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq) 4260 int64_t TrueResVal = TrueConst->getSExtValue(); 4261 if ((TrueResVal < -1 || TrueResVal > 1) || 4262 (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) || 4263 (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) || 4264 (TrueResVal == 0 && 4265 (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ))) 4266 return false; 4267 4268 bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC; 4269 SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0); 4270 if (SetOrSelCC.getOpcode() != ISD::SETCC && 4271 SetOrSelCC.getOpcode() != ISD::SELECT_CC) 4272 return false; 4273 4274 // Without this setb optimization, the outer SELECT_CC will be manually 4275 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass 4276 // transforms pseudo instruction to isel instruction. 
When there are more than 4277 // one use for result like zext/sext, with current optimization we only see 4278 // isel is replaced by setb but can't see any significant gain. Since 4279 // setb has longer latency than original isel, we should avoid this. Another 4280 // point is that setb requires comparison always kept, it can break the 4281 // opportunity to get the comparison away if we have in future. 4282 if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse())) 4283 return false; 4284 4285 SDValue InnerLHS = SetOrSelCC.getOperand(0); 4286 SDValue InnerRHS = SetOrSelCC.getOperand(1); 4287 ISD::CondCode InnerCC = 4288 cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get(); 4289 // If the inner comparison is a select_cc, make sure the true/false values are 4290 // 1/-1 and canonicalize it if needed. 4291 if (InnerIsSel) { 4292 ConstantSDNode *SelCCTrueConst = 4293 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2)); 4294 ConstantSDNode *SelCCFalseConst = 4295 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3)); 4296 if (!SelCCTrueConst || !SelCCFalseConst) 4297 return false; 4298 int64_t SelCCTVal = SelCCTrueConst->getSExtValue(); 4299 int64_t SelCCFVal = SelCCFalseConst->getSExtValue(); 4300 // The values must be -1/1 (requiring a swap) or 1/-1. 4301 if (SelCCTVal == -1 && SelCCFVal == 1) { 4302 std::swap(InnerLHS, InnerRHS); 4303 } else if (SelCCTVal != 1 || SelCCFVal != -1) 4304 return false; 4305 } 4306 4307 // Canonicalize unsigned case 4308 if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) { 4309 IsUnCmp = true; 4310 InnerCC = (InnerCC == ISD::SETULT) ? 
ISD::SETLT : ISD::SETGT; 4311 } 4312 4313 bool InnerSwapped = false; 4314 if (LHS == InnerRHS && RHS == InnerLHS) 4315 InnerSwapped = true; 4316 else if (LHS != InnerLHS || RHS != InnerRHS) 4317 return false; 4318 4319 switch (CC) { 4320 // (select_cc lhs, rhs, 0, \ 4321 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq) 4322 case ISD::SETEQ: 4323 if (!InnerIsSel) 4324 return false; 4325 if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT) 4326 return false; 4327 NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped; 4328 break; 4329 4330 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt) 4331 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt) 4332 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt) 4333 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt) 4334 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt) 4335 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt) 4336 case ISD::SETULT: 4337 if (!IsUnCmp && InnerCC != ISD::SETNE) 4338 return false; 4339 IsUnCmp = true; 4340 LLVM_FALLTHROUGH; 4341 case ISD::SETLT: 4342 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) || 4343 (InnerCC == ISD::SETLT && InnerSwapped)) 4344 NeedSwapOps = (TrueResVal == 1); 4345 else 4346 return false; 4347 break; 4348 4349 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt) 4350 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt) 4351 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt) 4352 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt) 4353 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt) 4354 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt) 4355 case ISD::SETUGT: 4356 if (!IsUnCmp && InnerCC != ISD::SETNE) 4357 return false; 4358 IsUnCmp = true; 4359 LLVM_FALLTHROUGH; 4360 case ISD::SETGT: 4361 if (InnerCC == 
ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) || 4362 (InnerCC == ISD::SETGT && InnerSwapped)) 4363 NeedSwapOps = (TrueResVal == -1); 4364 else 4365 return false; 4366 break; 4367 4368 default: 4369 return false; 4370 } 4371 4372 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: "); 4373 LLVM_DEBUG(N->dump()); 4374 4375 return true; 4376 } 4377 4378 bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) { 4379 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); 4380 unsigned Imm; 4381 if (!isInt32Immediate(N->getOperand(1), Imm)) 4382 return false; 4383 4384 SDLoc dl(N); 4385 SDValue Val = N->getOperand(0); 4386 unsigned SH, MB, ME; 4387 // If this is an and of a value rotated between 0 and 31 bits and then and'd 4388 // with a mask, emit rlwinm 4389 if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) { 4390 Val = Val.getOperand(0); 4391 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl), 4392 getI32Imm(ME, dl)}; 4393 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4394 return true; 4395 } 4396 4397 // If this is just a masked value where the input is not handled, and 4398 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm 4399 if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) { 4400 SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl), 4401 getI32Imm(ME, dl)}; 4402 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4403 return true; 4404 } 4405 4406 // AND X, 0 -> 0, not "rlwinm 32". 
4407 if (Imm == 0) { 4408 ReplaceUses(SDValue(N, 0), N->getOperand(1)); 4409 return true; 4410 } 4411 4412 return false; 4413 } 4414 4415 bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) { 4416 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); 4417 uint64_t Imm64; 4418 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) 4419 return false; 4420 4421 unsigned MB, ME; 4422 if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) { 4423 // MB ME 4424 // +----------------------+ 4425 // |xxxxxxxxxxx00011111000| 4426 // +----------------------+ 4427 // 0 32 64 4428 // We can only do it if the MB is larger than 32 and MB <= ME 4429 // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even 4430 // we didn't rotate it. 4431 SDLoc dl(N); 4432 SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl), 4433 getI64Imm(ME - 32, dl)}; 4434 CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops); 4435 return true; 4436 } 4437 4438 return false; 4439 } 4440 4441 bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) { 4442 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); 4443 unsigned Imm; 4444 if (!isInt32Immediate(N->getOperand(1), Imm)) 4445 return false; 4446 4447 SDValue Val = N->getOperand(0); 4448 unsigned Imm2; 4449 // ISD::OR doesn't get all the bitfield insertion fun. 4450 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a 4451 // bitfield insert. 4452 if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2)) 4453 return false; 4454 4455 // The idea here is to check whether this is equivalent to: 4456 // (c1 & m) | (x & ~m) 4457 // where m is a run-of-ones mask. The logic here is that, for each bit in 4458 // c1 and c2: 4459 // - if both are 1, then the output will be 1. 4460 // - if both are 0, then the output will be 0. 4461 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will 4462 // come from x. 
4463 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will 4464 // be 0. 4465 // If that last condition is never the case, then we can form m from the 4466 // bits that are the same between c1 and c2. 4467 unsigned MB, ME; 4468 if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) { 4469 SDLoc dl(N); 4470 SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl), 4471 getI32Imm(MB, dl), getI32Imm(ME, dl)}; 4472 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); 4473 return true; 4474 } 4475 4476 return false; 4477 } 4478 4479 bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) { 4480 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); 4481 uint64_t Imm64; 4482 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64)) 4483 return false; 4484 4485 // If this is a 64-bit zero-extension mask, emit rldicl. 4486 unsigned MB = 64 - countTrailingOnes(Imm64); 4487 unsigned SH = 0; 4488 unsigned Imm; 4489 SDValue Val = N->getOperand(0); 4490 SDLoc dl(N); 4491 4492 if (Val.getOpcode() == ISD::ANY_EXTEND) { 4493 auto Op0 = Val.getOperand(0); 4494 if (Op0.getOpcode() == ISD::SRL && 4495 isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { 4496 4497 auto ResultType = Val.getNode()->getValueType(0); 4498 auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType); 4499 SDValue IDVal(ImDef, 0); 4500 4501 Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType, 4502 IDVal, Op0.getOperand(0), 4503 getI32Imm(1, dl)), 4504 0); 4505 SH = 64 - Imm; 4506 } 4507 } 4508 4509 // If the operand is a logical right shift, we can fold it into this 4510 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) 4511 // for n <= mb. The right shift is really a left rotate followed by a 4512 // mask, and this mask is a more-restrictive sub-mask of the mask implied 4513 // by the shift. 
4514 if (Val.getOpcode() == ISD::SRL && 4515 isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { 4516 assert(Imm < 64 && "Illegal shift amount"); 4517 Val = Val.getOperand(0); 4518 SH = 64 - Imm; 4519 } 4520 4521 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)}; 4522 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); 4523 return true; 4524 } 4525 4526 bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) { 4527 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); 4528 uint64_t Imm64; 4529 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || 4530 !isMask_64(~Imm64)) 4531 return false; 4532 4533 // If this is a negated 64-bit zero-extension mask, 4534 // i.e. the immediate is a sequence of ones from most significant side 4535 // and all zero for reminder, we should use rldicr. 4536 unsigned MB = 63 - countTrailingOnes(~Imm64); 4537 unsigned SH = 0; 4538 SDLoc dl(N); 4539 SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)}; 4540 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); 4541 return true; 4542 } 4543 4544 // Select - Convert the specified operand from a target-independent to a 4545 // target-specific node if it hasn't already been changed. 4546 void PPCDAGToDAGISel::Select(SDNode *N) { 4547 SDLoc dl(N); 4548 if (N->isMachineOpcode()) { 4549 N->setNodeId(-1); 4550 return; // Already selected. 4551 } 4552 4553 // In case any misguided DAG-level optimizations form an ADD with a 4554 // TargetConstant operand, crash here instead of miscompiling (by selecting 4555 // an r+r add instead of some kind of r+i add). 4556 if (N->getOpcode() == ISD::ADD && 4557 N->getOperand(1).getOpcode() == ISD::TargetConstant) 4558 llvm_unreachable("Invalid ADD with TargetConstant operand"); 4559 4560 // Try matching complex bit permutations before doing anything else. 4561 if (tryBitPermutation(N)) 4562 return; 4563 4564 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR). 
if (tryIntCompareInGPR(N))
    return;

  // Opcode-specific custom selection. A case that `break`s out of the switch
  // falls through to the autogenerated matcher (SelectCode) at the bottom.
  switch (N->getOpcode()) {
  default: break;

  case ISD::Constant:
    // Only i64 constants are materialized here.
    if (N->getValueType(0) == MVT::i64) {
      ReplaceNode(N, selectI64Imm(CurDAG, N));
      return;
    }
    break;

  case ISD::SETCC:
    if (trySETCC(N))
      return;
    break;
  // These nodes will be transformed into GETtlsADDR32 node, which
  // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
  case PPCISD::ADDI_TLSLD_L_ADDR:
  case PPCISD::ADDI_TLSGD_L_ADDR: {
    const Module *Mod = MF->getFunction().getParent();
    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
        !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
        Mod->getPICLevel() == PICLevel::SmallPIC)
      break;
    // Attach global base pointer on GETtlsADDR32 node in order to
    // generate secure plt code for TLS symbols.
    getGlobalBaseReg();
  } break;
  case PPCISD::CALL: {
    // Only 32-bit, position-independent, secure-PLT ELF needs anything done
    // here; the node itself is still selected by the default path below.
    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
        !TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt() ||
        !PPCSubTarget->isTargetELF())
      break;

    SDValue Op = N->getOperand(1);

    // The return value of getGlobalBaseReg() is intentionally unused in this
    // case; it is called only when the callee carries the MO_PLT flag.
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
      if (GA->getTargetFlags() == PPCII::MO_PLT)
        getGlobalBaseReg();
    }
    else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
      if (ES->getTargetFlags() == PPCII::MO_PLT)
        getGlobalBaseReg();
    }
  }
    break;

  case PPCISD::GlobalBaseReg:
    ReplaceNode(N, getGlobalBaseReg());
    return;

  case ISD::FrameIndex:
    selectFrameIndex(N, N);
    return;

  case PPCISD::MFOCRF: {
    SDValue InFlag = N->getOperand(1);
    ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
                                          N->getOperand(0), InFlag));
    return;
  }

  case PPCISD::READ_TIME_BASE:
    // ReadTB produces two i32 results plus a chain.
    ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
                                          MVT::Other, N->getOperand(0)));
    return;

  case PPCISD::SRA_ADDZE: {
    // Selected as SRADI/SRAWI followed by ADDZE8/ADDZE; the two machine nodes
    // are linked through the glue result of the shift.
    SDValue N0 = N->getOperand(0);
    SDValue ShiftAmt =
      CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
                                  getConstantIntValue(), dl,
                                  N->getValueType(0));
    if (N->getValueType(0) == MVT::i64) {
      SDNode *Op =
        CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
                               N0, ShiftAmt);
      CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
                           SDValue(Op, 1));
      return;
    } else {
      assert(N->getValueType(0) == MVT::i32 &&
             "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
      SDNode *Op =
        CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
                               N0, ShiftAmt);
      CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
                           SDValue(Op, 1));
      return;
    }
  }

  case ISD::STORE: {
    // Change TLS initial-exec D-form stores to X-form stores.
    StoreSDNode *ST = cast<StoreSDNode>(N);
    if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
        ST->getAddressingMode() != ISD::PRE_INC)
      if (tryTLSXFormStore(ST))
        return;
    break;
  }
  case ISD::LOAD: {
    // Handle preincrement loads.
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT LoadedVT = LD->getMemoryVT();

    // Normal loads are handled by code generated from the .td file.
    if (LD->getAddressingMode() != ISD::PRE_INC) {
      // Change TLS initial-exec D-form loads to X-form loads.
      if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
        if (tryTLSXFormLoad(LD))
          return;
      break;
    }

    // Pre-increment load: a constant / global-address offset selects the
    // "U" opcodes, any other offset selects the "UX" opcodes.
    SDValue Offset = LD->getOffset();
    if (Offset.getOpcode() == ISD::TargetConstant ||
        Offset.getOpcode() == ISD::TargetGlobalAddress) {

      unsigned Opcode;
      bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
      if (LD->getValueType(0) != MVT::i64) {
        // Handle PPC32 integer and normal FP loads.
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::f64: Opcode = PPC::LFDU; break;
          case MVT::f32: Opcode = PPC::LFSU; break;
          case MVT::i32: Opcode = PPC::LWZU; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZU; break;
        }
      } else {
        assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::i64: Opcode = PPC::LDU; break;
          case MVT::i32: Opcode = PPC::LWZU8; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZU8; break;
        }
      }

      // Operand order for this form is (offset, base, chain). The results are
      // the loaded value, a pointer-typed value and the chain.
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Offset, Base, Chain };
      SDNode *MN = CurDAG->getMachineNode(
          Opcode, dl, LD->getValueType(0),
          PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
      transferMemOperands(N, MN);
      ReplaceNode(N, MN);
      return;
    } else {
      unsigned Opcode;
      bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
      if (LD->getValueType(0) != MVT::i64) {
        // Handle PPC32 integer and normal FP loads.
        assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
          case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
          case MVT::f64: Opcode = PPC::LFDUX; break;
          case MVT::f32: Opcode = PPC::LFSUX; break;
          case MVT::i32: Opcode = PPC::LWZUX; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZUX; break;
        }
      } else {
        assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
        assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
               "Invalid sext update load");
        switch (LoadedVT.getSimpleVT().SimpleTy) {
          default: llvm_unreachable("Invalid PPC load type!");
          case MVT::i64: Opcode = PPC::LDUX; break;
          case MVT::i32: Opcode = isSExt ? PPC::LWAUX  : PPC::LWZUX8; break;
          case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
          case MVT::i1:
          case MVT::i8:  Opcode = PPC::LBZUX8; break;
        }
      }

      // Operand order for this form is (base, offset, chain).
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, Offset, Chain };
      SDNode *MN = CurDAG->getMachineNode(
          Opcode, dl, LD->getValueType(0),
          PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
      transferMemOperands(N, MN);
      ReplaceNode(N, MN);
      return;
    }
  }

  case ISD::AND:
    // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
    if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
        tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N))
      return;

    // Other cases are autogenerated.
    break;
  case ISD::OR: {
    if (N->getValueType(0) == MVT::i32)
      if (tryBitfieldInsert(N))
        return;

    int16_t Imm;
    if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
        isIntS16Immediate(N->getOperand(1), Imm)) {
      KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));

      // If this is equivalent to an add, then we can fold it with the
      // FrameIndex calculation. (Every set bit of Imm must be a known-zero
      // bit of the frame index.)
      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
        selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
        return;
      }
    }

    // OR with a 32-bit immediate can be handled by ori + oris
    // without creating an immediate in a GPR.
    uint64_t Imm64 = 0;
    bool IsPPC64 = PPCSubTarget->isPPC64();
    if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
        (Imm64 & ~0xFFFFFFFFuLL) == 0) {
      // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later;
      // that case is not handled here and falls out of the switch.
      uint64_t ImmHi = Imm64 >> 16;
      uint64_t ImmLo = Imm64 & 0xFFFF;
      if (ImmHi != 0 && ImmLo != 0) {
        SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
                                            N->getOperand(0),
                                            getI16Imm(ImmLo, dl));
        SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
        CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
        return;
      }
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::XOR: {
    // XOR with a 32-bit immediate can be handled by xori + xoris
    // without creating an immediate in a GPR.
    uint64_t Imm64 = 0;
    bool IsPPC64 = PPCSubTarget->isPPC64();
    if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
        (Imm64 & ~0xFFFFFFFFuLL) == 0) {
      // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later;
      // that case is not handled here and falls out of the switch.
      uint64_t ImmHi = Imm64 >> 16;
      uint64_t ImmLo = Imm64 & 0xFFFF;
      if (ImmHi != 0 && ImmLo != 0) {
        SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
                                            N->getOperand(0),
                                            getI16Imm(ImmLo, dl));
        SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
        CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
        return;
      }
    }

    break;
  }
  case ISD::ADD: {
    // (add FrameIndex, simm16) is folded into the frame-index selection.
    int16_t Imm;
    if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
        isIntS16Immediate(N->getOperand(1), Imm)) {
      selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
      return;
    }

    break;
  }
  case ISD::SHL: {
    // (shl (and x, imm), n) that isRotateAndMask accepts becomes one rlwinm.
    unsigned Imm, SH, MB, ME;
    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
        isRotateAndMask(N, Imm, true, SH, MB, ME)) {
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          getI32Imm(SH, dl), getI32Imm(MB, dl),
                          getI32Imm(ME, dl) };
      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
      return;
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::SRL: {
    // Same folding as for ISD::SHL above, applied to a right shift.
    unsigned Imm, SH, MB, ME;
    if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
        isRotateAndMask(N, Imm, true, SH, MB, ME)) {
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          getI32Imm(SH, dl), getI32Imm(MB, dl),
                          getI32Imm(ME, dl) };
      CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
      return;
    }

    // Other cases are autogenerated.
    break;
  }
  // FIXME: Remove this once the ANDI glue bug is fixed:
  case PPCISD::ANDI_rec_1_EQ_BIT:
  case PPCISD::ANDI_rec_1_GT_BIT: {
    if (!ANDIGlueBug)
      break;

    EVT InVT = N->getOperand(0).getValueType();
    assert((InVT == MVT::i64 || InVT == MVT::i32) &&
           "Invalid input type for ANDI_rec_1_EQ_BIT");

    // Emit the record-form "and immediate 1" and then extract the requested
    // CR0 bit, glued to it.
    unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
    SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
                                        N->getOperand(0),
                                        CurDAG->getTargetConstant(1, dl, InVT)),
                 0);
    SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
    SDValue SRIdxVal = CurDAG->getTargetConstant(
        N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
        dl, MVT::i32);

    CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
                         SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
    return;
  }
  case ISD::SELECT_CC: {
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
    EVT PtrVT =
        CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
    bool isPPC64 = (PtrVT == MVT::i64);

    // If this is a select of i1 operands, we'll pattern match it.
    if (PPCSubTarget->useCRBits() &&
        N->getOperand(0).getValueType() == MVT::i1)
      break;

    if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
      bool NeedSwapOps = false;
      bool IsUnCmp = false;
      if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
        SDValue LHS = N->getOperand(0);
        SDValue RHS = N->getOperand(1);
        if (NeedSwapOps)
          std::swap(LHS, RHS);

        // Make use of SelectCC to generate the comparison to set CR bits, for
        // equality comparisons having one literal operand, SelectCC probably
        // doesn't need to materialize the whole literal and just use xoris to
        // check it first, it leads the following comparison result can't
        // exactly represent GT/LT relationship. So to avoid this we specify
        // SETGT/SETUGT here instead of SETEQ.
        SDValue GenCC =
            SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
        CurDAG->SelectNodeTo(
            N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
            N->getValueType(0), GenCC);
        NumP9Setb++;
        return;
      }
    }

    // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
    // Only the SETNE form is matched; it is emitted as ADDIC + SUBFE.
    if (!isPPC64)
      if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
        if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
          if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
            if (N1C->isNullValue() && N3C->isNullValue() &&
                N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
                // FIXME: Implement this optzn for PPC64.
                N->getValueType(0) == MVT::i32) {
              SDNode *Tmp =
                CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                       N->getOperand(0), getI32Imm(~0U, dl));
              CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
                                   N->getOperand(0), SDValue(Tmp, 1));
              return;
            }

    SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);

    if (N->getValueType(0) == MVT::i1) {
      // An i1 select is: (c & t) | (!c & f).
      bool Inv;
      unsigned Idx = getCRIdxForSetCC(CC, Inv);

      // Map the CR bit index onto the matching sub-register index.
      unsigned SRI;
      switch (Idx) {
      default: llvm_unreachable("Invalid CC index");
      case 0: SRI = PPC::sub_lt; break;
      case 1: SRI = PPC::sub_gt; break;
      case 2: SRI = PPC::sub_eq; break;
      case 3: SRI = PPC::sub_un; break;
      }

      SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);

      // CRNOR of a bit with itself yields its complement.
      SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
                                              CCBit, CCBit), 0);
      SDValue C =    Inv ? NotCCBit : CCBit,
              NotC = Inv ? CCBit    : NotCCBit;

      SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
                                           C, N->getOperand(2)), 0);
      SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
                                              NotC, N->getOperand(3)), 0);

      CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
      return;
    }

    unsigned BROpc =
        getPredicateForSetCC(CC, N->getOperand(0).getValueType(), PPCSubTarget);

    // Choose the SELECT_CC_* opcode from the result type and the available
    // subtarget features.
    unsigned SelectCCOp;
    if (N->getValueType(0) == MVT::i32)
      SelectCCOp = PPC::SELECT_CC_I4;
    else if (N->getValueType(0) == MVT::i64)
      SelectCCOp = PPC::SELECT_CC_I8;
    else if (N->getValueType(0) == MVT::f32) {
      if (PPCSubTarget->hasP8Vector())
        SelectCCOp = PPC::SELECT_CC_VSSRC;
      else if (PPCSubTarget->hasSPE())
        SelectCCOp = PPC::SELECT_CC_SPE4;
      else
        SelectCCOp = PPC::SELECT_CC_F4;
    } else if (N->getValueType(0) == MVT::f64) {
      if (PPCSubTarget->hasVSX())
        SelectCCOp = PPC::SELECT_CC_VSFRC;
      else if (PPCSubTarget->hasSPE())
        SelectCCOp = PPC::SELECT_CC_SPE;
      else
        SelectCCOp = PPC::SELECT_CC_F8;
    } else if (N->getValueType(0) == MVT::f128)
      SelectCCOp = PPC::SELECT_CC_F16;
    else if (PPCSubTarget->hasSPE())
      SelectCCOp = PPC::SELECT_CC_SPE;
    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
      SelectCCOp = PPC::SELECT_CC_QFRC;
    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
      SelectCCOp = PPC::SELECT_CC_QSRC;
    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
      SelectCCOp = PPC::SELECT_CC_QBRC;
    else if (N->getValueType(0) == MVT::v2f64 ||
             N->getValueType(0) == MVT::v2i64)
      SelectCCOp = PPC::SELECT_CC_VSRC;
    else
      SelectCCOp = PPC::SELECT_CC_VRRC;

    SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
                        getI32Imm(BROpc, dl) };
    CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
    return;
  }
  case ISD::VECTOR_SHUFFLE:
    if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
                                  N->getValueType(0) == MVT::v2i64)) {
      ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

      // Mask elements 0-1 pick from the first input, 2-3 from the second.
      SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
              Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
      unsigned DM[2];

      // DM[i] is the element index within the chosen input; mask values <= 0
      // and 2 map to 0, everything else to 1.
      for (int i = 0; i < 2; ++i)
        if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
          DM[i] = 0;
        else
          DM[i] = 1;

      // A splat of element 0 of a scalar_to_vector of a load is selected as
      // LXVDSX when the load qualifies.
      if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
          Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
          isa<LoadSDNode>(Op1.getOperand(0))) {
        LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
        SDValue Base, Offset;

        if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
            (LD->getMemoryVT() == MVT::f64 ||
             LD->getMemoryVT() == MVT::i64) &&
            SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
          SDValue Chain = LD->getChain();
          SDValue Ops[] = { Base, Offset, Chain };
          MachineMemOperand *MemOp = LD->getMemOperand();
          SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
                                              N->getValueType(0), Ops);
          CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
          return;
        }
      }

      // For little endian, we must swap the input operands and adjust
      // the mask elements (reverse and invert them).
      if (PPCSubTarget->isLittleEndian()) {
        std::swap(Op1, Op2);
        unsigned tmp = DM[0];
        DM[0] = 1 - DM[1];
        DM[1] = 1 - tmp;
      }

      SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
                                              MVT::i32);
      SDValue Ops[] = { Op1, Op2, DMV };
      CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
      return;
    }

    break;
  case PPCISD::BDNZ:
  case PPCISD::BDZ: {
    bool IsPPC64 = PPCSubTarget->isPPC64();
    // Machine operand order is (operand 1, operand 0) of the original node.
    SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
    CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
                                ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
                                : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
                         MVT::Other, Ops);
    return;
  }
  case PPCISD::COND_BRANCH: {
    // Op #0 is the Chain.
    // Op #1 is the PPC::PRED_* number.
    // Op #2 is the CR#
    // Op #3 is the Dest MBB
    // Op #4 is the Flag.
    // Prevent PPC::PRED_* from being selected into LI.
    unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
    if (EnableBranchHint)
      PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));

    SDValue Pred = getI32Imm(PCC, dl);
    SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
      N->getOperand(0), N->getOperand(4) };
    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
    return;
  }
  case ISD::BR_CC: {
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    unsigned PCC =
        getPredicateForSetCC(CC, N->getOperand(2).getValueType(), PPCSubTarget);

    if (N->getOperand(2).getValueType() == MVT::i1) {
      // i1 operands: compute the comparison with one CR-logical instruction
      // and branch on the resulting bit.
      unsigned Opc;
      bool Swap;
      switch (PCC) {
      default: llvm_unreachable("Unexpected Boolean-operand predicate");
      case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
      case PPC::PRED_LE: Opc = PPC::CRORC;  Swap = true; break;
      case PPC::PRED_EQ: Opc = PPC::CREQV;  Swap = false; break;
      case PPC::PRED_GE: Opc = PPC::CRORC;  Swap = false; break;
      case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
      case PPC::PRED_NE: Opc = PPC::CRXOR;  Swap = false; break;
      }

      // A signed comparison of i1 values produces the opposite result to an
      // unsigned one if the condition code includes less-than or greater-than.
      // This is because 1 is the most negative signed i1 number and the most
      // positive unsigned i1 number. The CR-logical operations used for such
      // comparisons are non-commutative so for signed comparisons vs. unsigned
      // ones, the input operands just need to be swapped.
      if (ISD::isSignedIntSetCC(CC))
        Swap = !Swap;

      SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
                                             N->getOperand(Swap ? 3 : 2),
                                             N->getOperand(Swap ? 2 : 3)), 0);
      CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
                           N->getOperand(0));
      return;
    }

    if (EnableBranchHint)
      PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));

    SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
    SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
                        N->getOperand(4), N->getOperand(0) };
    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
    return;
  }
  case ISD::BRIND: {
    // FIXME: Should custom lower this.
    // Emitted as MTCTR(8) of the target followed by BCTR(8). Note that the
    // local named `Reg` below holds the branch opcode, not a register.
    SDValue Chain = N->getOperand(0);
    SDValue Target = N->getOperand(1);
    unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
    unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
    Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
                                           Chain), 0);
    CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
    return;
  }
  case PPCISD::TOC_ENTRY: {
    const bool isPPC64 = PPCSubTarget->isPPC64();
    const bool isELFABI = PPCSubTarget->isSVR4ABI();
    const bool isAIXABI = PPCSubTarget->isAIXABI();

    // PowerPC only supports the small, medium and large code models.
    const CodeModel::Model CModel = TM.getCodeModel();
    assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
           "PowerPC doesn't support tiny or kernel code models.");

    if (isAIXABI && CModel == CodeModel::Medium)
      report_fatal_error("Medium code model is not supported on AIX.");

    // For 64-bit small code model, we allow SelectCodeCommon to handle this,
    // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
    if (isPPC64 && CModel == CodeModel::Small)
      break;

    // Handle 32-bit small code model.
    if (!isPPC64) {
      // Transforms the PPCISD::TOC_ENTRY node into a PPC::LWZtoc machine node.
      auto replaceWithLWZtoc = [this, &dl](SDNode *TocEntry) {
        SDValue GA = TocEntry->getOperand(0);
        SDValue TocBase = TocEntry->getOperand(1);
        SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
                                            TocBase);
        transferMemOperands(TocEntry, MN);
        ReplaceNode(TocEntry, MN);
      };

      if (isELFABI) {
        assert(TM.isPositionIndependent() &&
               "32-bit ELF can only have TOC entries in position independent"
               " code.");
        // 32-bit ELF always uses a small code model toc access.
        replaceWithLWZtoc(N);
        return;
      }

      if (isAIXABI && CModel == CodeModel::Small) {
        replaceWithLWZtoc(N);
        return;
      }
    }

    assert(CModel != CodeModel::Small && "All small code models handled.");

    assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
           " ELF/AIX or 32-bit AIX in the following.");

    // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode
    // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We
    // generate two instructions as described below. The first source operand
    // is a symbol reference. If it must be toc-referenced according to
    // PPCSubTarget, we generate:
    // [32-bit AIX]
    //   LWZtocL(@sym, ADDIStocHA(%r2, @sym))
    // [64-bit ELF/AIX]
    //   LDtocL(@sym, ADDIStocHA8(%x2, @sym))
    // Otherwise we generate:
    //   ADDItocL(ADDIStocHA8(%x2, @sym), @sym)
    SDValue GA = N->getOperand(0);
    SDValue TOCbase = N->getOperand(1);

    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDNode *Tmp = CurDAG->getMachineNode(
        isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);

    if (PPCLowering->isAccessedAsGotIndirect(GA)) {
      // If it is accessed as got-indirect, we need an extra LWZ/LD to load
      // the address.
      SDNode *MN = CurDAG->getMachineNode(
          isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));

      transferMemOperands(N, MN);
      ReplaceNode(N, MN);
      return;
    }

    // Build the address relative to the TOC-pointer.
    // NOTE(review): the result type here is hard-coded to MVT::i64 rather
    // than VT -- presumably this path is only reached for 64-bit targets;
    // confirm against isAccessedAsGotIndirect.
    ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
                                          SDValue(Tmp, 0), GA));
    return;
  }
  case PPCISD::PPC32_PICGOT:
    // Generate a PIC-safe GOT reference.
    assert(PPCSubTarget->is32BitELFABI() &&
           "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
    CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
                         PPCLowering->getPointerTy(CurDAG->getDataLayout()),
                         MVT::i32);
    return;

  case PPCISD::VADD_SPLAT: {
    // This expands into one of three sequences, depending on whether
    // the first operand is odd or even, positive or negative.
    assert(isa<ConstantSDNode>(N->getOperand(0)) &&
           isa<ConstantSDNode>(N->getOperand(1)) &&
           "Invalid operand on VADD_SPLAT!");

    int Elt     = N->getConstantOperandVal(0);
    int EltSize = N->getConstantOperandVal(1);
    // Opc1 = splat-immediate, Opc2 = modulo add, Opc3 = modulo subtract, all
    // for the element size given by operand 1.
    unsigned Opc1, Opc2, Opc3;
    EVT VT;

    if (EltSize == 1) {
      Opc1 = PPC::VSPLTISB;
      Opc2 = PPC::VADDUBM;
      Opc3 = PPC::VSUBUBM;
      VT = MVT::v16i8;
    } else if (EltSize == 2) {
      Opc1 = PPC::VSPLTISH;
      Opc2 = PPC::VADDUHM;
      Opc3 = PPC::VSUBUHM;
      VT = MVT::v8i16;
    } else {
      assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
      Opc1 = PPC::VSPLTISW;
      Opc2 = PPC::VADDUWM;
      Opc3 = PPC::VSUBUWM;
      VT = MVT::v4i32;
    }

    if ((Elt & 1) == 0) {
      // Elt is even, in the range [-32,-18] + [16,30].
      //
      // Convert: VADD_SPLAT elt, size
      // Into:    tmp = VSPLTIS[BHW] elt
      //          VADDU[BHW]M tmp, tmp
      // Where:   [BHW] = B for size = 1, H for size = 2, W for size = 4
      // (The splat immediate actually emitted below is elt/2, so that the
      // self-add yields elt.)
      SDValue EltVal = getI32Imm(Elt >> 1, dl);
      SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      SDValue TmpVal = SDValue(Tmp, 0);
      ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
      return;
    } else if (Elt > 0) {
      // Elt is odd and positive, in the range [17,31].
      //
      // Convert: VADD_SPLAT elt, size
      // Into:    tmp1 = VSPLTIS[BHW] elt-16
      //          tmp2 = VSPLTIS[BHW] -16
      //          VSUBU[BHW]M tmp1, tmp2
      SDValue EltVal = getI32Imm(Elt - 16, dl);
      SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      EltVal = getI32Imm(-16, dl);
      SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
                                            SDValue(Tmp2, 0)));
      return;
    } else {
      // Elt is odd and negative, in the range [-31,-17].
      //
      // Convert: VADD_SPLAT elt, size
      // Into:    tmp1 = VSPLTIS[BHW] elt+16
      //          tmp2 = VSPLTIS[BHW] -16
      //          VADDU[BHW]M tmp1, tmp2
      SDValue EltVal = getI32Imm(Elt + 16, dl);
      SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      EltVal = getI32Imm(-16, dl);
      SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
      ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
                                            SDValue(Tmp2, 0)));
      return;
    }
  }
  }

  // Nothing above claimed the node: use the autogenerated selector.
  SelectCode(N);
}

// If the target supports the cmpb instruction, do the idiom recognition here.
// We don't do this as a DAG combine because we don't want to do it as nodes
// are being combined (because we might miss part of the eventual idiom).
We 5332 // don't want to do it during instruction selection because we want to reuse 5333 // the logic for lowering the masking operations already part of the 5334 // instruction selector. 5335 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { 5336 SDLoc dl(N); 5337 5338 assert(N->getOpcode() == ISD::OR && 5339 "Only OR nodes are supported for CMPB"); 5340 5341 SDValue Res; 5342 if (!PPCSubTarget->hasCMPB()) 5343 return Res; 5344 5345 if (N->getValueType(0) != MVT::i32 && 5346 N->getValueType(0) != MVT::i64) 5347 return Res; 5348 5349 EVT VT = N->getValueType(0); 5350 5351 SDValue RHS, LHS; 5352 bool BytesFound[8] = {false, false, false, false, false, false, false, false}; 5353 uint64_t Mask = 0, Alt = 0; 5354 5355 auto IsByteSelectCC = [this](SDValue O, unsigned &b, 5356 uint64_t &Mask, uint64_t &Alt, 5357 SDValue &LHS, SDValue &RHS) { 5358 if (O.getOpcode() != ISD::SELECT_CC) 5359 return false; 5360 ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get(); 5361 5362 if (!isa<ConstantSDNode>(O.getOperand(2)) || 5363 !isa<ConstantSDNode>(O.getOperand(3))) 5364 return false; 5365 5366 uint64_t PM = O.getConstantOperandVal(2); 5367 uint64_t PAlt = O.getConstantOperandVal(3); 5368 for (b = 0; b < 8; ++b) { 5369 uint64_t Mask = UINT64_C(0xFF) << (8*b); 5370 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt) 5371 break; 5372 } 5373 5374 if (b == 8) 5375 return false; 5376 Mask |= PM; 5377 Alt |= PAlt; 5378 5379 if (!isa<ConstantSDNode>(O.getOperand(1)) || 5380 O.getConstantOperandVal(1) != 0) { 5381 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1); 5382 if (Op0.getOpcode() == ISD::TRUNCATE) 5383 Op0 = Op0.getOperand(0); 5384 if (Op1.getOpcode() == ISD::TRUNCATE) 5385 Op1 = Op1.getOperand(0); 5386 5387 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL && 5388 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ && 5389 isa<ConstantSDNode>(Op0.getOperand(1))) { 5390 5391 unsigned Bits = Op0.getValueSizeInBits(); 5392 if (b != Bits/8-1) 
5393 return false; 5394 if (Op0.getConstantOperandVal(1) != Bits-8) 5395 return false; 5396 5397 LHS = Op0.getOperand(0); 5398 RHS = Op1.getOperand(0); 5399 return true; 5400 } 5401 5402 // When we have small integers (i16 to be specific), the form present 5403 // post-legalization uses SETULT in the SELECT_CC for the 5404 // higher-order byte, depending on the fact that the 5405 // even-higher-order bytes are known to all be zero, for example: 5406 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult 5407 // (so when the second byte is the same, because all higher-order 5408 // bits from bytes 3 and 4 are known to be zero, the result of the 5409 // xor can be at most 255) 5410 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT && 5411 isa<ConstantSDNode>(O.getOperand(1))) { 5412 5413 uint64_t ULim = O.getConstantOperandVal(1); 5414 if (ULim != (UINT64_C(1) << b*8)) 5415 return false; 5416 5417 // Now we need to make sure that the upper bytes are known to be 5418 // zero. 5419 unsigned Bits = Op0.getValueSizeInBits(); 5420 if (!CurDAG->MaskedValueIsZero( 5421 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8))) 5422 return false; 5423 5424 LHS = Op0.getOperand(0); 5425 RHS = Op0.getOperand(1); 5426 return true; 5427 } 5428 5429 return false; 5430 } 5431 5432 if (CC != ISD::SETEQ) 5433 return false; 5434 5435 SDValue Op = O.getOperand(0); 5436 if (Op.getOpcode() == ISD::AND) { 5437 if (!isa<ConstantSDNode>(Op.getOperand(1))) 5438 return false; 5439 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b))) 5440 return false; 5441 5442 SDValue XOR = Op.getOperand(0); 5443 if (XOR.getOpcode() == ISD::TRUNCATE) 5444 XOR = XOR.getOperand(0); 5445 if (XOR.getOpcode() != ISD::XOR) 5446 return false; 5447 5448 LHS = XOR.getOperand(0); 5449 RHS = XOR.getOperand(1); 5450 return true; 5451 } else if (Op.getOpcode() == ISD::SRL) { 5452 if (!isa<ConstantSDNode>(Op.getOperand(1))) 5453 return false; 5454 unsigned Bits = Op.getValueSizeInBits(); 5455 if (b != Bits/8-1) 
5456 return false; 5457 if (Op.getConstantOperandVal(1) != Bits-8) 5458 return false; 5459 5460 SDValue XOR = Op.getOperand(0); 5461 if (XOR.getOpcode() == ISD::TRUNCATE) 5462 XOR = XOR.getOperand(0); 5463 if (XOR.getOpcode() != ISD::XOR) 5464 return false; 5465 5466 LHS = XOR.getOperand(0); 5467 RHS = XOR.getOperand(1); 5468 return true; 5469 } 5470 5471 return false; 5472 }; 5473 5474 SmallVector<SDValue, 8> Queue(1, SDValue(N, 0)); 5475 while (!Queue.empty()) { 5476 SDValue V = Queue.pop_back_val(); 5477 5478 for (const SDValue &O : V.getNode()->ops()) { 5479 unsigned b = 0; 5480 uint64_t M = 0, A = 0; 5481 SDValue OLHS, ORHS; 5482 if (O.getOpcode() == ISD::OR) { 5483 Queue.push_back(O); 5484 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) { 5485 if (!LHS) { 5486 LHS = OLHS; 5487 RHS = ORHS; 5488 BytesFound[b] = true; 5489 Mask |= M; 5490 Alt |= A; 5491 } else if ((LHS == ORHS && RHS == OLHS) || 5492 (RHS == ORHS && LHS == OLHS)) { 5493 BytesFound[b] = true; 5494 Mask |= M; 5495 Alt |= A; 5496 } else { 5497 return Res; 5498 } 5499 } else { 5500 return Res; 5501 } 5502 } 5503 } 5504 5505 unsigned LastB = 0, BCnt = 0; 5506 for (unsigned i = 0; i < 8; ++i) 5507 if (BytesFound[LastB]) { 5508 ++BCnt; 5509 LastB = i; 5510 } 5511 5512 if (!LastB || BCnt < 2) 5513 return Res; 5514 5515 // Because we'll be zero-extending the output anyway if don't have a specific 5516 // value for each input byte (via the Mask), we can 'anyext' the inputs. 
5517 if (LHS.getValueType() != VT) { 5518 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT); 5519 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT); 5520 } 5521 5522 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS); 5523 5524 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1); 5525 if (NonTrivialMask && !Alt) { 5526 // Res = Mask & CMPB 5527 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 5528 CurDAG->getConstant(Mask, dl, VT)); 5529 } else if (Alt) { 5530 // Res = (CMPB & Mask) | (~CMPB & Alt) 5531 // Which, as suggested here: 5532 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge 5533 // can be written as: 5534 // Res = Alt ^ ((Alt ^ Mask) & CMPB) 5535 // useful because the (Alt ^ Mask) can be pre-computed. 5536 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 5537 CurDAG->getConstant(Mask ^ Alt, dl, VT)); 5538 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, 5539 CurDAG->getConstant(Alt, dl, VT)); 5540 } 5541 5542 return Res; 5543 } 5544 5545 // When CR bit registers are enabled, an extension of an i1 variable to a i32 5546 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus 5547 // involves constant materialization of a 0 or a 1 or both. If the result of 5548 // the extension is then operated upon by some operator that can be constant 5549 // folded with a constant 0 or 1, and that constant can be materialized using 5550 // only one instruction (like a zero or one), then we should fold in those 5551 // operations with the select. 
5552 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { 5553 if (!PPCSubTarget->useCRBits()) 5554 return; 5555 5556 if (N->getOpcode() != ISD::ZERO_EXTEND && 5557 N->getOpcode() != ISD::SIGN_EXTEND && 5558 N->getOpcode() != ISD::ANY_EXTEND) 5559 return; 5560 5561 if (N->getOperand(0).getValueType() != MVT::i1) 5562 return; 5563 5564 if (!N->hasOneUse()) 5565 return; 5566 5567 SDLoc dl(N); 5568 EVT VT = N->getValueType(0); 5569 SDValue Cond = N->getOperand(0); 5570 SDValue ConstTrue = 5571 CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT); 5572 SDValue ConstFalse = CurDAG->getConstant(0, dl, VT); 5573 5574 do { 5575 SDNode *User = *N->use_begin(); 5576 if (User->getNumOperands() != 2) 5577 break; 5578 5579 auto TryFold = [this, N, User, dl](SDValue Val) { 5580 SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1); 5581 SDValue O0 = UserO0.getNode() == N ? Val : UserO0; 5582 SDValue O1 = UserO1.getNode() == N ? Val : UserO1; 5583 5584 return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl, 5585 User->getValueType(0), {O0, O1}); 5586 }; 5587 5588 // FIXME: When the semantics of the interaction between select and undef 5589 // are clearly defined, it may turn out to be unnecessary to break here. 5590 SDValue TrueRes = TryFold(ConstTrue); 5591 if (!TrueRes || TrueRes.isUndef()) 5592 break; 5593 SDValue FalseRes = TryFold(ConstFalse); 5594 if (!FalseRes || FalseRes.isUndef()) 5595 break; 5596 5597 // For us to materialize these using one instruction, we must be able to 5598 // represent them as signed 16-bit integers. 5599 uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(), 5600 False = cast<ConstantSDNode>(FalseRes)->getZExtValue(); 5601 if (!isInt<16>(True) || !isInt<16>(False)) 5602 break; 5603 5604 // We can replace User with a new SELECT node, and try again to see if we 5605 // can fold the select with its user. 
5606 Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes); 5607 N = User; 5608 ConstTrue = TrueRes; 5609 ConstFalse = FalseRes; 5610 } while (N->hasOneUse()); 5611 } 5612 5613 void PPCDAGToDAGISel::PreprocessISelDAG() { 5614 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 5615 5616 bool MadeChange = false; 5617 while (Position != CurDAG->allnodes_begin()) { 5618 SDNode *N = &*--Position; 5619 if (N->use_empty()) 5620 continue; 5621 5622 SDValue Res; 5623 switch (N->getOpcode()) { 5624 default: break; 5625 case ISD::OR: 5626 Res = combineToCMPB(N); 5627 break; 5628 } 5629 5630 if (!Res) 5631 foldBoolExts(Res, N); 5632 5633 if (Res) { 5634 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: "); 5635 LLVM_DEBUG(N->dump(CurDAG)); 5636 LLVM_DEBUG(dbgs() << "\nNew: "); 5637 LLVM_DEBUG(Res.getNode()->dump(CurDAG)); 5638 LLVM_DEBUG(dbgs() << "\n"); 5639 5640 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); 5641 MadeChange = true; 5642 } 5643 } 5644 5645 if (MadeChange) 5646 CurDAG->RemoveDeadNodes(); 5647 } 5648 5649 /// PostprocessISelDAG - Perform some late peephole optimizations 5650 /// on the DAG representation. 5651 void PPCDAGToDAGISel::PostprocessISelDAG() { 5652 // Skip peepholes at -O0. 5653 if (TM.getOptLevel() == CodeGenOpt::None) 5654 return; 5655 5656 PeepholePPC64(); 5657 PeepholeCROps(); 5658 PeepholePPC64ZExt(); 5659 } 5660 5661 // Check if all users of this node will become isel where the second operand 5662 // is the constant zero. If this is so, and if we can negate the condition, 5663 // then we can flip the true and false operands. This will allow the zero to 5664 // be folded with the isel so that we don't need to materialize a register 5665 // containing zero. 
// Returns true only when every user of N is a SELECT_I4/SELECT_I8 machine
// node whose operand 2 is an LI/LI8 machine node loading the constant zero.
// A node with no users also yields true.
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    if (!User->isMachineOpcode())
      return false;
    if (User->getMachineOpcode() != PPC::SELECT_I4 &&
        User->getMachineOpcode() != PPC::SELECT_I8)
      return false;

    SDNode *Op2 = User->getOperand(2).getNode();
    if (!Op2->isMachineOpcode())
      return false;

    if (Op2->getMachineOpcode() != PPC::LI &&
        Op2->getMachineOpcode() != PPC::LI8)
      return false;

    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
    if (!C)
      return false;

    if (!C->isNullValue())
      return false;
  }

  return true;
}

// Replaces every user of N (each asserted to be SELECT_I4 or SELECT_I8) with
// a new node of the same opcode whose operands 1 and 2 are exchanged.
void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
  // Collect the users first and replace them in a second pass; presumably
  // this avoids modifying N's use list while it is being iterated.
  SmallVector<SDNode *, 4> ToReplace;
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
            User->getMachineOpcode() == PPC::SELECT_I8) &&
           "Must have all select users");
    ToReplace.push_back(User);
  }

  for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
       UE = ToReplace.end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    SDNode *ResNode =
      CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
                             User->getValueType(0), User->getOperand(0),
                             User->getOperand(2),
                             User->getOperand(1));

    LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
    LLVM_DEBUG(User->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nNew: ");
    LLVM_DEBUG(ResNode->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    ReplaceUses(User, ResNode);
  }
}

// Simplifies CR-bit logical machine nodes (CRAND, CRNAND, CROR, CRXOR, CRNOR,
// CREQV, CRANDC, CRORC) and the SELECT_*/BC/BCn nodes that consume a CR bit
// when an operand is CRSET, CRUNSET, or a negation written as CRNOR(x, x).
// A logical node whose users are all selects with a zero operand 2 (see
// AllUsersSelectZero) is replaced by its complement and the selects are
// swapped. All nodes are swept repeatedly until a sweep makes no replacement;
// dead nodes are removed after every sweep that changed something.
void PPCDAGToDAGISel::PeepholeCROps() {
  bool IsModified;
  do {
    IsModified = false;
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode || MachineNode->use_empty())
        continue;
      SDNode *ResNode = MachineNode;

      // Op1* describes operand 0 and Op2* describes operand 1:
      //   Set   - the operand is CRSET,
      //   Unset - the operand is CRUNSET,
      //   Not   - the operand is CRNOR(x, x), i.e. ~x.
      bool Op1Set   = false, Op1Unset = false,
           Op1Not   = false,
           Op2Set   = false, Op2Unset = false,
           Op2Not   = false;

      unsigned Opcode = MachineNode->getMachineOpcode();
      // First switch: classify the operands. The logical ops classify
      // operand 1 and then fall through to classify operand 0 together with
      // the select and branch opcodes.
      switch (Opcode) {
      default: break;
      case PPC::CRAND:
      case PPC::CRNAND:
      case PPC::CROR:
      case PPC::CRXOR:
      case PPC::CRNOR:
      case PPC::CREQV:
      case PPC::CRANDC:
      case PPC::CRORC: {
        SDValue Op = MachineNode->getOperand(1);
        if (Op.isMachineOpcode()) {
          if (Op.getMachineOpcode() == PPC::CRSET)
            Op2Set = true;
          else if (Op.getMachineOpcode() == PPC::CRUNSET)
            Op2Unset = true;
          else if (Op.getMachineOpcode() == PPC::CRNOR &&
                   Op.getOperand(0) == Op.getOperand(1))
            Op2Not = true;
        }
        LLVM_FALLTHROUGH;
      }
      case PPC::BC:
      case PPC::BCn:
      case PPC::SELECT_I4:
      case PPC::SELECT_I8:
      case PPC::SELECT_F4:
      case PPC::SELECT_F8:
      case PPC::SELECT_QFRC:
      case PPC::SELECT_QSRC:
      case PPC::SELECT_QBRC:
      case PPC::SELECT_SPE:
      case PPC::SELECT_SPE4:
      case PPC::SELECT_VRRC:
      case PPC::SELECT_VSFRC:
      case PPC::SELECT_VSSRC:
      case PPC::SELECT_VSRC: {
        SDValue Op = MachineNode->getOperand(0);
        if (Op.isMachineOpcode()) {
          if (Op.getMachineOpcode() == PPC::CRSET)
            Op1Set = true;
          else if (Op.getMachineOpcode() == PPC::CRUNSET)
            Op1Unset = true;
          else if (Op.getMachineOpcode() == PPC::CRNOR &&
                   Op.getOperand(0) == Op.getOperand(1))
            Op1Not = true;
        }
        }
        break;
      }

      // Second switch: pick the replacement node, if any. SelectSwap is set
      // when the node is replaced by its complement, which requires swapping
      // the value operands of all of its select users.
      bool SelectSwap = false;
      switch (Opcode) {
      default: break;
      case PPC::CRAND:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // x & x = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Set)
          // 1 & y = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Set)
          // x & 1 = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset || Op2Unset)
          // x & 0 = 0 & y = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Not)
          // ~x & y = andc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0).
                                             getOperand(0));
        else if (Op2Not)
          // x & ~y = andc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          // Emit the complement (nand) and swap the select users instead.
          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRNAND:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // nand(x, x) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Set)
          // nand(1, y) -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Set)
          // nand(x, 1) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Unset || Op2Unset)
          // nand(x, 0) = nand(0, y) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Not)
          // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // nand(x, ~y) = ~x | y = orc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1).
                                                      getOperand(0),
                                           MachineNode->getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CROR:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // x | x = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Set || Op2Set)
          // x | 1 = 1 | y = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Unset)
          // 0 | y = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Unset)
          // x | 0 = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // ~x | y = orc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0).
                                             getOperand(0));
        else if (Op2Not)
          // x | ~y = orc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRXOR:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // xor(x, x) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set)
          // xor(1, y) -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Set)
          // xor(x, 1) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Unset)
          // xor(0, y) = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Unset)
          // xor(x, 0) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // xor(~x, y) = eqv(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // xor(x, ~y) = eqv(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRNOR:
        if (Op1Set || Op2Set)
          // nor(1, y) -> 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Unset)
          // nor(0, y) = ~y -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Unset)
          // nor(x, 0) = ~x
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Not)
          // nor(~x, y) = andc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // nor(x, ~y) = andc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1).
                                                      getOperand(0),
                                           MachineNode->getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CREQV:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // eqv(x, x) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set)
          // eqv(1, y) = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Set)
          // eqv(x, 1) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset)
          // eqv(0, y) = ~y -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Unset)
          // eqv(x, 0) = ~x
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Not)
          // eqv(~x, y) = xor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // eqv(x, ~y) = xor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRANDC:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // andc(x, x) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set)
          // andc(1, y) = ~y
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op1Unset || Op2Set)
          // andc(0, y) = andc(x, 1) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op2Unset)
          // andc(x, 0) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // andc(~x, y) = ~(x | y) = nor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // andc(x, ~y) = x & y
          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          // ~andc(x, y) = ~x | y = orc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0));
          SelectSwap = true;
        }
        break;
      case PPC::CRORC:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // orc(x, x) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set || Op2Unset)
          // orc(1, y) = orc(x, 0) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op2Set)
          // orc(x, 1) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset)
          // orc(0, y) = ~y
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op1Not)
          // orc(~x, y) = ~(x & y) = nand(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // orc(x, ~y) = x | y
          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          // ~orc(x, y) = ~x & y = andc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0));
          SelectSwap = true;
        }
        break;
      case PPC::SELECT_I4:
      case PPC::SELECT_I8:
      case PPC::SELECT_F4:
      case PPC::SELECT_F8:
      case PPC::SELECT_QFRC:
      case PPC::SELECT_QSRC:
      case PPC::SELECT_QBRC:
      case PPC::SELECT_SPE:
      case PPC::SELECT_SPE4:
      case PPC::SELECT_VRRC:
      case PPC::SELECT_VSFRC:
      case PPC::SELECT_VSSRC:
      case PPC::SELECT_VSRC:
        if (Op1Set)
          // select(1, a, b) = a
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op1Unset)
          // select(0, a, b) = b
          ResNode = MachineNode->getOperand(2).getNode();
        else if (Op1Not)
          // select(~c, a, b) = select(c, b, a)
          ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
                                           SDLoc(MachineNode),
                                           MachineNode->getValueType(0),
                                           MachineNode->getOperand(0).
                                             getOperand(0),
                                           MachineNode->getOperand(2),
                                           MachineNode->getOperand(1));
        break;
      case PPC::BC:
      case PPC::BCn:
        if (Op1Not)
          // A branch on ~c becomes the opposite branch opcode on c.
          ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
                                                               PPC::BC,
                                           SDLoc(MachineNode),
                                           MVT::Other,
                                           MachineNode->getOperand(0).
                                             getOperand(0),
                                           MachineNode->getOperand(1),
                                           MachineNode->getOperand(2));
        // FIXME: Handle Op1Set, Op1Unset here too.
        break;
      }

      // If we're inverting this node because it is used only by selects that
      // we'd like to swap, then swap the selects before the node replacement.
      if (SelectSwap)
        SwapAllSelectUsers(MachineNode);

      if (ResNode != MachineNode) {
        LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
        LLVM_DEBUG(MachineNode->dump(CurDAG));
        LLVM_DEBUG(dbgs() << "\nNew: ");
        LLVM_DEBUG(ResNode->dump(CurDAG));
        LLVM_DEBUG(dbgs() << "\n");

        ReplaceUses(MachineNode, ResNode);
        IsModified = true;
      }
    }
    if (IsModified)
      CurDAG->RemoveDeadNodes();
  } while (IsModified);
}

// Gather the set of 32-bit operations that are known to have their
// higher-order 32 bits zero, where ToPromote contains all such operations.
//
// Returns true when Op32 is such an operation. ToPromote is only added to on
// success: Op32's node is inserted together with the nodes gathered from the
// operands that were looked through. Operand results are collected in local
// sets first, so a failing path leaves ToPromote untouched.
static bool PeepholePPC64ZExtGather(SDValue Op32,
                                    SmallPtrSetImpl<SDNode *> &ToPromote) {
  if (!Op32.isMachineOpcode())
    return false;

  // First, check for the "frontier" instructions (those that will clear the
  // higher-order 32 bits).

  // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
  // around. If it does not, then these instructions will clear the
  // higher-order bits.
  if ((Op32.getMachineOpcode() == PPC::RLWINM ||
       Op32.getMachineOpcode() == PPC::RLWNM) &&
      Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // SLW and SRW always clear the higher-order bits.
  if (Op32.getMachineOpcode() == PPC::SLW ||
      Op32.getMachineOpcode() == PPC::SRW) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // For LI and LIS, we need the immediate to be positive (so that it is not
  // sign extended).
  if (Op32.getMachineOpcode() == PPC::LI ||
      Op32.getMachineOpcode() == PPC::LIS) {
    if (!isUInt<15>(Op32.getConstantOperandVal(0)))
      return false;

    ToPromote.insert(Op32.getNode());
    return true;
  }

  // LHBRX and LWBRX always clear the higher-order bits.
  if (Op32.getMachineOpcode() == PPC::LHBRX ||
      Op32.getMachineOpcode() == PPC::LWBRX) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
  if (Op32.getMachineOpcode() == PPC::CNTLZW ||
      Op32.getMachineOpcode() == PPC::CNTTZW) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // Next, check for those instructions we can look through.

  // Assuming the mask does not wrap around, then the higher-order bits are
  // taken directly from the first operand.
  if (Op32.getMachineOpcode() == PPC::RLWIMI &&
      Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // For OR, the higher-order bits are zero if that is true for both operands.
  // For SELECT_I4, the same is true (but the relevant operand numbers are
  // shifted by 1).
  if (Op32.getMachineOpcode() == PPC::OR ||
      Op32.getMachineOpcode() == PPC::SELECT_I4) {
    unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
      return false;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // For ORI and ORIS, we need the higher-order bits of the first operand to be
  // zero, and also for the constant to be positive (so that it is not sign
  // extended).
  if (Op32.getMachineOpcode() == PPC::ORI ||
      Op32.getMachineOpcode() == PPC::ORIS) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
      return false;
    if (!isUInt<15>(Op32.getConstantOperandVal(1)))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // The higher-order bits of AND are zero if that is true for at least one of
  // the operands.
  if (Op32.getMachineOpcode() == PPC::AND) {
    SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
    bool Op0OK =
      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK =
      PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());

    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

    if (Op1OK)
      ToPromote.insert(ToPromote2.begin(), ToPromote2.end());

    return true;
  }

  // For ANDI and ANDIS, the higher-order bits are zero if either that is true
  // of the first operand, or if the second operand is positive (so that it is
  // not sign extended).
  if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
      Op32.getMachineOpcode() == PPC::ANDIS_rec) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    bool Op0OK =
      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());

    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

    return true;
  }

  return false;
}

void PPCDAGToDAGISel::PeepholePPC64ZExt() {
  if (!PPCSubTarget->isPPC64())
    return;

  // When we zero-extend from i32 to i64, we use a pattern like this:
  // def : Pat<(i64 (zext i32:$in)),
  //           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
  //                   0, 32)>;
  // There are several 32-bit shift/rotate instructions, however, that will
  // clear the higher-order bits of their output, rendering the RLDICL
  // unnecessary. When that happens, we remove it here, and redefine the
  // relevant 32-bit operation to be a 64-bit operation.

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
6312 if (N->use_empty() || !N->isMachineOpcode()) 6313 continue; 6314 6315 if (N->getMachineOpcode() != PPC::RLDICL) 6316 continue; 6317 6318 if (N->getConstantOperandVal(1) != 0 || 6319 N->getConstantOperandVal(2) != 32) 6320 continue; 6321 6322 SDValue ISR = N->getOperand(0); 6323 if (!ISR.isMachineOpcode() || 6324 ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG) 6325 continue; 6326 6327 if (!ISR.hasOneUse()) 6328 continue; 6329 6330 if (ISR.getConstantOperandVal(2) != PPC::sub_32) 6331 continue; 6332 6333 SDValue IDef = ISR.getOperand(0); 6334 if (!IDef.isMachineOpcode() || 6335 IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF) 6336 continue; 6337 6338 // We now know that we're looking at a canonical i32 -> i64 zext. See if we 6339 // can get rid of it. 6340 6341 SDValue Op32 = ISR->getOperand(1); 6342 if (!Op32.isMachineOpcode()) 6343 continue; 6344 6345 // There are some 32-bit instructions that always clear the high-order 32 6346 // bits, there are also some instructions (like AND) that we can look 6347 // through. 6348 SmallPtrSet<SDNode *, 16> ToPromote; 6349 if (!PeepholePPC64ZExtGather(Op32, ToPromote)) 6350 continue; 6351 6352 // If the ToPromote set contains nodes that have uses outside of the set 6353 // (except for the original INSERT_SUBREG), then abort the transformation. 6354 bool OutsideUse = false; 6355 for (SDNode *PN : ToPromote) { 6356 for (SDNode *UN : PN->uses()) { 6357 if (!ToPromote.count(UN) && UN != ISR.getNode()) { 6358 OutsideUse = true; 6359 break; 6360 } 6361 } 6362 6363 if (OutsideUse) 6364 break; 6365 } 6366 if (OutsideUse) 6367 continue; 6368 6369 MadeChange = true; 6370 6371 // We now know that this zero extension can be removed by promoting to 6372 // nodes in ToPromote to 64-bit operations, where for operations in the 6373 // frontier of the set, we need to insert INSERT_SUBREGs for their 6374 // operands. 
6375 for (SDNode *PN : ToPromote) { 6376 unsigned NewOpcode; 6377 switch (PN->getMachineOpcode()) { 6378 default: 6379 llvm_unreachable("Don't know the 64-bit variant of this instruction"); 6380 case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break; 6381 case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break; 6382 case PPC::SLW: NewOpcode = PPC::SLW8; break; 6383 case PPC::SRW: NewOpcode = PPC::SRW8; break; 6384 case PPC::LI: NewOpcode = PPC::LI8; break; 6385 case PPC::LIS: NewOpcode = PPC::LIS8; break; 6386 case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break; 6387 case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break; 6388 case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break; 6389 case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break; 6390 case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break; 6391 case PPC::OR: NewOpcode = PPC::OR8; break; 6392 case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break; 6393 case PPC::ORI: NewOpcode = PPC::ORI8; break; 6394 case PPC::ORIS: NewOpcode = PPC::ORIS8; break; 6395 case PPC::AND: NewOpcode = PPC::AND8; break; 6396 case PPC::ANDI_rec: 6397 NewOpcode = PPC::ANDI8_rec; 6398 break; 6399 case PPC::ANDIS_rec: 6400 NewOpcode = PPC::ANDIS8_rec; 6401 break; 6402 } 6403 6404 // Note: During the replacement process, the nodes will be in an 6405 // inconsistent state (some instructions will have operands with values 6406 // of the wrong type). Once done, however, everything should be right 6407 // again. 
6408 6409 SmallVector<SDValue, 4> Ops; 6410 for (const SDValue &V : PN->ops()) { 6411 if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 && 6412 !isa<ConstantSDNode>(V)) { 6413 SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) }; 6414 SDNode *ReplOp = 6415 CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V), 6416 ISR.getNode()->getVTList(), ReplOpOps); 6417 Ops.push_back(SDValue(ReplOp, 0)); 6418 } else { 6419 Ops.push_back(V); 6420 } 6421 } 6422 6423 // Because all to-be-promoted nodes only have users that are other 6424 // promoted nodes (or the original INSERT_SUBREG), we can safely replace 6425 // the i32 result value type with i64. 6426 6427 SmallVector<EVT, 2> NewVTs; 6428 SDVTList VTs = PN->getVTList(); 6429 for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i) 6430 if (VTs.VTs[i] == MVT::i32) 6431 NewVTs.push_back(MVT::i64); 6432 else 6433 NewVTs.push_back(VTs.VTs[i]); 6434 6435 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: "); 6436 LLVM_DEBUG(PN->dump(CurDAG)); 6437 6438 CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops); 6439 6440 LLVM_DEBUG(dbgs() << "\nNew: "); 6441 LLVM_DEBUG(PN->dump(CurDAG)); 6442 LLVM_DEBUG(dbgs() << "\n"); 6443 } 6444 6445 // Now we replace the original zero extend and its associated INSERT_SUBREG 6446 // with the value feeding the INSERT_SUBREG (which has now been promoted to 6447 // return an i64). 
6448 6449 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: "); 6450 LLVM_DEBUG(N->dump(CurDAG)); 6451 LLVM_DEBUG(dbgs() << "\nNew: "); 6452 LLVM_DEBUG(Op32.getNode()->dump(CurDAG)); 6453 LLVM_DEBUG(dbgs() << "\n"); 6454 6455 ReplaceUses(N, Op32.getNode()); 6456 } 6457 6458 if (MadeChange) 6459 CurDAG->RemoveDeadNodes(); 6460 } 6461 6462 void PPCDAGToDAGISel::PeepholePPC64() { 6463 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 6464 6465 while (Position != CurDAG->allnodes_begin()) { 6466 SDNode *N = &*--Position; 6467 // Skip dead nodes and any non-machine opcodes. 6468 if (N->use_empty() || !N->isMachineOpcode()) 6469 continue; 6470 6471 unsigned FirstOp; 6472 unsigned StorageOpcode = N->getMachineOpcode(); 6473 bool RequiresMod4Offset = false; 6474 6475 switch (StorageOpcode) { 6476 default: continue; 6477 6478 case PPC::LWA: 6479 case PPC::LD: 6480 case PPC::DFLOADf64: 6481 case PPC::DFLOADf32: 6482 RequiresMod4Offset = true; 6483 LLVM_FALLTHROUGH; 6484 case PPC::LBZ: 6485 case PPC::LBZ8: 6486 case PPC::LFD: 6487 case PPC::LFS: 6488 case PPC::LHA: 6489 case PPC::LHA8: 6490 case PPC::LHZ: 6491 case PPC::LHZ8: 6492 case PPC::LWZ: 6493 case PPC::LWZ8: 6494 FirstOp = 0; 6495 break; 6496 6497 case PPC::STD: 6498 case PPC::DFSTOREf64: 6499 case PPC::DFSTOREf32: 6500 RequiresMod4Offset = true; 6501 LLVM_FALLTHROUGH; 6502 case PPC::STB: 6503 case PPC::STB8: 6504 case PPC::STFD: 6505 case PPC::STFS: 6506 case PPC::STH: 6507 case PPC::STH8: 6508 case PPC::STW: 6509 case PPC::STW8: 6510 FirstOp = 1; 6511 break; 6512 } 6513 6514 // If this is a load or store with a zero offset, or within the alignment, 6515 // we may be able to fold an add-immediate into the memory operation. 
6516 // The check against alignment is below, as it can't occur until we check 6517 // the arguments to N 6518 if (!isa<ConstantSDNode>(N->getOperand(FirstOp))) 6519 continue; 6520 6521 SDValue Base = N->getOperand(FirstOp + 1); 6522 if (!Base.isMachineOpcode()) 6523 continue; 6524 6525 unsigned Flags = 0; 6526 bool ReplaceFlags = true; 6527 6528 // When the feeding operation is an add-immediate of some sort, 6529 // determine whether we need to add relocation information to the 6530 // target flags on the immediate operand when we fold it into the 6531 // load instruction. 6532 // 6533 // For something like ADDItocL, the relocation information is 6534 // inferred from the opcode; when we process it in the AsmPrinter, 6535 // we add the necessary relocation there. A load, though, can receive 6536 // relocation from various flavors of ADDIxxx, so we need to carry 6537 // the relocation information in the target flags. 6538 switch (Base.getMachineOpcode()) { 6539 default: continue; 6540 6541 case PPC::ADDI8: 6542 case PPC::ADDI: 6543 // In some cases (such as TLS) the relocation information 6544 // is already in place on the operand, so copying the operand 6545 // is sufficient. 6546 ReplaceFlags = false; 6547 // For these cases, the immediate may not be divisible by 4, in 6548 // which case the fold is illegal for DS-form instructions. (The 6549 // other cases provide aligned addresses and are always safe.) 
6550 if (RequiresMod4Offset && 6551 (!isa<ConstantSDNode>(Base.getOperand(1)) || 6552 Base.getConstantOperandVal(1) % 4 != 0)) 6553 continue; 6554 break; 6555 case PPC::ADDIdtprelL: 6556 Flags = PPCII::MO_DTPREL_LO; 6557 break; 6558 case PPC::ADDItlsldL: 6559 Flags = PPCII::MO_TLSLD_LO; 6560 break; 6561 case PPC::ADDItocL: 6562 Flags = PPCII::MO_TOC_LO; 6563 break; 6564 } 6565 6566 SDValue ImmOpnd = Base.getOperand(1); 6567 6568 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have 6569 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise, 6570 // we might have needed different @ha relocation values for the offset 6571 // pointers). 6572 int MaxDisplacement = 7; 6573 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { 6574 const GlobalValue *GV = GA->getGlobal(); 6575 MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement); 6576 } 6577 6578 bool UpdateHBase = false; 6579 SDValue HBase = Base.getOperand(0); 6580 6581 int Offset = N->getConstantOperandVal(FirstOp); 6582 if (ReplaceFlags) { 6583 if (Offset < 0 || Offset > MaxDisplacement) { 6584 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only 6585 // one use, then we can do this for any offset, we just need to also 6586 // update the offset (i.e. the symbol addend) on the addis also. 6587 if (Base.getMachineOpcode() != PPC::ADDItocL) 6588 continue; 6589 6590 if (!HBase.isMachineOpcode() || 6591 HBase.getMachineOpcode() != PPC::ADDIStocHA8) 6592 continue; 6593 6594 if (!Base.hasOneUse() || !HBase.hasOneUse()) 6595 continue; 6596 6597 SDValue HImmOpnd = HBase.getOperand(1); 6598 if (HImmOpnd != ImmOpnd) 6599 continue; 6600 6601 UpdateHBase = true; 6602 } 6603 } else { 6604 // If we're directly folding the addend from an addi instruction, then: 6605 // 1. In general, the offset on the memory access must be zero. 6606 // 2. 
If the addend is a constant, then it can be combined with a 6607 // non-zero offset, but only if the result meets the encoding 6608 // requirements. 6609 if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) { 6610 Offset += C->getSExtValue(); 6611 6612 if (RequiresMod4Offset && (Offset % 4) != 0) 6613 continue; 6614 6615 if (!isInt<16>(Offset)) 6616 continue; 6617 6618 ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd), 6619 ImmOpnd.getValueType()); 6620 } else if (Offset != 0) { 6621 continue; 6622 } 6623 } 6624 6625 // We found an opportunity. Reverse the operands from the add 6626 // immediate and substitute them into the load or store. If 6627 // needed, update the target flags for the immediate operand to 6628 // reflect the necessary relocation information. 6629 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); 6630 LLVM_DEBUG(Base->dump(CurDAG)); 6631 LLVM_DEBUG(dbgs() << "\nN: "); 6632 LLVM_DEBUG(N->dump(CurDAG)); 6633 LLVM_DEBUG(dbgs() << "\n"); 6634 6635 // If the relocation information isn't already present on the 6636 // immediate operand, add it now. 6637 if (ReplaceFlags) { 6638 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { 6639 SDLoc dl(GA); 6640 const GlobalValue *GV = GA->getGlobal(); 6641 // We can't perform this optimization for data whose alignment 6642 // is insufficient for the instruction encoding. 
6643 if (GV->getAlignment() < 4 && 6644 (RequiresMod4Offset || (Offset % 4) != 0)) { 6645 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); 6646 continue; 6647 } 6648 ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); 6649 } else if (ConstantPoolSDNode *CP = 6650 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) { 6651 const Constant *C = CP->getConstVal(); 6652 ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, 6653 CP->getAlignment(), 6654 Offset, Flags); 6655 } 6656 } 6657 6658 if (FirstOp == 1) // Store 6659 (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, 6660 Base.getOperand(0), N->getOperand(3)); 6661 else // Load 6662 (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), 6663 N->getOperand(2)); 6664 6665 if (UpdateHBase) 6666 (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0), 6667 ImmOpnd); 6668 6669 // The add-immediate may now be dead, in which case remove it. 6670 if (Base.getNode()->use_empty()) 6671 CurDAG->RemoveDeadNode(Base.getNode()); 6672 } 6673 } 6674 6675 /// createPPCISelDag - This pass converts a legalized DAG into a 6676 /// PowerPC-specific DAG, ready for instruction scheduling. 6677 /// 6678 FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM, 6679 CodeGenOpt::Level OptLevel) { 6680 return new PPCDAGToDAGISel(TM, OptLevel); 6681 } 6682