1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a pattern matching instruction selector for PowerPC, 10 // converting from a legalized dag to a PPC dag. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "MCTargetDesc/PPCMCTargetDesc.h" 15 #include "MCTargetDesc/PPCPredicates.h" 16 #include "PPC.h" 17 #include "PPCISelLowering.h" 18 #include "PPCMachineFunctionInfo.h" 19 #include "PPCSubtarget.h" 20 #include "PPCTargetMachine.h" 21 #include "llvm/ADT/APInt.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallPtrSet.h" 25 #include "llvm/ADT/SmallVector.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/Analysis/BranchProbabilityInfo.h" 28 #include "llvm/CodeGen/FunctionLoweringInfo.h" 29 #include "llvm/CodeGen/ISDOpcodes.h" 30 #include "llvm/CodeGen/MachineBasicBlock.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/SelectionDAG.h" 35 #include "llvm/CodeGen/SelectionDAGISel.h" 36 #include "llvm/CodeGen/SelectionDAGNodes.h" 37 #include "llvm/CodeGen/TargetInstrInfo.h" 38 #include "llvm/CodeGen/TargetRegisterInfo.h" 39 #include "llvm/CodeGen/ValueTypes.h" 40 #include "llvm/IR/BasicBlock.h" 41 #include "llvm/IR/DebugLoc.h" 42 #include "llvm/IR/Function.h" 43 #include "llvm/IR/GlobalValue.h" 44 #include "llvm/IR/InlineAsm.h" 45 #include "llvm/IR/InstrTypes.h" 46 #include "llvm/IR/IntrinsicsPowerPC.h" 47 #include "llvm/IR/Module.h" 48 #include "llvm/Support/Casting.h" 49 #include 
"llvm/Support/CodeGen.h" 50 #include "llvm/Support/CommandLine.h" 51 #include "llvm/Support/Compiler.h" 52 #include "llvm/Support/Debug.h" 53 #include "llvm/Support/ErrorHandling.h" 54 #include "llvm/Support/KnownBits.h" 55 #include "llvm/Support/MachineValueType.h" 56 #include "llvm/Support/MathExtras.h" 57 #include "llvm/Support/raw_ostream.h" 58 #include <algorithm> 59 #include <cassert> 60 #include <cstdint> 61 #include <iterator> 62 #include <limits> 63 #include <memory> 64 #include <new> 65 #include <tuple> 66 #include <utility> 67 68 using namespace llvm; 69 70 #define DEBUG_TYPE "ppc-codegen" 71 72 STATISTIC(NumSextSetcc, 73 "Number of (sext(setcc)) nodes expanded into GPR sequence."); 74 STATISTIC(NumZextSetcc, 75 "Number of (zext(setcc)) nodes expanded into GPR sequence."); 76 STATISTIC(SignExtensionsAdded, 77 "Number of sign extensions for compare inputs added."); 78 STATISTIC(ZeroExtensionsAdded, 79 "Number of zero extensions for compare inputs added."); 80 STATISTIC(NumLogicOpsOnComparison, 81 "Number of logical ops on i1 values calculated in GPR."); 82 STATISTIC(OmittedForNonExtendUses, 83 "Number of compares not eliminated as they have non-extending uses."); 84 STATISTIC(NumP9Setb, 85 "Number of compares lowered to setb."); 86 87 // FIXME: Remove this once the bug has been fixed! 
// Command-line knobs controlling PPC instruction selection. ANDIGlueBug is
// non-static because it is referenced from other PPC backend files.
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
                          cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);

static cl::opt<bool>
    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
                       cl::desc("use aggressive ppc isel for bit permutations"),
                       cl::Hidden);
static cl::opt<bool> BPermRewriterNoMasking(
    "ppc-bit-perm-rewriter-stress-rotates",
    cl::desc("stress rotate selection in aggressive ppc isel for "
             "bit permutations"),
    cl::Hidden);

static cl::opt<bool> EnableBranchHint(
  "ppc-use-branch-hint", cl::init(true),
    cl::desc("Enable static hinting of branches on ppc"),
    cl::Hidden);

static cl::opt<bool> EnableTLSOpt(
  "ppc-tls-opt", cl::init(true),
    cl::desc("Enable tls optimization peephole"),
    cl::Hidden);

// Categories of integer comparisons that may be lowered to GPR-only code
// sequences (rather than condition-register logic); selects which ones the
// CmpInGPR option below enables.
enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
                     ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
                     ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };

static cl::opt<ICmpInGPRType> CmpInGPR(
    "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
    cl::desc("Specify the types of comparisons to emit GPR-only code for."),
    cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
               clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
               clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
               clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
               clEnumValN(ICGPR_NonExtIn, "nonextin",
                          "Only comparisons where inputs don't need [sz]ext."),
               clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
               clEnumValN(ICGPR_ZextI32, "zexti32",
                          "Only i32 comparisons with zext result."),
               clEnumValN(ICGPR_ZextI64, "zexti64",
                          "Only i64 comparisons with zext result."),
               clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
               clEnumValN(ICGPR_SextI32, "sexti32",
                          "Only i32 comparisons with sext result."),
               clEnumValN(ICGPR_SextI64, "sexti64",
                          "Only i64 comparisons with sext result.")));
namespace {

//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
/// instructions for SelectionDAG operations.
///
class PPCDAGToDAGISel : public SelectionDAGISel {
  const PPCTargetMachine &TM;
  const PPCSubtarget *Subtarget = nullptr;
  const PPCTargetLowering *PPCLowering = nullptr;
  unsigned GlobalBaseReg = 0;

public:
  explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), TM(tm) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Make sure we re-emit a set of the global base reg if necessary
    GlobalBaseReg = 0;
    Subtarget = &MF.getSubtarget<PPCSubtarget>();
    PPCLowering = Subtarget->getTargetLowering();
    SelectionDAGISel::runOnMachineFunction(MF);

    return true;
  }

  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

  /// getI16Imm - Return a target constant with the specified value, of type
  /// i16.
  inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
  }

  /// getI32Imm - Return a target constant with the specified value, of type
  /// i32.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  /// getI64Imm - Return a target constant with the specified value, of type
  /// i64.
  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
  }

  /// getSmallIPtrImm - Return a target constant of pointer type.
  inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(
        Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
  }

  /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
  /// rotate and mask opcode and mask operation.
  static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                              unsigned &SH, unsigned &MB, unsigned &ME);

  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
  /// base register. Return the virtual register that holds this value.
  SDNode *getGlobalBaseReg();

  void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);

  // Select - Convert the specified operand from a target-independent to a
  // target-specific node if it hasn't already been changed.
  void Select(SDNode *N) override;

  bool tryBitfieldInsert(SDNode *N);
  bool tryBitPermutation(SDNode *N);
  bool tryIntCompareInGPR(SDNode *N);

  // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
  // an X-Form load instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormLoad(LoadSDNode *N);
  // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
  // an X-Form store instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormStore(StoreSDNode *N);
  /// SelectCC - Select a comparison of the specified values with the
  /// specified condition code, returning the CR# of the expression.
  SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                   const SDLoc &dl, SDValue Chain = SDValue());

  /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
  /// immediate field.  Note that the operand at this point is already the
  /// result of a prior SelectAddressRegImm call.
  bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
    if (N.getOpcode() == ISD::TargetConstant ||
        N.getOpcode() == ISD::TargetGlobalAddress) {
      Out = N;
      return true;
    }

    return false;
  }

  /// SelectAddrIdx - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is D.
  /// The last parameter \p 0 means associated D form has no requirement for
  /// 16 bit signed displacement.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None);
  }

  /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DS.
  /// The last parameter \p 4 means associated DS form 16 bit signed
  /// displacement must be a multiple of 4.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                            Align(4));
  }

  /// SelectAddrIdxX16 - Given the specified address, check to see if it can
  /// be represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DQ.
  /// The last parameter \p 16 means associated DQ form 16 bit signed
  /// displacement must be a multiple of 16.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                            Align(16));
  }

  /// SelectAddrIdxOnly - Given the specified address, force it to be
  /// represented as an indexed [r+r] operation.
  bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
  }

  /// SelectAddrImm - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement [r+imm].
  /// The last parameter \p 0 means D form has no requirement for 16 bit
  /// signed displacement.
  bool SelectAddrImm(SDValue N, SDValue &Disp,
                     SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None);
  }

  /// SelectAddrImmX4 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 4 (last parameter). Suitable for use by STD and friends.
  bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
  }

  /// SelectAddrImmX16 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 16(last parameter). Suitable for use by STXV and friends.
  bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
                                            Align(16));
  }

  /// SelectAddrImmX34 - Returns true if the address N can be represented by
  /// a base register plus a signed 34-bit displacement. Suitable for use by
  /// PSTXVP and friends.
  bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
  }

  // Select an address into a single register.
  bool SelectAddr(SDValue N, SDValue &Base) {
    Base = N;
    return true;
  }

  bool SelectAddrPCRel(SDValue N, SDValue &Base) {
    return PPCLowering->SelectAddressPCRel(N, Base);
  }

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.  It is always correct to compute the value into
  /// a register.  The case of adding a (possibly relocatable) constant to a
  /// register can be improved, but it is wrong to substitute Reg+Reg for
  /// Reg in an asm, because the load or store opcode would have to change.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override {
    switch(ConstraintID) {
    default:
      errs() << "ConstraintID: " << ConstraintID << "\n";
      llvm_unreachable("Unexpected asm memory constraint");
    case InlineAsm::Constraint_es:
    case InlineAsm::Constraint_m:
    case InlineAsm::Constraint_o:
    case InlineAsm::Constraint_Q:
    case InlineAsm::Constraint_Z:
    case InlineAsm::Constraint_Zy:
      // We need to make sure that this one operand does not end up in r0
      // (because we might end up lowering this as 0(%op)).
      const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
      const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
      SDLoc dl(Op);
      SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
      SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);

      OutOps.push_back(NewOp);
      return false;
    }
    return true;
  }

  StringRef getPassName() const override {
    return "PowerPC DAG->DAG Pattern Instruction Selection";
  }

// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"

private:
  bool trySETCC(SDNode *N);
  bool tryFoldSWTestBRCC(SDNode *N);
  bool tryAsSingleRLDICL(SDNode *N);
  bool tryAsSingleRLDICR(SDNode *N);
  bool tryAsSingleRLWINM(SDNode *N);
  bool tryAsSingleRLWINM8(SDNode *N);
  bool tryAsSingleRLWIMI(SDNode *N);
  bool tryAsPairOfRLDICL(SDNode *N);
  bool tryAsSingleRLDIMI(SDNode *N);

  void PeepholePPC64();
  void PeepholePPC64ZExt();
  void PeepholeCROps();

  SDValue combineToCMPB(SDNode *N);
  void foldBoolExts(SDValue &Res, SDNode *&N);

  bool AllUsersSelectZero(SDNode *N);
  void SwapAllSelectUsers(SDNode *N);

  bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
  void transferMemOperands(SDNode *N, SDNode *Result);
};

} // end anonymous namespace

/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
  if (!GlobalBaseReg) {
    const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineBasicBlock &FirstMBB = MF->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    const Module *M = MF->getFunction().getParent();
    DebugLoc dl;

    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
      if (Subtarget->isTargetELF()) {
        // 32-bit ELF PIC uses the fixed register R30 as the PIC base.
        GlobalBaseReg = PPC::R30;
        if (!Subtarget->isSecurePlt() &&
            M->getPICLevel() == PICLevel::SmallPIC) {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        } else {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
          BuildMI(FirstMBB, MBBI, dl,
                  TII.get(PPC::UpdateGBR), GlobalBaseReg)
              .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        }
      } else {
        GlobalBaseReg =
          RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
      }
    } else {
      // We must ensure that this sequence is dominated by the prologue.
      // FIXME: This is a bit of a big hammer since we don't get the benefits
      // of shrink-wrapping whenever we emit this instruction. Considering
      // this is used in any function where we emit a jump table, this may be
      // a significant limitation. We should consider inserting this in the
      // block where it is used and then commoning this sequence up if it
      // appears in multiple places.
      // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
      // MovePCtoLR8.
      MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
    }
  }
  return CurDAG->getRegister(GlobalBaseReg,
                             PPCLowering->getPointerTy(CurDAG->getDataLayout()))
      .getNode();
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
/// operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

/// isInt64Immediate - This method tests to see if the value is a 64-bit
/// constant operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
  return isInt64Immediate(N.getNode(), Imm);
}

/// Compute a static branch hint (taken / not-taken / none) for the branch to
/// \p DestMBB, based on the edge probabilities recorded by branch probability
/// analysis. Only strongly-biased branches are hinted.
static unsigned getBranchHint(unsigned PCC,
                              const FunctionLoweringInfo &FuncInfo,
                              const SDValue &DestMBB) {
  assert(isa<BasicBlockSDNode>(DestMBB));

  if (!FuncInfo.BPI) return PPC::BR_NO_HINT;

  const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
  const Instruction *BBTerm = BB->getTerminator();

  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;

  const BasicBlock *TBB = BBTerm->getSuccessor(0);
  const BasicBlock *FBB = BBTerm->getSuccessor(1);

  auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
  auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);

  // We only want to handle cases which are easy to predict at static time, e.g.
  // C++ throw statement, that is very likely not taken, or calling never
  // returned function, e.g. stdlib exit(). So we set Threshold to filter
  // unwanted cases.
  //
  // Below is LLVM branch weight table, we only want to handle case 1, 2
  //
  // Case                  Taken:Nontaken  Example
  // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
  // 2. Invoke-terminating 1:1048575
  // 3. Coldblock          4:64            __builtin_expect
  // 4. Loop Branch        124:4           For loop
  // 5. PH/ZH/FPH          20:12
  const uint32_t Threshold = 10000;

  // Skip branches that are not strongly biased (ratio below Threshold).
  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
    return PPC::BR_NO_HINT;

  LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
                    << "::" << BB->getName() << "'\n"
                    << "  -> " << TBB->getName() << ": " << TProb << "\n"
                    << "  -> " << FBB->getName() << ": " << FProb << "\n");

  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);

  // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
  // because we want 'TProb' to stand for the 'branch probability' to Dest
  // BasicBlock.
  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
    std::swap(TProb, FProb);

  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has a immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc
         && isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

// Replace node SN with an ADDI/ADDI8 of the target frame index held by N plus
// the given byte Offset. Reuses SN in place when it has a single use.
void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
  SDLoc dl(SN);
  int FI = cast<FrameIndexSDNode>(N)->getIndex();
  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
  if (SN->hasOneUse())
    CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
                         getSmallIPtrImm(Offset, dl));
  else
    ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
                                           getSmallIPtrImm(Offset, dl)));
}

bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
                                      bool isShiftMask, unsigned &SH,
                                      unsigned &MB, unsigned &ME) {
  // Don't even go down this path for i64, since different logic will be
  // necessary for rldicl/rldicr/rldimi.
  if (N->getValueType(0) != MVT::i32)
    return false;

  unsigned Shift  = 32;
  unsigned Indeterminant = ~0;  // bit mask marking indeterminant results
  unsigned Opcode = N->getOpcode();
  if (N->getNumOperands() != 2 ||
      !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
    return false;

  if (Opcode == ISD::SHL) {
    // apply shift left to mask if it comes first
    if (isShiftMask) Mask = Mask << Shift;
    // determine which bits are made indeterminant by shift
    Indeterminant = ~(0xFFFFFFFFu << Shift);
  } else if (Opcode == ISD::SRL) {
    // apply shift right to mask if it comes first
    if (isShiftMask) Mask = Mask >> Shift;
    // determine which bits are made indeterminant by shift
    Indeterminant = ~(0xFFFFFFFFu >> Shift);
    // adjust for the left rotate
    Shift = 32 - Shift;
  } else if (Opcode == ISD::ROTL) {
    Indeterminant = 0;
  } else {
    return false;
  }

  // if the mask doesn't intersect any Indeterminant bits
  if (Mask && !(Mask & Indeterminant)) {
    SH = Shift & 31;
    // make sure the mask is still a mask (wrap arounds may not be)
    return isRunOfOnes(Mask, MB, ME);
  }
  return false;
}

// Select a TLS X-form store for an ISD::STORE whose base is a
// PPCISD::ADD_TLS; the TLS relocation operand becomes the index operand.
bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
  SDValue Base = ST->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  SDValue Offset = ST->getOffset();
  if (!Offset.isUndef())
    return false;
  if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
    return false;

  SDLoc dl(ST);
  EVT MemVT = ST->getMemoryVT();
  EVT RegVT = ST->getValue().getValueType();

  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
    default:
      return false;
    case MVT::i8: {
      Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
      break;
    }
    case MVT::i16: {
      Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
      break;
    }
    case MVT::i32: {
      Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
      break;
    }
    case MVT::i64: {
      Opcode = PPC::STDXTLS;
      break;
    }
  }
  SDValue Chain = ST->getChain();
  SDVTList VTs = ST->getVTList();
  SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
                   Chain};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  transferMemOperands(ST, MN);
  ReplaceNode(ST, MN);
  return true;
}

// Select a TLS X-form load for an ISD::LOAD whose base is a PPCISD::ADD_TLS;
// mirrors tryTLSXFormStore above.
bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
  SDValue Base = LD->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  SDValue Offset = LD->getOffset();
  if (!Offset.isUndef())
    return false;
  if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
    return false;

  SDLoc dl(LD);
  EVT MemVT = LD->getMemoryVT();
  EVT RegVT = LD->getValueType(0);
  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
    default:
      return false;
    case MVT::i8: {
      Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
      break;
    }
    case MVT::i16: {
      Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
      break;
    }
    case MVT::i32: {
      Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
      break;
    }
    case MVT::i64: {
      Opcode = PPC::LDXTLS;
      break;
    }
  }
  SDValue Chain = LD->getChain();
  SDVTList VTs = LD->getVTList();
  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  transferMemOperands(LD, MN);
  ReplaceNode(LD, MN);
  return true;
}

/// Turn an or of two masked values into the rotate left word immediate then
/// mask insert (rlwimi) instruction.
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDLoc dl(N);

  KnownBits LKnown = CurDAG->computeKnownBits(Op0);
  KnownBits RKnown = CurDAG->computeKnownBits(Op1);

  unsigned TargetMask = LKnown.Zero.getZExtValue();
  unsigned InsertMask = RKnown.Zero.getZExtValue();

  // The OR is only an insert if every bit is known-zero on at least one side.
  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
    unsigned Op0Opc = Op0.getOpcode();
    unsigned Op1Opc = Op1.getOpcode();
    unsigned Value, SH = 0;
    TargetMask = ~TargetMask;
    InsertMask = ~InsertMask;

    // If the LHS has a foldable shift and the RHS does not, then swap it to the
    // RHS so that we can fold the shift into the insert.
    if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
      if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
          Op0.getOperand(0).getOpcode() == ISD::SRL) {
        if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
            Op1.getOperand(0).getOpcode() != ISD::SRL) {
          std::swap(Op0, Op1);
          std::swap(Op0Opc, Op1Opc);
          std::swap(TargetMask, InsertMask);
        }
      }
    } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
      if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
          Op1.getOperand(0).getOpcode() != ISD::SRL) {
        std::swap(Op0, Op1);
        std::swap(Op0Opc, Op1Opc);
        std::swap(TargetMask, InsertMask);
      }
    }

    unsigned MB, ME;
    if (isRunOfOnes(InsertMask, MB, ME)) {
      if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
          isInt32Immediate(Op1.getOperand(1), Value)) {
        Op1 = Op1.getOperand(0);
        // SRL by Value is a left-rotate by (32 - Value) for rlwimi.
        SH  = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
      }
      if (Op1Opc == ISD::AND) {
        // The AND mask might not be a constant, and we need to make sure that
        // if we're going to fold the masking with the insert, all bits not
        // known to be zero in the mask are known to be one.
        KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
        bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();

        unsigned SHOpc = Op1.getOperand(0).getOpcode();
        if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
            isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
          // Note that Value must be in range here (less than 32) because
          // otherwise there would not be any bits set in InsertMask.
          Op1 = Op1.getOperand(0).getOperand(0);
          SH  = (SHOpc == ISD::SHL) ? Value : 32 - Value;
        }
      }

      SH &= 31;
      SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
                          getI32Imm(ME, dl) };
      ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
      return true;
    }
  }
  return false;
}

// Return the widest size (in bits) any user truncates this node's value to,
// or 0 if some user is not a (possibly implicit, store-based) truncation.
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
  unsigned MaxTruncation = 0;
  // Cannot use range-based for loop here as we need the actual use (i.e. we
  // need the operand number corresponding to the use). A range-based for
  // will unbox the use and provide an SDNode*.
  for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
       Use != UseEnd; ++Use) {
    unsigned Opc =
      Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
    switch (Opc) {
    default: return 0;
    case ISD::TRUNCATE:
      if (Use->isMachineOpcode())
        return 0;
      MaxTruncation =
        std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
      continue;
    case ISD::STORE: {
      if (Use->isMachineOpcode())
        return 0;
      StoreSDNode *STN = cast<StoreSDNode>(*Use);
      unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
      if (MemVTSize == 64 || Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, MemVTSize);
      continue;
    }
    case PPC::STW8:
    case PPC::STWX8:
    case PPC::STWU8:
    case PPC::STWUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 32u);
      continue;
    case PPC::STH8:
    case PPC::STHX8:
    case PPC::STHU8:
    case PPC::STHUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 16u);
      continue;
    case PPC::STB8:
    case PPC::STBX8:
    case PPC::STBU8:
    case PPC::STBUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 8u);
      continue;
    }
  }
  return MaxTruncation;
}

// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
// zeros and return the number of bits by the left of these consecutive zeros.
static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
  unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));
  unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));
  // The zero run straddling bit 32 has length HiTZ + LoLZ.
  if ((HiTZ + LoLZ) >= Num)
    return (32 + HiTZ);
  return 0;
}

// Direct materialization of 64-bit constants by enumerated patterns.
static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
                                  uint64_t Imm, unsigned &InstCnt) {
  // Try each enumerated 1/2/3-instruction pattern in turn; on success the
  // materializing node is returned and InstCnt holds the instruction count.
  // On failure, InstCnt is set to 0 and nullptr is returned.
  unsigned TZ = countTrailingZeros<uint64_t>(Imm);
  unsigned LZ = countLeadingZeros<uint64_t>(Imm);
  unsigned TO = countTrailingOnes<uint64_t>(Imm);
  unsigned LO = countLeadingOnes<uint64_t>(Imm);
  unsigned Hi32 = Hi_32(Imm);
  unsigned Lo32 = Lo_32(Imm);
  SDNode *Result = nullptr;
  unsigned Shift = 0;

  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };

  // Following patterns use 1 instruction to materialize the Imm.
  InstCnt = 1;
  // 1-1) Patterns : {zeros}{15-bit value}
  //                 {ones}{15-bit value}
  if (isInt<16>(Imm)) {
    SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
    return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  }
  // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
  //                 {ones}{15-bit value}{16 zeros}
  if (TZ > 15 && (LZ > 32 || LO > 32))
    return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                  getI32Imm((Imm >> 16) & 0xffff));

  // Following patterns use 2 instructions to materialize the Imm.
  InstCnt = 2;
  assert(LZ < 64 && "Unexpected leading zeros here.");
  // Count of ones following the leading zeros.
  unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
  // 2-1) Patterns : {zeros}{31-bit value}
  //                 {ones}{31-bit value}
  if (isInt<32>(Imm)) {
    uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Imm & 0xffff));
  }
  // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
  //                 {zeros}{15-bit value}{zeros}
  //                 {zeros}{ones}{15-bit value}
  //                 {ones}{15-bit value}{zeros}
  // We can take advantage of LI's sign-extension semantics to generate leading
  // ones, and then use RLDIC to mask off the ones in both sides after rotation.
  if ((LZ + FO + TZ) > 48) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> TZ) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TZ), getI32Imm(LZ));
  }
  // 2-3) Pattern : {zeros}{15-bit value}{ones}
  // Shift right the Imm by (48 - LZ) bits to construct a negative 16-bit
  // value, therefore we can take advantage of LI's sign-extension semantics,
  // and then mask them off after rotation.
  //
  // +--LZ--||-15-bit-||--TO--+     +-------------|--16-bit--+
  // |00000001bbbbbbbbb1111111| ->  |00000000000001bbbbbbbbb1|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  //          Imm                   (Imm >> (48 - LZ) & 0xffff)
  // +----sext-----|--16-bit--+     +clear-|-----------------+
  // |11111111111111bbbbbbbbb1| ->  |00000001bbbbbbbbb1111111|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  // LI8: sext many leading zeros   RLDICL: rotate left (48 - LZ), clear left LZ
  if ((LZ + TO) > 48) {
    // Since the immediates with (LZ > 32) have been handled by previous
    // patterns, here we have (LZ <= 32) to make sure we will not shift right
    // the Imm by a negative value.
    assert(LZ <= 32 && "Unexpected shift value.");
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> (48 - LZ) & 0xffff)));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(48 - LZ), getI32Imm(LZ));
  }
  // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
  //                 {ones}{15-bit value}{ones}
  // We can take advantage of LI's sign-extension semantics to generate leading
  // ones, and then use RLDICL to mask off the ones in left sides (if required)
  // after rotation.
  //
  // +-LZ-FO||-15-bit-||--TO--+     +-------------|--16-bit--+
  // |00011110bbbbbbbbb1111111| ->  |000000000011110bbbbbbbbb|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  //            Imm                    (Imm >> TO) & 0xffff
  // +----sext-----|--16-bit--+     +LZ|---------------------+
  // |111111111111110bbbbbbbbb| ->  |00011110bbbbbbbbb1111111|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  // LI8: sext many leading zeros   RLDICL: rotate left TO, clear left LZ
  if ((LZ + FO + TO) > 48) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> TO) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TO), getI32Imm(LZ));
  }
  // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
  // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
  // value, we can use LI for Lo16 without generating leading ones then add the
  // Hi16(in Lo32).
  if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm(Lo32 & 0xffff));
    return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Lo32 >> 16));
  }
  // 2-6) Patterns : {******}{49 zeros}{******}
  //                 {******}{49 ones}{******}
  // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
  // bits remain on both sides. Rotate right the Imm to construct an int<16>
  // value, use LI for int<16> value and then use RLDICL without mask to rotate
  // it back.
  //
  // 1) findContiguousZerosAtLeast(Imm, 49)
  // +------|--zeros-|------+     +---ones--||---15 bit--+
  // |bbbbbb0000000000aaaaaa| ->  |0000000000aaaaaabbbbbb|
  // +----------------------+     +----------------------+
  // 63                     0     63                     0
  //
  // 2) findContiguousZerosAtLeast(~Imm, 49)
  // +------|--ones--|------+     +---ones--||---15 bit--+
  // |bbbbbb1111111111aaaaaa| ->  |1111111111aaaaaabbbbbb|
  // +----------------------+     +----------------------+
  // 63                     0     63                     0
  if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
      (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
    uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift));
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm(RotImm & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Shift), getI32Imm(0));
  }

  // Following patterns use 3 instructions to materialize the Imm.
  InstCnt = 3;
  // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
  //                 {zeros}{31-bit value}{zeros}
  //                 {zeros}{ones}{31-bit value}
  //                 {ones}{31-bit value}{zeros}
  // We can take advantage of LIS's sign-extension semantics to generate leading
  // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
  // ones in both sides after rotation.
  if ((LZ + FO + TZ) > 32) {
    uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> TZ) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TZ), getI32Imm(LZ));
  }
  // 3-2) Pattern : {zeros}{31-bit value}{ones}
  // Shift right the Imm by (32 - LZ) bits to construct a negative 32-bit
  // value, therefore we can take advantage of LIS's sign-extension semantics,
  // add the remaining bits with ORI, and then mask them off after rotation.
  // This is similar to Pattern 2-3, please refer to the diagram there.
  if ((LZ + TO) > 32) {
    // Since the immediates with (LZ > 32) have been handled by previous
    // patterns, here we have (LZ <= 32) to make sure we will not shift right
    // the Imm by a negative value.
    assert(LZ <= 32 && "Unexpected shift value.");
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                    getI32Imm((Imm >> (48 - LZ)) & 0xffff));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> (32 - LZ)) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(32 - LZ), getI32Imm(LZ));
  }
  // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
  //                 {ones}{31-bit value}{ones}
  // We can take advantage of LIS's sign-extension semantics to generate leading
  // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
  // ones in left sides (if required) after rotation.
  // This is similar to Pattern 2-4, please refer to the diagram there.
  if ((LZ + FO + TO) > 32) {
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                    getI32Imm((Imm >> (TO + 16)) & 0xffff));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> TO) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TO), getI32Imm(LZ));
  }
  // 3-4) Patterns : High word == Low word
  if (Hi32 == Lo32) {
    // Handle the first 32 bits.
    uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm(Lo32 & 0xffff));
    // Use rldimi to insert the Low word into High word.
    SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
                     getI32Imm(0)};
    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  }
  // 3-5) Patterns : {******}{33 zeros}{******}
  //                 {******}{33 ones}{******}
  // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
  // bits remain on both sides. Rotate right the Imm to construct an int<32>
  // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
  // rotate it back.
  // This is similar to Pattern 2-6, please refer to the diagram there.
  if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
      (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
    uint64_t RotImm = (Imm >> Shift) | (Imm << (64 - Shift));
    uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm(RotImm & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Shift), getI32Imm(0));
  }

  // No enumerated pattern matched; the caller must fall back to another
  // strategy (e.g. materializing the halves separately).
  InstCnt = 0;
  return nullptr;
}

// Try to select instructions to generate a 64 bit immediate using prefix as
// well as non prefix instructions. The function will return the SDNode
// to materialize that constant or it will return nullptr if it does not
// find one. The variable InstCnt is set to the number of instructions that
// were selected.
static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,
                                        uint64_t Imm, unsigned &InstCnt) {
  // Following patterns use 1 instruction to materialize Imm.
  InstCnt = 1;

  // The pli instruction can materialize up to 34 bits directly.
  // It is defined in the TD file and so we just return the constant.
  if (isInt<34>(Imm))
    return cast<ConstantSDNode>(CurDAG->getConstant(Imm, dl, MVT::i64));

  InstCnt = 0;
  return nullptr;
}

// Select the cheapest sequence to materialize the 64-bit immediate Imm,
// considering both the direct (non-prefix) patterns and, on subtargets with
// prefixed instructions, the prefix patterns. If InstCnt is non-null it
// receives the number of instructions in the selected sequence.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
                            unsigned *InstCnt = nullptr) {
  unsigned InstCntDirect = 0;
  // No more than 3 instructions is used if we can select the i64 immediate
  // directly.
  SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);

  const PPCSubtarget &Subtarget =
      CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();

  if (Subtarget.hasPrefixInstrs()) {
    unsigned InstCntDirectP = 0;
    SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
    // Use the prefix case in either of two cases:
    // 1) We have no result from the non-prefix case to use.
    // 2) The non-prefix case uses more instructions than the prefix case.
    // If the prefix and non-prefix cases use the same number of instructions
    // we will prefer the non-prefix case.
    if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
      if (InstCnt)
        *InstCnt = InstCntDirectP;
      return ResultP;
    }
  }

  if (Result) {
    if (InstCnt)
      *InstCnt = InstCntDirect;
    return Result;
  }
  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };
  // Fallback: no direct pattern matched.
  // Handle the upper 32 bit value.
  Result =
      selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
  // Add in the last bits as required.
  if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) {
    Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Hi16));
    ++InstCntDirect;
  }
  if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) {
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm(Lo16));
    ++InstCntDirect;
  }
  if (InstCnt)
    *InstCnt = InstCntDirect;
  return Result;
}

// Select a 64-bit constant.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
  SDLoc dl(N);

  // Get 64 bit value.
  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
  // If every use only consumes a truncation of the value, materializing the
  // sign-extended truncated constant instead may fit in a single LI8.
  if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
    uint64_t SextImm = SignExtend64(Imm, MinSize);
    SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
    if (isInt<16>(SextImm))
      return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  }
  return selectI64Imm(CurDAG, dl, Imm);
}

namespace {

class BitPermutationSelector {
  struct ValueBit {
    SDValue V;

    // The bit number in the value, using a convention where bit 0 is the
    // lowest-order bit.
    unsigned Idx;

    // ConstZero means a bit we need to mask off.
    // Variable is a bit that comes from an input variable.
    // VariableKnownToBeZero is also a bit that comes from an input variable,
    // but it is known to be already zero. So we do not need to mask them.
    enum Kind {
      ConstZero,
      Variable,
      VariableKnownToBeZero
    } K;

    ValueBit(SDValue V, unsigned I, Kind K = Variable)
      : V(V), Idx(I), K(K) {}
    // Default: a bit with no associated value (Idx is a sentinel).
    ValueBit(Kind K = Variable)
      : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}

    bool isZero() const {
      return K == ConstZero || K == VariableKnownToBeZero;
    }

    bool hasValue() const {
      return K == Variable || K == VariableKnownToBeZero;
    }

    SDValue getValue() const {
      assert(hasValue() && "Cannot get the value of a constant bit");
      return V;
    }

    unsigned getValueBitIndex() const {
      assert(hasValue() && "Cannot get the value bit index of a constant bit");
      return Idx;
    }
  };

  // A bit group has the same underlying value and the same rotate factor.
  struct BitGroup {
    SDValue V;
    unsigned RLAmt;
    unsigned StartIdx, EndIdx;

    // This rotation amount assumes that the lower 32 bits of the quantity are
    // replicated in the high 32 bits by the rotation operator (which is done
    // by rlwinm and friends in 64-bit mode).
    bool Repl32;
    // Did converting to Repl32 == true change the rotation factor? If it did,
    // it decreased it by 32.
    bool Repl32CR;
    // Was this group coalesced after setting Repl32 to true?
    bool Repl32Coalesced;

    BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
      : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
        Repl32Coalesced(false) {
      LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
                        << " [" << S << ", " << E << "]\n");
    }
  };

  // Information on each (Value, RLAmt) pair (like the number of groups
  // associated with each) used to choose the lowering method.
  struct ValueRotInfo {
    SDValue V;
    unsigned RLAmt = std::numeric_limits<unsigned>::max();
    unsigned NumGroups = 0;
    unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
    bool Repl32 = false;

    ValueRotInfo() = default;

    // For sorting (in reverse order) by NumGroups, and then by
    // FirstGroupStartIdx.
    bool operator < (const ValueRotInfo &Other) const {
      // We need to sort so that the non-Repl32 come first because, when we're
      // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
      // masking operation.
      if (Repl32 < Other.Repl32)
        return true;
      else if (Repl32 > Other.Repl32)
        return false;
      else if (NumGroups > Other.NumGroups)
        return true;
      else if (NumGroups < Other.NumGroups)
        return false;
      else if (RLAmt == 0 && Other.RLAmt != 0)
        return true;
      else if (RLAmt != 0 && Other.RLAmt == 0)
        return false;
      else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
        return true;
      return false;
    }
  };

  // Memoization cache for getValueBits, keyed by the SDValue being analyzed.
  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
  using ValueBitsMemoizer =
      DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
  ValueBitsMemoizer Memoizer;

  // Return a pair of bool and a SmallVector pointer to a memoization entry.
  // The bool is true if something interesting was deduced, otherwise if we're
  // providing only a generic representation of V (or something else likewise
  // uninteresting for instruction selection) through the SmallVector.
  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
                                                            unsigned NumBits) {
    auto &ValueEntry = Memoizer[V];
    if (ValueEntry)
      return std::make_pair(ValueEntry->first, &ValueEntry->second);
    ValueEntry.reset(new ValueBitsMemoizedValue());
    bool &Interesting = ValueEntry->first;
    SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
    Bits.resize(NumBits);

    switch (V.getOpcode()) {
    default: break;
    case ISD::ROTL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned RotAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        // Rotate the input bits left by RotAmt (mod NumBits).
        for (unsigned i = 0; i < NumBits; ++i)
          Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SHL:
    case PPCISD::SHL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        for (unsigned i = ShiftAmt; i < NumBits; ++i)
          Bits[i] = LHSBits[i - ShiftAmt];

        // The vacated low-order bits are constant zero.
        for (unsigned i = 0; i < ShiftAmt; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SRL:
    case PPCISD::SRL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
          Bits[i] = LHSBits[i + ShiftAmt];

        // The vacated high-order bits are constant zero.
        for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::AND:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        uint64_t Mask = V.getConstantOperandVal(1);

        const SmallVector<ValueBit, 64> *LHSBits;
        // Mark this as interesting, only if the LHS was also interesting. This
        // prevents the overall procedure from matching a single immediate 'and'
        // (which is non-optimal because such an and might be folded with other
        // things if we don't select it here).
        std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);

        for (unsigned i = 0; i < NumBits; ++i)
          if (((Mask >> i) & 1) == 1)
            Bits[i] = (*LHSBits)[i];
          else {
            // AND instruction masks this bit. If the input is already zero,
            // we have nothing to do here. Otherwise, make the bit ConstZero.
            if ((*LHSBits)[i].isZero())
              Bits[i] = (*LHSBits)[i];
            else
              Bits[i] = ValueBit(ValueBit::ConstZero);
          }

        return std::make_pair(Interesting, &Bits);
      }
      break;
    case ISD::OR: {
      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
      const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;

      // An OR is only representable if, for every bit, at most one side can
      // be non-zero (i.e. the operands are disjoint).
      bool AllDisjoint = true;
      SDValue LastVal = SDValue();
      unsigned LastIdx = 0;
      for (unsigned i = 0; i < NumBits; ++i) {
        if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
          // If both inputs are known to be zero and one is ConstZero and
          // another is VariableKnownToBeZero, we can select whichever
          // we like. To minimize the number of bit groups, we select
          // VariableKnownToBeZero if this bit is the next bit of the same
          // input variable from the previous bit. Otherwise, we select
          // ConstZero.
          if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
              LHSBits[i].getValueBitIndex() == LastIdx + 1)
            Bits[i] = LHSBits[i];
          else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
                   RHSBits[i].getValueBitIndex() == LastIdx + 1)
            Bits[i] = RHSBits[i];
          else
            Bits[i] = ValueBit(ValueBit::ConstZero);
        }
        else if (LHSBits[i].isZero())
          Bits[i] = RHSBits[i];
        else if (RHSBits[i].isZero())
          Bits[i] = LHSBits[i];
        else {
          AllDisjoint = false;
          break;
        }
        // We remember the value and bit index of this bit.
        if (Bits[i].hasValue()) {
          LastVal = Bits[i].getValue();
          LastIdx = Bits[i].getValueBitIndex();
        }
        else {
          if (LastVal) LastVal = SDValue();
          LastIdx = 0;
        }
      }

      if (!AllDisjoint)
        break;

      return std::make_pair(Interesting = true, &Bits);
    }
    case ISD::ZERO_EXTEND: {
      // We support only the case with zero extension from i32 to i64 so far.
      if (V.getValueType() != MVT::i64 ||
          V.getOperand(0).getValueType() != MVT::i32)
        break;

      const SmallVector<ValueBit, 64> *LHSBits;
      const unsigned NumOperandBits = 32;
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                    NumOperandBits);

      for (unsigned i = 0; i < NumOperandBits; ++i)
        Bits[i] = (*LHSBits)[i];

      // The extended bits are constant zero.
      for (unsigned i = NumOperandBits; i < NumBits; ++i)
        Bits[i] = ValueBit(ValueBit::ConstZero);

      return std::make_pair(Interesting, &Bits);
    }
    case ISD::TRUNCATE: {
      EVT FromType = V.getOperand(0).getValueType();
      EVT ToType = V.getValueType();
      // We support only the case with truncate from i64 to i32.
      if (FromType != MVT::i64 || ToType != MVT::i32)
        break;
      const unsigned NumAllBits = FromType.getSizeInBits();
      SmallVector<ValueBit, 64> *InBits;
      std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
                                                   NumAllBits);
      const unsigned NumValidBits = ToType.getSizeInBits();

      // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
      // So, we cannot include this truncate.
      bool UseUpper32bit = false;
      for (unsigned i = 0; i < NumValidBits; ++i)
        if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
          UseUpper32bit = true;
          break;
        }
      if (UseUpper32bit)
        break;

      for (unsigned i = 0; i < NumValidBits; ++i)
        Bits[i] = (*InBits)[i];

      return std::make_pair(Interesting, &Bits);
    }
    case ISD::AssertZext: {
      // For AssertZext, we look through the operand and
      // mark the bits known to be zero.
      const SmallVector<ValueBit, 64> *LHSBits;
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                    NumBits);

      EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
      const unsigned NumValidBits = FromType.getSizeInBits();
      for (unsigned i = 0; i < NumValidBits; ++i)
        Bits[i] = (*LHSBits)[i];

      // These bits are known to be zero but the AssertZext may be from a value
      // that already has some constant zero bits (i.e. from a masking and).
      for (unsigned i = NumValidBits; i < NumBits; ++i)
        Bits[i] = (*LHSBits)[i].hasValue()
                    ? ValueBit((*LHSBits)[i].getValue(),
                               (*LHSBits)[i].getValueBitIndex(),
                               ValueBit::VariableKnownToBeZero)
                    : ValueBit(ValueBit::ConstZero);

      return std::make_pair(Interesting, &Bits);
    }
    case ISD::LOAD:
      LoadSDNode *LD = cast<LoadSDNode>(V);
      if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
        EVT VT = LD->getMemoryVT();
        const unsigned NumValidBits = VT.getSizeInBits();

        for (unsigned i = 0; i < NumValidBits; ++i)
          Bits[i] = ValueBit(V, i);

        // These bits are known to be zero.
        for (unsigned i = NumValidBits; i < NumBits; ++i)
          Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);

        // Zero-extending load itself cannot be optimized. So, it is not
        // interesting by itself though it gives useful information.
        return std::make_pair(Interesting = false, &Bits);
      }
      break;
    }

    // Generic fallback: every bit simply comes from V itself, and nothing
    // interesting was learned.
    for (unsigned i = 0; i < NumBits; ++i)
      Bits[i] = ValueBit(V, i);

    return std::make_pair(Interesting = false, &Bits);
  }

  // For each value (except the constant ones), compute the left-rotate amount
  // to get it from its original to final position.
  void computeRotationAmounts() {
    NeedMask = false;
    RLAmt.resize(Bits.size());
    for (unsigned i = 0; i < Bits.size(); ++i)
      if (Bits[i].hasValue()) {
        unsigned VBI = Bits[i].getValueBitIndex();
        if (i >= VBI)
          RLAmt[i] = i - VBI;
        else
          RLAmt[i] = Bits.size() - (VBI - i);
      } else if (Bits[i].isZero()) {
        // Constant-zero bits require masking; mark the rotation as invalid.
        NeedMask = true;
        RLAmt[i] = UINT32_MAX;
      } else {
        llvm_unreachable("Unknown value bit type");
      }
  }

  // Collect groups of consecutive bits with the same underlying value and
  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
  // they break up groups.
  void collectBitGroups(bool LateMask) {
    BitGroups.clear();

    unsigned LastRLAmt = RLAmt[0];
    SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
    unsigned LastGroupStartIdx = 0;
    bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
    for (unsigned i = 1; i < Bits.size(); ++i) {
      unsigned ThisRLAmt = RLAmt[i];
      SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
      if (LateMask && !ThisValue) {
        // Zeros will be masked afterward, so pretend this bit continues the
        // previous (value, rotation) run.
        ThisValue = LastValue;
        ThisRLAmt = LastRLAmt;
        // If we're doing late masking, then the first bit group always starts
        // at zero (even if the first bits were zero).
        if (BitGroups.empty())
          LastGroupStartIdx = 0;
      }

      // If this bit is known to be zero and the current group is a bit group
      // of zeros, we do not need to terminate the current bit group even the
      // Value or RLAmt does not match here. Instead, we terminate this group
      // when the first non-zero bit appears later.
      if (IsGroupOfZeros && Bits[i].isZero())
        continue;

      // If this bit has the same underlying value and the same rotate factor as
      // the last one, then they're part of the same group.
      if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
        // We cannot continue the current group if this bit is not known to
        // be zero in a bit group of zeros.
        if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
          continue;

      if (LastValue.getNode())
        BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                     i-1));
      LastRLAmt = ThisRLAmt;
      LastValue = ThisValue;
      LastGroupStartIdx = i;
      IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
    }
    // Close out the final group.
    if (LastValue.getNode())
      BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                   Bits.size()-1));

    if (BitGroups.empty())
      return;

    // We might be able to combine the first and last groups.
    if (BitGroups.size() > 1) {
      // If the first and last groups are the same, then remove the first group
      // in favor of the last group, making the ending index of the last group
      // equal to the ending index of the to-be-removed first group.
      if (BitGroups[0].StartIdx == 0 &&
          BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
          BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
          BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
        LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
        BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
        BitGroups.erase(BitGroups.begin());
      }
    }
  }

  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
  // associated with each. If the number of groups are same, we prefer a group
  // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
  // instruction. If there is a degeneracy, pick the one that occurs
  // first (in the final value).
  void collectValueRotInfo() {
    ValueRots.clear();

    for (auto &BG : BitGroups) {
      // Repl32 groups are keyed separately from non-Repl32 groups that share
      // the same rotation amount (RLAmt is always < 64, so +64 cannot clash).
      unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
      ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
      VRI.V = BG.V;
      VRI.RLAmt = BG.RLAmt;
      VRI.Repl32 = BG.Repl32;
      VRI.NumGroups += 1;
      VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
    }

    // Now that we've collected the various ValueRotInfo instances, we need to
    // sort them.
    ValueRotsVec.clear();
    for (auto &I : ValueRots) {
      ValueRotsVec.push_back(I.second);
    }
    llvm::sort(ValueRotsVec);
  }

  // In 64-bit mode, rlwinm and friends have a rotation operator that
  // replicates the low-order 32 bits into the high-order 32-bits. The mask
  // indices of these instructions can only be in the lower 32 bits, so they
  // can only represent some 64-bit bit groups.
  // However, when they can be used,
  // the 32-bit replication can be used to represent, as a single bit group,
  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
  // groups when possible.
  void assignRepl32BitGroups() {
    // If we have bits like this:
    //
    // Indices:    15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
    // V bits: ... 7  6  5  4  3  2  1 0 31 30 29 28 27 26 25 24
    // Groups:    |      RLAmt = 8      |      RLAmt = 40       |
    //
    // But, making use of a 32-bit operation that replicates the low-order 32
    // bits into the high-order 32 bits, this can be one bit group with a RLAmt
    // of 8.

    // True iff every value bit covered by BG comes from the low 32 bits of
    // its source value (a requirement for the Repl32 representation).
    auto IsAllLow32 = [this](BitGroup & BG) {
      if (BG.StartIdx <= BG.EndIdx) {
        for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      } else {
        // The group wraps around index 0; check both pieces.
        for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
        for (unsigned i = 0; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      }

      return true;
    };

    for (auto &BG : BitGroups) {
      // If this bit group has RLAmt of 0 and will not be merged with
      // another bit group, we don't benefit from Repl32. We don't mark
      // such group to give more freedom for later instruction selection.
      if (BG.RLAmt == 0) {
        auto PotentiallyMerged = [this](BitGroup & BG) {
          for (auto &BG2 : BitGroups)
            if (&BG != &BG2 && BG.V == BG2.V &&
                (BG2.RLAmt == 0 || BG2.RLAmt == 32))
              return true;
          return false;
        };
        if (!PotentiallyMerged(BG))
          continue;
      }
      if (BG.StartIdx < 32 && BG.EndIdx < 32) {
        if (IsAllLow32(BG)) {
          if (BG.RLAmt >= 32) {
            BG.RLAmt -= 32;
            BG.Repl32CR = true;
          }

          BG.Repl32 = true;

          LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
                            << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
                            << BG.StartIdx << ", " << BG.EndIdx << "]\n");
        }
      }
    }

    // Now walk through the bit groups, consolidating where possible.
    for (auto I = BitGroups.begin(); I != BitGroups.end();) {
      // We might want to remove this bit group by merging it with the previous
      // group (which might be the ending group).
      auto IP = (I == BitGroups.begin()) ?
                std::prev(BitGroups.end()) : std::prev(I);
      if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
          I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {

        LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
                          << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
                          << I->StartIdx << ", " << I->EndIdx
                          << "] with group with range [" << IP->StartIdx << ", "
                          << IP->EndIdx << "]\n");

        IP->EndIdx = I->EndIdx;
        IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
        IP->Repl32Coalesced = true;
        I = BitGroups.erase(I);
        continue;
      } else {
        // There is a special case worth handling: If there is a single group
        // covering the entire upper 32 bits, and it can be merged with both
        // the next and previous groups (which might be the same group), then
        // do so. If it is the same group (so there will be only one group in
        // total), then we need to reverse the order of the range so that it
        // covers the entire 64 bits.
        if (I->StartIdx == 32 && I->EndIdx == 63) {
          assert(std::next(I) == BitGroups.end() &&
                 "bit group ends at index 63 but there is another?");
          auto IN = BitGroups.begin();

          if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
              (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
              IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
              IsAllLow32(*I)) {

            LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
                              << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
                              << ", " << I->EndIdx
                              << "] with 32-bit replicated groups with ranges ["
                              << IP->StartIdx << ", " << IP->EndIdx << "] and ["
                              << IN->StartIdx << ", " << IN->EndIdx << "]\n");

            if (IP == IN) {
              // There is only one other group; change it to cover the whole
              // range (backward, so that it can still be Repl32 but cover the
              // whole 64-bit range).
              IP->StartIdx = 31;
              IP->EndIdx = 30;
              IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
            } else {
              // There are two separate groups, one before this group and one
              // after us (at the beginning). We're going to remove this group,
              // but also the group at the very beginning.
              IP->EndIdx = IN->EndIdx;
              IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
              BitGroups.erase(BitGroups.begin());
            }

            // This must be the last group in the vector (and we might have
            // just invalidated the iterator above), so break here.
            break;
          }
        }
      }

      ++I;
    }
  }

  // Materialize Imm as an i32 target constant at location dl.
  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  // Build a mask that has a one in every bit position carrying a value bit
  // and a zero in every position that must be cleared (no associated value).
  uint64_t getZerosMask() {
    uint64_t Mask = 0;
    for (unsigned i = 0; i < Bits.size(); ++i) {
      if (Bits[i].hasValue())
        continue;
      Mask |= (UINT64_C(1) << i);
    }

    return ~Mask;
  }

  // This method extends an input value to 64 bit if input is 32-bit integer.
  // While selecting instructions in BitPermutationSelector in 64-bit mode,
  // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
  // In such case, we extend it to 64 bit to be consistent with other values.
  SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
    if (V.getValueSizeInBits() == 64)
      return V;

    assert(V.getValueSizeInBits() == 32);
    // INSERT_SUBREG the 32-bit value into the low half of an IMPLICIT_DEF
    // 64-bit register.
    SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
    SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
                                                   MVT::i64), 0);
    SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
                                                    MVT::i64, ImDef, V,
                                                    SubRegIdx), 0);
    return ExtVal;
  }

  // Counterpart of ExtendToInt64: extract the low 32 bits of a 64-bit value
  // as an i32 (no-op if the value is already 32 bits wide).
  SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
    if (V.getValueSizeInBits() == 32)
      return V;

    assert(V.getValueSizeInBits() == 64);
    SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
    SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
                                                    MVT::i32, V, SubRegIdx), 0);
    return SubVal;
  }

  // Depending on the number of groups for a particular value, it might be
  // better to rotate, mask explicitly (using andi/andis), and then or the
  // result. Select this part of the result first.
  void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
    // Masking can be disabled entirely for testing via this flag.
    if (BPermRewriterNoMasking)
      return;

    for (ValueRotInfo &VRI : ValueRotsVec) {
      // Collect, as a 32-bit mask, every result position fed by this
      // (value, rotation) pair.
      unsigned Mask = 0;
      for (unsigned i = 0; i < Bits.size(); ++i) {
        if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
          continue;
        if (RLAmt[i] != VRI.RLAmt)
          continue;
        Mask |= (1u << i);
      }

      // Compute the masks for andi/andis that would be necessary.
      // andi covers the low 16 bits, andis the high 16 bits.
      unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
      assert((ANDIMask != 0 || ANDISMask != 0) &&
             "No set bits in mask for value bit groups");
      bool NeedsRotate = VRI.RLAmt != 0;

      // We're trying to minimize the number of instructions. If we have one
      // group, using one of andi/andis can break even. If we have three
      // groups, we can use both andi and andis and break even (to use both
      // andi and andis we also need to or the results together). We need four
      // groups if we also need to rotate. To use andi/andis we need to do more
      // than break even because rotate-and-mask instructions tend to be easier
      // to schedule.

      // FIXME: We've biased here against using andi/andis, which is right for
      // POWER cores, but not optimal everywhere. For example, on the A2,
      // andi/andis have single-cycle latency whereas the rotate-and-mask
      // instructions take two cycles, and it would be better to bias toward
      // andi/andis in break-even cases.

      // Cost of the masking approach: optional rotate, andi and/or andis,
      // an OR to combine them when both are used, and an OR into Res if a
      // partial result already exists.
      unsigned NumAndInsts = (unsigned) NeedsRotate +
                             (unsigned) (ANDIMask != 0) +
                             (unsigned) (ANDISMask != 0) +
                             (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
                             (unsigned) (bool) Res;

      LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
                        << " RL: " << VRI.RLAmt << ":"
                        << "\n\t\t\tisel using masking: " << NumAndInsts
                        << " using rotates: " << VRI.NumGroups << "\n");

      // Strictly fewer instructions than one rotate per group, else skip
      // (the rotate-and-mask path handles these groups later).
      if (NumAndInsts >= VRI.NumGroups)
        continue;

      LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");

      if (InstCnt) *InstCnt += NumAndInsts;

      // Rotate the value into position (RLWINM with a full 0..31 mask), or
      // use it directly when no rotation is needed.
      SDValue VRot;
      if (VRI.RLAmt) {
        SDValue Ops[] =
          { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
            getI32Imm(0, dl), getI32Imm(31, dl) };
        VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                              Ops), 0);
      } else {
        VRot = TruncateToInt32(VRI.V, dl);
      }

      SDValue ANDIVal, ANDISVal;
      if (ANDIMask != 0)
        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
                                                 VRot, getI32Imm(ANDIMask, dl)),
                          0);
      if (ANDISMask != 0)
        ANDISVal =
            SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
                                           getI32Imm(ANDISMask, dl)),
                    0);

      // Combine the two halves (if both exist) and OR into any existing
      // partial result.
      SDValue TotalVal;
      if (!ANDIVal)
        TotalVal = ANDISVal;
      else if (!ANDISVal)
        TotalVal = ANDIVal;
      else
        TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                                                  ANDIVal, ANDISVal), 0);

      if (!Res)
        Res = TotalVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                                             Res, TotalVal), 0);

      // Now, remove all groups with this underlying value and rotation
      // factor.
      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
      });
    }
  }

  // Instruction selection for the 32-bit case.
  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt) *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts32(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    if ((!NeedMask || LateMask) && !Res) {
      ValueRotInfo &VRI = ValueRotsVec[0];
      if (VRI.RLAmt) {
        if (InstCnt) *InstCnt += 1;
        SDValue Ops[] =
          { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
            getI32Imm(0, dl), getI32Imm(31, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
                      0);
      } else {
        Res = TruncateToInt32(VRI.V, dl);
      }

      // Now, remove all groups with this underlying value and rotation factor.
      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
      });
    }

    // Each remaining group costs exactly one instruction (RLWINM or RLWIMI).
    if (InstCnt) *InstCnt += BitGroups.size();

    // Insert the other groups (one at a time). The first group (when Res is
    // still empty) uses RLWINM; subsequent groups insert into Res via RLWIMI.
    // Mask bounds are converted from our low-order bit indices to the
    // high-to-low numbering the instructions use.
    for (auto &BG : BitGroups) {
      if (!Res) {
        SDValue Ops[] =
          { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
      } else {
        SDValue Ops[] =
          { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
      }
    }

    // With late masking, the zeros are applied at the very end with
    // andi/andis (possibly OR'ed together).
    if (LateMask) {
      unsigned Mask = (unsigned) getZerosMask();

      unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
      assert((ANDIMask != 0 || ANDISMask != 0) &&
             "No set bits in zeros mask?");

      if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                               (unsigned) (ANDISMask != 0) +
                               (unsigned) (ANDIMask != 0 && ANDISMask != 0);

      SDValue ANDIVal, ANDISVal;
      if (ANDIMask != 0)
        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
                                                 Res, getI32Imm(ANDIMask, dl)),
                          0);
      if (ANDISMask != 0)
        ANDISVal =
            SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
                                           getI32Imm(ANDISMask, dl)),
                    0);

      if (!ANDIVal)
        Res = ANDISVal;
      else if (!ANDISVal)
        Res = ANDIVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                                             ANDIVal, ANDISVal), 0);
    }

    return Res.getNode();
  }

  // Return the number of instructions (1 or 2) that SelectRotMask64 /
  // SelectRotMaskIns64 would need for this rotate-and-mask combination.
  unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                                unsigned MaskStart, unsigned MaskEnd,
                                bool IsIns) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd   = 64 - MaskStart - 1;

    // Repl32 groups always fit a single rlwinm/rlwimi.
    if (Repl32)
      return 1;

    // A single rldicl/rldicr (non-insert only) or rldic/rldimi suffices when
    // the mask reaches one end of the register or lines up with the rotation.
    if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
        InstMaskEnd == 63 - RLAmt)
      return 1;

    return 2;
  }

  // For 64-bit values, not all combinations of rotates and masks are
  // available. Produce one if it is available.
  SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
                          bool Repl32, unsigned MaskStart, unsigned MaskEnd,
                          unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd   = 64 - MaskStart - 1;

    if (InstCnt) *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
                                            Ops), 0);
    }

    // Mask extends to the least-significant bit: rldicl (clear left).
    if (InstMaskEnd == 63) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
    }

    // Mask extends to the most-significant bit: rldicr (clear right).
    if (InstMaskStart == 0) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskEnd, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
    }

    // Mask end lines up with the rotation amount: rldic.
    if (InstMaskEnd == 63 - RLAmt) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt) *InstCnt += 1;

    // The rotation mask for the second instruction must be MaskStart.
    unsigned RLAmt2 = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
    if (RLAmt1)
      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
    return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
  }

  // For 64-bit values, not all combinations of rotates and masks are
  // available. Produce a rotate-mask-and-insert if one is available.
  SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
                             unsigned RLAmt, bool Repl32, unsigned MaskStart,
                             unsigned MaskEnd, unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd   = 64 - MaskStart - 1;

    if (InstCnt) *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
                                            Ops), 0);
    }

    // rldimi is the only 64-bit insert form, and its mask end is implied by
    // the rotation amount.
    if (InstMaskEnd == 63 - RLAmt) {
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt) *InstCnt += 1;

    // The rotation mask for the second instruction must be MaskStart.
    unsigned RLAmt2 = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
    if (RLAmt1)
      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
    return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
  }

  // 64-bit counterpart of SelectAndParts32: select the portions of the result
  // that are cheaper to produce with rotate + explicit AND masks.
  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
    if (BPermRewriterNoMasking)
      return;

    // The idea here is the same as in the 32-bit version, but with additional
    // complications from the fact that Repl32 might be true. Because we
    // aggressively convert bit groups to Repl32 form (which, for small
    // rotation factors, involves no other change), and then coalesce, it might
    // be the case that a single 64-bit masking operation could handle both
    // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
    // form allowed coalescing, then we must use a 32-bit rotation in order to
    // completely capture the new combined bit group.

    for (ValueRotInfo &VRI : ValueRotsVec) {
      uint64_t Mask = 0;

      // We need to add to the mask all bits from the associated bit groups.
      // If Repl32 is false, we need to add bits from bit groups that have
      // Repl32 true, but are trivially convertible to Repl32 false. Such a
      // group is trivially convertible if it overlaps only with the lower 32
      // bits, and the group has not been coalesced.
      auto MatchingBG = [VRI](const BitGroup &BG) {
        if (VRI.V != BG.V)
          return false;

        unsigned EffRLAmt = BG.RLAmt;
        if (!VRI.Repl32 && BG.Repl32) {
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
              !BG.Repl32Coalesced) {
            // Undo the RLAmt -= 32 done during Repl32 conversion.
            if (BG.Repl32CR)
              EffRLAmt += 32;
          } else {
            return false;
          }
        } else if (VRI.Repl32 != BG.Repl32) {
          return false;
        }

        return VRI.RLAmt == EffRLAmt;
      };

      // Accumulate the result positions covered by all matching groups
      // (handling wrap-around groups in two pieces).
      for (auto &BG : BitGroups) {
        if (!MatchingBG(BG))
          continue;

        if (BG.StartIdx <= BG.EndIdx) {
          for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
            Mask |= (UINT64_C(1) << i);
        } else {
          for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
            Mask |= (UINT64_C(1) << i);
          for (unsigned i = 0; i <= BG.EndIdx; ++i)
            Mask |= (UINT64_C(1) << i);
        }
      }

      // We can use the 32-bit andi/andis technique if the mask does not
      // require any higher-order bits. This can save an instruction compared
      // to always using the general 64-bit technique.
      bool Use32BitInsts = isUInt<32>(Mask);
      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX),
               ANDISMask = (Mask >> 16) & UINT16_MAX;

      bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));

      // Cost of the masking approach; selectI64Imm is consulted for the cost
      // of materializing a full 64-bit mask constant.
      unsigned NumAndInsts = (unsigned) NeedsRotate +
                             (unsigned) (bool) Res;
      unsigned NumOfSelectInsts = 0;
      selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
      assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
      if (Use32BitInsts)
        NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
                       (unsigned) (ANDIMask != 0 && ANDISMask != 0);
      else
        NumAndInsts += NumOfSelectInsts + /* and */ 1;

      // Cost of the rotate-and-mask approach: sum the per-group instruction
      // counts (the first group is a plain rotate, the rest are inserts).
      unsigned NumRLInsts = 0;
      bool FirstBG = true;
      bool MoreBG = false;
      for (auto &BG : BitGroups) {
        if (!MatchingBG(BG)) {
          MoreBG = true;
          continue;
        }
        NumRLInsts +=
          SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
                               !FirstBG);
        FirstBG = false;
      }

      LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
                        << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
                        << "\n\t\t\tisel using masking: " << NumAndInsts
                        << " using rotates: " << NumRLInsts << "\n");

      // When we'd use andi/andis, we bias toward using the rotates (andi only
      // has a record form, and is cracked on POWER cores). However, when using
      // general 64-bit constant formation, bias toward the constant form,
      // because that exposes more opportunities for CSE.
      if (NumAndInsts > NumRLInsts)
        continue;
      // When merging multiple bit groups, an OR instruction is used.
      // But when rotate is used, rldimi can insert the rotated value into any
      // register, so the OR instruction can be avoided.
      if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
        continue;

      LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");

      if (InstCnt) *InstCnt += NumAndInsts;

      SDValue VRot;
      // We actually need to generate a rotation if we have a non-zero rotation
      // factor or, in the Repl32 case, if we care about any of the
      // higher-order replicated bits. In the latter case, we generate a mask
      // backward so that it actually includes the entire 64 bits.
      if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
        VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                               VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
      else
        VRot = VRI.V;

      SDValue TotalVal;
      if (Use32BitInsts) {
        assert((ANDIMask != 0 || ANDISMask != 0) &&
               "No set bits in mask when using 32-bit ands for 64-bit value");

        SDValue ANDIVal, ANDISVal;
        if (ANDIMask != 0)
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
                                                   ExtendToInt64(VRot, dl),
                                                   getI32Imm(ANDIMask, dl)),
                            0);
        if (ANDISMask != 0)
          ANDISVal =
              SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
                                             ExtendToInt64(VRot, dl),
                                             getI32Imm(ANDISMask, dl)),
                      0);

        if (!ANDIVal)
          TotalVal = ANDISVal;
        else if (!ANDISVal)
          TotalVal = ANDIVal;
        else
          TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                             ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
      } else {
        // General case: materialize the 64-bit mask constant and AND with it.
        TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
        TotalVal =
          SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                         ExtendToInt64(VRot, dl), TotalVal),
                  0);
      }

      if (!Res)
        Res = TotalVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                             ExtendToInt64(Res, dl), TotalVal),
                      0);

      // Now, remove all groups with this underlying value and rotation
      // factor.
      eraseMatchingBitGroups(MatchingBG);
    }
  }

  // Instruction selection for the 64-bit case.
  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt) *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts64(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    if ((!NeedMask || LateMask) && !Res) {
      // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
      // groups will come first, and so the VRI representing the largest number
      // of groups might not be first (it might be the first Repl32 groups).
      unsigned MaxGroupsIdx = 0;
      if (!ValueRotsVec[0].Repl32) {
        for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
          if (ValueRotsVec[i].Repl32) {
            if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
              MaxGroupsIdx = i;
            break;
          }
      }

      ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
      bool NeedsRotate = false;
      if (VRI.RLAmt) {
        NeedsRotate = true;
      } else if (VRI.Repl32) {
        for (auto &BG : BitGroups) {
          if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
              BG.Repl32 != VRI.Repl32)
            continue;

          // We don't need a rotate if the bit group is confined to the lower
          // 32 bits.
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
            continue;

          NeedsRotate = true;
          break;
        }
      }

      if (NeedsRotate)
        Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                              VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
                              InstCnt);
      else
        Res = VRI.V;

      // Now, remove all groups with this underlying value and rotation factor.
      if (Res)
        eraseMatchingBitGroups([VRI](const BitGroup &BG) {
          return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
                 BG.Repl32 == VRI.Repl32;
        });
    }

    // Because 64-bit rotates are more flexible than inserts, we might have a
    // preference regarding which one we do first (to save one instruction).
    // Move a group that is cheaper as a plain rotate than as an insert to the
    // front so it becomes the starting value.
    if (!Res)
      for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
        if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                 false) <
            SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                 true)) {
          if (I != BitGroups.begin()) {
            BitGroup BG = *I;
            BitGroups.erase(I);
            BitGroups.insert(BitGroups.begin(), BG);
          }

          break;
        }
      }

    // Insert the other groups (one at a time).
    for (auto &BG : BitGroups) {
      if (!Res)
        Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
                              BG.EndIdx, InstCnt);
      else
        Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
                                 BG.StartIdx, BG.EndIdx, InstCnt);
    }

    if (LateMask) {
      uint64_t Mask = getZerosMask();

      // We can use the 32-bit andi/andis technique if the mask does not
      // require any higher-order bits. This can save an instruction compared
      // to always using the general 64-bit technique.
      bool Use32BitInsts = isUInt<32>(Mask);
      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX),
               ANDISMask = (Mask >> 16) & UINT16_MAX;

      if (Use32BitInsts) {
        assert((ANDIMask != 0 || ANDISMask != 0) &&
               "No set bits in mask when using 32-bit ands for 64-bit value");

        if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                                 (unsigned) (ANDISMask != 0) +
                                 (unsigned) (ANDIMask != 0 && ANDISMask != 0);

        SDValue ANDIVal, ANDISVal;
        if (ANDIMask != 0)
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
                                                   ExtendToInt64(Res, dl),
                                                   getI32Imm(ANDIMask, dl)),
                            0);
        if (ANDISMask != 0)
          ANDISVal =
              SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
                                             ExtendToInt64(Res, dl),
                                             getI32Imm(ANDISMask, dl)),
                      0);

        if (!ANDIVal)
          Res = ANDISVal;
        else if (!ANDISVal)
          Res = ANDIVal;
        else
          Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                        ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
      } else {
        // Materialize the full 64-bit mask and apply it with a single AND.
        unsigned NumOfSelectInsts = 0;
        SDValue MaskVal =
            SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
        Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                             ExtendToInt64(Res, dl), MaskVal),
                      0);
        if (InstCnt)
          *InstCnt += NumOfSelectInsts + /* and */ 1;
      }
    }

    return Res.getNode();
  }

  // Shared driver for one masking strategy (early or late); dispatches to the
  // 32-bit or 64-bit selector based on the width of the value being matched.
  SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
    // Fill in BitGroups.
    collectBitGroups(LateMask);
    if (BitGroups.empty())
      return nullptr;

    // For 64-bit values, figure out when we can use 32-bit instructions.
    if (Bits.size() == 64)
      assignRepl32BitGroups();

    // Fill in ValueRotsVec.
    collectValueRotInfo();

    if (Bits.size() == 32) {
      return Select32(N, LateMask, InstCnt);
    } else {
      assert(Bits.size() == 64 && "Not 64 bits here?");
      return Select64(N, LateMask, InstCnt);
    }

    // NOTE(review): unreachable -- both branches above return.
    return nullptr;
  }

  // Remove every bit group for which the predicate F returns true.
  void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
    erase_if(BitGroups, F);
  }

  // Per-result-bit provenance for the value being matched (32 or 64 entries).
  SmallVector<ValueBit, 64> Bits;

  // True when some result bits are constant zero (i.e. masking is required).
  bool NeedMask = false;
  // Rotation amount needed, per result bit, to bring its source into place.
  SmallVector<unsigned, 64> RLAmt;

  // Maximal runs of bits sharing the same (value, rotation).
  SmallVector<BitGroup, 16> BitGroups;

  // Per-(value, rotation) summary info; ValueRotsVec is the sorted form.
  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
  SmallVector<ValueRotInfo, 16> ValueRotsVec;

  SelectionDAG *CurDAG = nullptr;

public:
  BitPermutationSelector(SelectionDAG *DAG)
    : CurDAG(DAG) {}

  // Here we try to match complex bit permutations into a set of
  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
  // known to produce optimal code for common cases (like i32 byte swapping).
  SDNode *Select(SDNode *N) {
    Memoizer.clear();
    auto Result =
        getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
    if (!Result.first)
      return nullptr;
    Bits = std::move(*Result.second);

    LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
                         " selection for: ");
    LLVM_DEBUG(N->dump(CurDAG));

    // Fill in RLAmt and set NeedMask.
    computeRotationAmounts();

    if (!NeedMask)
      return Select(N, false);

    // We currently have two techniques for handling results with zeros: early
    // masking (the default) and late masking. Late masking is sometimes more
    // efficient, but because the structure of the bit groups is different, it
    // is hard to tell without generating both and comparing the results. With
    // late masking, we ignore zeros in the resulting value when inserting each
    // set of bit groups, and then mask in the zeros at the end. With early
    // masking, we only insert the non-zero parts of the result at every step.

    unsigned InstCnt = 0, InstCntLateMask = 0;
    LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
    SDNode *RN = Select(N, false, &InstCnt);
    LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");

    LLVM_DEBUG(dbgs() << "\tLate masking:\n");
    SDNode *RNLM = Select(N, true, &InstCntLateMask);
    LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
                      << " instructions\n");

    // Prefer early masking on a tie.
    if (InstCnt <= InstCntLateMask) {
      LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
      return RN;
    }

    LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
    return RNLM;
  }
};

// Lowers i1 comparisons (and logical combinations of them) into GPR code
// sequences, avoiding the latency of moving results through condition
// registers.
class IntegerCompareEliminator {
  SelectionDAG *CurDAG;
  PPCDAGToDAGISel *S;
  // Conversion type for interpreting results of a 32-bit instruction as
  // a 64-bit value or vice versa.
  enum ExtOrTruncConversion { Ext, Trunc };

  // Modifiers to guide how an ISD::SETCC node's result is to be computed
  // in a GPR.
  // ZExtOrig - use the original condition code, zero-extend value
  // ZExtInvert - invert the condition code, zero-extend value
  // SExtOrig - use the original condition code, sign-extend value
  // SExtInvert - invert the condition code, sign-extend value
  enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };

  // Comparisons against zero to emit GPR code sequences for. Each of these
  // sequences may need to be emitted for two or more equivalent patterns.
  // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
  // matters as well as the extension type: sext (-1/0), zext (1/0).
  // GEZExt - (zext (LHS >= 0))
  // GESExt - (sext (LHS >= 0))
  // LEZExt - (zext (LHS <= 0))
  // LESExt - (sext (LHS <= 0))
  enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };

  SDNode *tryEXTEND(SDNode *N);
  SDNode *tryLogicOpOfCompares(SDNode *N);
  SDValue computeLogicOpInGPR(SDValue LogicOp);
  SDValue signExtendInputIfNeeded(SDValue Input);
  SDValue zeroExtendInputIfNeeded(SDValue Input);
  SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
  SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
                                         ZeroCompare CmpTy);
  SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);

public:
  IntegerCompareEliminator(SelectionDAG *DAG,
                           PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
    assert(CurDAG->getTargetLoweringInfo()
           .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
           "Only expecting to use this on 64 bit targets.");
  }
  // Entry point: returns a replacement node, or nullptr when the CmpInGPR
  // policy or the node's opcode rules out a GPR sequence.
  SDNode *Select(SDNode *N) {
    if (CmpInGPR == ICGPR_None)
      return nullptr;
    switch (N->getOpcode()) {
    default: break;
    case ISD::ZERO_EXTEND:
      if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
          CmpInGPR == ICGPR_SextI64)
        return nullptr;
      LLVM_FALLTHROUGH;
    case ISD::SIGN_EXTEND:
      if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
          CmpInGPR == ICGPR_ZextI64)
        return nullptr;
      return tryEXTEND(N);
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      return tryLogicOpOfCompares(N);
    }
    return nullptr;
  }
};

static bool isLogicOp(unsigned Opc) {
  return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
}
// The obvious case for wanting to keep the value in a GPR. Namely, the
// result of the comparison is actually needed in a GPR.
SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
  assert((N->getOpcode() == ISD::ZERO_EXTEND ||
          N->getOpcode() == ISD::SIGN_EXTEND) &&
         "Expecting a zero/sign extend node!");
  SDValue WideRes;
  // If we are zero-extending the result of a logical operation on i1
  // values, we can keep the values in GPRs.
  if (isLogicOp(N->getOperand(0).getOpcode()) &&
      N->getOperand(0).getValueType() == MVT::i1 &&
      N->getOpcode() == ISD::ZERO_EXTEND)
    WideRes = computeLogicOpInGPR(N->getOperand(0));
  else if (N->getOperand(0).getOpcode() != ISD::SETCC)
    return nullptr;
  else
    WideRes =
      getSETCCInGPR(N->getOperand(0),
                    N->getOpcode() == ISD::SIGN_EXTEND ?
                    SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);

  if (!WideRes)
    return nullptr;

  SDLoc dl(N);
  // The GPR sequence may be produced at 32 or 64 bits; add an implicit
  // extend or truncate when its width differs from the node's result width.
  bool Input32Bit = WideRes.getValueType() == MVT::i32;
  bool Output32Bit = N->getValueType(0) == MVT::i32;

  NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
  NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;

  SDValue ConvOp = WideRes;
  if (Input32Bit != Output32Bit)
    ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
                           ExtOrTruncConversion::Trunc);
  return ConvOp.getNode();
}

// Attempt to perform logical operations on the results of comparisons while
// keeping the values in GPRs. Without doing so, these would end up being
// lowered to CR-logical operations which suffer from significant latency and
// low ILP.
SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
  if (N->getValueType(0) != MVT::i1)
    return nullptr;
  assert(isLogicOp(N->getOpcode()) &&
         "Expected a logic operation on setcc results.");
  SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
  if (!LoweredLogical)
    return nullptr;

  SDLoc dl(N);
  // computeLogicOpInGPR lowers (xor %a, -1) to PPC::XORI8; in that case the
  // result bit of interest is EQ (set when the operand is zero), otherwise GT.
  bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
  unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
  SDValue LHS = LoweredLogical.getOperand(0);
  SDValue RHS = LoweredLogical.getOperand(1);
  SDValue WideOp;
  SDValue OpToConvToRecForm;

  // Look through any 32-bit to 64-bit implicit extend nodes to find the
  // opcode that is input to the XORI.
  if (IsBitwiseNegate &&
      LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
    OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
  else if (IsBitwiseNegate)
    // If the input to the XORI isn't an extension, that's what we're after.
    OpToConvToRecForm = LoweredLogical.getOperand(0);
  else
    // If this is not an XORI, it is a reg-reg logical op and we can convert
    // it to record-form.
    OpToConvToRecForm = LoweredLogical;

  // Get the record-form version of the node we're looking to use to get the
  // CR result from.
  uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
  int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);

  // Convert the right node to record-form. This is either the logical we're
  // looking at or it is the input node to the negation (if we're looking at
  // a bitwise negation).
  if (NewOpc != -1 && IsBitwiseNegate) {
    // The input to the XORI has a record-form. Use it.
    assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
           "Expected a PPC::XORI8 only for bitwise negation.");
    // Emit the record-form instruction, re-using the non-record-form node's
    // operands and adding a Glue result to carry CR0 to the consumer below.
    std::vector<SDValue> Ops;
    for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
      Ops.push_back(OpToConvToRecForm.getOperand(i));

    WideOp =
      SDValue(CurDAG->getMachineNode(NewOpc, dl,
                                     OpToConvToRecForm.getValueType(),
                                     MVT::Glue, Ops), 0);
  } else {
    assert((NewOpc != -1 || !IsBitwiseNegate) &&
           "No record form available for AND8/OR8/XOR8?");
    WideOp =
      SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
                                     dl, MVT::i64, MVT::Glue, LHS, RHS),
              0);
  }

  // Select this node to a single bit from CR0 set by the record-form node
  // just created. For bitwise negation, use the EQ bit which is the equivalent
  // of negating the result (i.e. it is a bit set when the result of the
  // operation is zero).
  SDValue SRIdxVal =
    CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
  SDValue CRBit =
    SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
                                   MVT::i1, CR0Reg, SRIdxVal,
                                   WideOp.getValue(1)), 0);
  return CRBit.getNode();
}

// Lower a logical operation on i1 values into a GPR sequence if possible.
// The result can be kept in a GPR if requested.
// Three types of inputs can be handled:
// - SETCC
// - TRUNCATE
// - Logical operation (AND/OR/XOR)
// There is also a special case that is handled (namely a complement operation
// achieved with xor %a, -1).
SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
  assert(isLogicOp(LogicOp.getOpcode()) &&
         "Can only handle logic operations here.");
  assert(LogicOp.getValueType() == MVT::i1 &&
         "Can only handle logic operations on i1 values here.");
  SDLoc dl(LogicOp);
  SDValue LHS, RHS;

  // Special case: xor %a, -1
  bool IsBitwiseNegation = isBitwiseNot(LogicOp);

  // Produces a GPR sequence for each operand of the binary logic operation.
  // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
  // the value in a GPR and for logic operations, it will recursively produce
  // a GPR sequence for the operation.
  auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
    unsigned OperandOpcode = Operand.getOpcode();
    if (OperandOpcode == ISD::SETCC)
      return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
    else if (OperandOpcode == ISD::TRUNCATE) {
      SDValue InputOp = Operand.getOperand(0);
      EVT InVT = InputOp.getValueType();
      // Truncation to i1 is a clear of all bits but the low one (rldicl with
      // a 63-bit mask begin), at the input's natural width.
      return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
                                            PPC::RLDICL, dl, InVT, InputOp,
                                            S->getI64Imm(0, dl),
                                            S->getI64Imm(63, dl)), 0);
    } else if (isLogicOp(OperandOpcode))
      return computeLogicOpInGPR(Operand);
    return SDValue();
  };
  LHS = getLogicOperand(LogicOp.getOperand(0));
  RHS = getLogicOperand(LogicOp.getOperand(1));

  // If a GPR sequence can't be produced for the LHS we can't proceed.
  // Not producing a GPR sequence for the RHS is only a problem if this isn't
  // a bitwise negation operation.
  if (!LHS || (!RHS && !IsBitwiseNegation))
    return SDValue();

  NumLogicOpsOnComparison++;

  // We will use the inputs as 64-bit values.
  if (LHS.getValueType() == MVT::i32)
    LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
  if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
    RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);

  unsigned NewOpc;
  switch (LogicOp.getOpcode()) {
  default: llvm_unreachable("Unknown logic operation.");
  case ISD::AND: NewOpc = PPC::AND8; break;
  case ISD::OR:  NewOpc = PPC::OR8;  break;
  case ISD::XOR: NewOpc = PPC::XOR8; break;
  }

  if (IsBitwiseNegation) {
    // xor %a, -1 on an i1 value is just xori %a, 1.
    RHS = S->getI64Imm(1, dl);
    NewOpc = PPC::XORI8;
  }

  return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);

}

/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
/// Otherwise just reinterpret it as a 64-bit value.
/// Useful when emitting comparison code for 32-bit values without using
/// the compare instruction (which only considers the lower 32-bits).
SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
  assert(Input.getValueType() == MVT::i32 &&
         "Can only sign-extend 32-bit values here.");
  unsigned Opc = Input.getOpcode();

  // The value was sign extended and then truncated to 32-bits. No need to
  // sign extend it again.
  if (Opc == ISD::TRUNCATE &&
      (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
       Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
  // The input is a sign-extending load. All ppc sign-extending loads
  // sign-extend to the full 64-bits.
  if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  // We don't sign-extend constants.
  if (InputConst)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  SDLoc dl(Input);
  SignExtensionsAdded++;
  return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
                                        MVT::i64, Input), 0);
}

/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
/// Otherwise just reinterpret it as a 64-bit value.
/// Useful when emitting comparison code for 32-bit values without using
/// the compare instruction (which only considers the lower 32-bits).
SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
  assert(Input.getValueType() == MVT::i32 &&
         "Can only zero-extend 32-bit values here.");
  unsigned Opc = Input.getOpcode();

  // The only condition under which we can omit the actual extend instruction:
  // - The value is a positive constant
  // - The value comes from a load that isn't a sign-extending load
  // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
  bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
    (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
     Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
  if (IsTruncateOfZExt)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  if (InputConst && InputConst->getSExtValue() >= 0)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
  // The input is a load that doesn't sign-extend (it will be zero-extended).
  if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  // None of the above, need to zero-extend. Clear the top 32 bits with a
  // 32-bit-mask rldicl.
  SDLoc dl(Input);
  ZeroExtensionsAdded++;
  return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
                                        S->getI64Imm(0, dl),
                                        S->getI64Imm(32, dl)), 0);
}

// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
// course not actual zero/sign extensions that will generate machine code,
// they're just a way to reinterpret a 32 bit value in a register as a
// 64 bit value and vice-versa.
SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
                                                ExtOrTruncConversion Conv) {
  SDLoc dl(NatWidthRes);

  // For reinterpreting 32-bit values as 64 bit values, we generate
  // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
  if (Conv == ExtOrTruncConversion::Ext) {
    SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
    SDValue SubRegIdx =
      CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
    return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
                                          ImDef, NatWidthRes, SubRegIdx), 0);
  }

  assert(Conv == ExtOrTruncConversion::Trunc &&
         "Unknown conversion between 32 and 64 bit values.");
  // For reinterpreting 64-bit values as 32-bit values, we just need to
  // EXTRACT_SUBREG (i.e. extract the low word).
  SDValue SubRegIdx =
    CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
                                        NatWidthRes, SubRegIdx), 0);
}

// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
// Handle both zero-extensions and sign-extensions.
SDValue
IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
                                                         ZeroCompare CmpTy) {
  EVT InVT = LHS.getValueType();
  bool Is32Bit = InVT == MVT::i32;
  SDValue ToExtend;

  // Produce the value that needs to be either zero or sign extended.
  switch (CmpTy) {
  case ZeroCompare::GEZExt:
  case ZeroCompare::GESExt:
    // (LHS >= 0) starts from the complemented sign bit: nor %a, %a.
    ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
                                              dl, InVT, LHS, LHS), 0);
    break;
  case ZeroCompare::LEZExt:
  case ZeroCompare::LESExt: {
    if (Is32Bit) {
      // Upper 32 bits cannot be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
      ToExtend =
        SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                       Neg, S->getI64Imm(1, dl),
                                       S->getI64Imm(63, dl)), 0);
    } else {
      // (LHS <= 0) via (or %a, (add %a, -1)) whose sign bit is then used.
      SDValue Addi =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(~0ULL, dl)), 0);
      ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                                Addi, LHS), 0);
    }
    break;
  }
  }

  // For 64-bit sequences, the extensions are the same for the GE/LE cases.
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                          ToExtend, S->getI64Imm(1, dl),
                                          S->getI64Imm(63, dl)), 0);
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
                                          S->getI64Imm(63, dl)), 0);

  assert(Is32Bit && "Should have handled the 32-bit sequences above.");
  // For 32-bit sequences, the extensions differ between GE/LE cases.
  switch (CmpTy) {
  case ZeroCompare::GEZExt: {
    SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
                           S->getI32Imm(31, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                          ShiftOps), 0);
  }
  case ZeroCompare::GESExt:
    return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
                                          S->getI32Imm(31, dl)), 0);
  case ZeroCompare::LEZExt:
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(1, dl)), 0);
  case ZeroCompare::LESExt:
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(-1, dl)), 0);
  }

  // The above case covers all the enumerators so it can't have a default clause
  // to avoid compiler warnings.
  llvm_unreachable("Unknown zero-comparison type.");
}

/// Produces a zero-extended result of comparing two 32-bit values according to
/// the passed condition code.
SDValue
IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  // Bail out if the option only permits other kinds of compares.
  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
      CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
    // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
    SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
      S->getI32Imm(31, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                          ShiftOps), 0);
  }
  case ISD::SETNE: {
    // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
    // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
    SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
      S->getI32Imm(31, dl) };
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
    return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
                                          S->getI32Imm(1, dl)), 0);
  }
  case ISD::SETGE: {
    // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
    // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

    // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
    // by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
    // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
    if (IsRHSZero) {
      if (CmpInGPR == ICGPR_NonExtIn) // (redundant with the check above)
        return SDValue();
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    }

    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue Sub =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
                                     S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
              0);
    return
      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
                                     MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
  }
  case ISD::SETGT: {
    // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
    // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
    // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
    // Handle SETLT -1 (which is equivalent to SETGE 0).
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

    if (IsRHSZero) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      // The upper 32-bits of the register can't be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      RHS = signExtendInputIfNeeded(RHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                     Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
    }
    // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
    // (%b < %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
    // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
    // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
    // Handle SETLT 1 (which is equivalent to SETLE 0).
    if (IsRHSOne) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    }

    if (IsRHSZero) {
      SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
                             S->getI32Imm(31, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                            ShiftOps), 0);
    }

    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue SUBFNode =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                          SUBFNode, S->getI64Imm(1, dl),
                                          S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETUGE:
    // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
    // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
    SDValue SrdiNode =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     Subtract, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
                                          S->getI32Imm(1, dl)), 0);
  }
  case ISD::SETUGT:
    // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
    // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                          Subtract, S->getI64Imm(1, dl),
                                          S->getI64Imm(63, dl)), 0);
  }
  }
}

/// Produces a sign-extended result of comparing two 32-bit values according to
/// the passed condition code.
SDValue
IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  // Bail out if the option only permits other kinds of compares.
  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
      CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;

  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // (sext (setcc %a, %b, seteq)) ->
    //   (ashr (shl (ctlz (xor %a, %b)), 58), 63)
    // (sext (setcc %a, 0, seteq)) ->
    //   (ashr (shl (ctlz %a), 58), 63)
    SDValue CountInput = IsRHSZero ?
      LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Cntlzw =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
    SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
                         S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
    SDValue Slwi =
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
    // Negating the 0/1 result produces the 0/-1 sign-extended value.
    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
  }
  case ISD::SETNE: {
    // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
    // flip the bit, finally take 2's complement.
    // (sext (setcc %a, %b, setne)) ->
    //   (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
    // Same as above, but the first xor is not needed.
    // (sext (setcc %a, 0, setne)) ->
    //   (neg (xor (lshr (ctlz %a), 5), 1))
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
    SDValue ShiftOps[] =
      { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
    SDValue Xori =
      SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
                                     S->getI32Imm(1, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
  }
  case ISD::SETGE: {
    // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
    // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

    // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
    // by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
    // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);

    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue SUBFNode =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 0);
    SDValue Srdi =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     SUBFNode, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
                                          S->getI32Imm(-1, dl)), 0);
  }
  case ISD::SETGT: {
    // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
    // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
    // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    if (IsRHSZero) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      // The upper 32-bits of the register can't be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      RHS = signExtendInputIfNeeded(RHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
                                            S->getI64Imm(63, dl)), 0);
    }
    // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
    // (%b < %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
    // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
    // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
    // Handle SETLT 1 (which is equivalent to SETLE 0).
    if (IsRHSOne) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    }
    if (IsRHSZero)
      return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
                                            S->getI32Imm(31, dl)), 0);

    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue SUBFNode =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                          SUBFNode, S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETUGE:
    // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
    // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
                                     S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
              0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
                                          S->getI32Imm(-1, dl)), 0);
  }
  case ISD::SETUGT:
    // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
    // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                          Subtract, S->getI64Imm(63, dl)), 0);
  }
  }
}

/// Produces a zero-extended result of comparing two 64-bit values according to
/// the passed condition code.
SDValue
IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  // Bail out if the option only permits other kinds of compares.
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
    // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
                                          S->getI64Imm(58, dl),
                                          S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETNE: {
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
    // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
    // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue AC =
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
                                     Xor, S->getI32Imm(~0U, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
                                          Xor, AC.getValue(1)), 0);
  }
  case ISD::SETGE: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setge)) ->
    //   (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
    // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
    // Handle (%a >= %b) as (%b <= %a) by swapping and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setle)) ->
    //   (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
    // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    SDValue ShiftL =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                     S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    SDValue ShiftR =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
                                     S->getI64Imm(63, dl)), 0);
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                          ShiftR, ShiftL, SubtractCarry), 0);
  }
  case ISD::SETGT: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setgt)) ->
    //   (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
    // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
    if (IsRHSZero) {
      SDValue Addi =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(~0ULL, dl)), 0);
      SDValue Nor =
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
                                            S->getI64Imm(1, dl),
                                            S->getI64Imm(63, dl)), 0);
    }
    // Handle (%a > %b) as (%b < %a) by swapping and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setlt)) ->
    //   (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
    // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
    if (IsRHSOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    if (IsRHSZero)
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                            S->getI64Imm(1, dl),
                                            S->getI64Imm(63, dl)), 0);
    SDValue SRADINode =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                     LHS, S->getI64Imm(63, dl)), 0);
    SDValue SRDINode =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     RHS, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
3384 SDValue SUBFC8Carry = 3385 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3386 RHS, LHS), 1); 3387 SDValue ADDE8Node = 3388 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, 3389 SRDINode, SRADINode, SUBFC8Carry), 0); 3390 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, 3391 ADDE8Node, S->getI64Imm(1, dl)), 0); 3392 } 3393 case ISD::SETUGE: 3394 // {subc.reg, subc.CA} = (subcarry %a, %b) 3395 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1) 3396 std::swap(LHS, RHS); 3397 LLVM_FALLTHROUGH; 3398 case ISD::SETULE: { 3399 // {subc.reg, subc.CA} = (subcarry %b, %a) 3400 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1) 3401 SDValue SUBFC8Carry = 3402 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3403 LHS, RHS), 1); 3404 SDValue SUBFE8Node = 3405 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, 3406 LHS, LHS, SUBFC8Carry), 0); 3407 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, 3408 SUBFE8Node, S->getI64Imm(1, dl)), 0); 3409 } 3410 case ISD::SETUGT: 3411 // {subc.reg, subc.CA} = (subcarry %b, %a) 3412 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA) 3413 std::swap(LHS, RHS); 3414 LLVM_FALLTHROUGH; 3415 case ISD::SETULT: { 3416 // {subc.reg, subc.CA} = (subcarry %a, %b) 3417 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA) 3418 SDValue SubtractCarry = 3419 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3420 RHS, LHS), 1); 3421 SDValue ExtSub = 3422 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, 3423 LHS, LHS, SubtractCarry), 0); 3424 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, 3425 ExtSub), 0); 3426 } 3427 } 3428 } 3429 3430 /// Produces a sign-extended result of comparing two 64-bit values according to 3431 /// the passed condition code. 
/// \p RHSValue is the sign-extended constant value of \p RHS if it is a
/// constant, and INT64_MAX otherwise (see getSETCCInGPR).
SDValue
IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  // Bail out if the user has constrained this transformation to 32-bit
  // inputs or to zero-extended results only.
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
    // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
    // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
    SDValue AddInput = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Addic =
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
                                     AddInput, S->getI32Imm(~0U, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
                                          Addic, Addic.getValue(1)), 0);
  }
  case ISD::SETNE: {
    // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
    // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
    // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
    // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue SC =
      SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
                                     Xor, S->getI32Imm(0, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
                                          SC, SC.getValue(1)), 0);
  }
  case ISD::SETGE: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setge)) ->
    //   (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
    // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

    // General case: (%a >= %b) == (%b <= %a). Swap and fall through to
    // SETLE, re-deriving the RHS-is-zero fact for the new RHS.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setle)) ->
    //   (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
    // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    SDValue ShiftR =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
                                     S->getI64Imm(63, dl)), 0);
    SDValue ShiftL =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                     S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue Adde =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                     ShiftR, ShiftL, SubtractCarry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
  }
  case ISD::SETGT: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setgt)) ->
    //   -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
    // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    if (IsRHSZero) {
      SDValue Add =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(-1, dl)), 0);
      SDValue Nor =
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
                                            S->getI64Imm(63, dl)), 0);
    }
    // General case: (%a > %b) == (%b < %a). Swap and fall through to SETLT,
    // re-deriving the constant-RHS facts for the new RHS.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setlt)) ->
    //   -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
    // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
    if (IsRHSOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    if (IsRHSZero) {
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
                                            S->getI64Imm(63, dl)), 0);
    }
    SDValue SRADINode =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                     LHS, S->getI64Imm(63, dl)), 0);
    SDValue SRDINode =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     RHS, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ADDE8Node =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
                                     SRDINode, SRADINode, SUBFC8Carry), 0);
    SDValue XORI8Node =
      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
                                     ADDE8Node, S->getI64Imm(1, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
                                          XORI8Node), 0);
  }
  case ISD::SETUGE:
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue ExtSub =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
                                     LHS, SubtractCarry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
                                          ExtSub, ExtSub), 0);
  }
  case ISD::SETUGT:
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
    SDValue SubCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
                                          LHS, LHS, SubCarry), 0);
  }
  }
}

/// Do all uses of this SDValue need the result in a GPR?
/// This is meant to be used on values that have type i1 since
/// it is somewhat meaningless to ask if values of other types
/// should be kept in GPR's.
static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
  assert(Compare.getOpcode() == ISD::SETCC &&
         "An ISD::SETCC node required here.");

  // For values that have a single use, the caller should obviously already have
  // checked if that use is an extending use. We check the other uses here.
  if (Compare.hasOneUse())
    return true;
  // We want the value in a GPR if it is being extended, used for a select, or
  // used in logical operations.
  // Any use that is not an extend, a select, or a logical op forces the i1
  // result to live in a CR, so the GPR expansion would not pay off.
  for (auto CompareUse : Compare.getNode()->uses())
    if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
        CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
        CompareUse->getOpcode() != ISD::SELECT &&
        !isLogicOp(CompareUse->getOpcode())) {
      OmittedForNonExtendUses++;
      return false;
    }
  return true;
}

/// Returns an equivalent of a SETCC node but with the result the same width as
/// the inputs. This can also be used for SELECT_CC if either the true or false
/// values is a power of two while the other is zero.
SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
                                                SetccInGPROpts ConvOpts) {
  assert((Compare.getOpcode() == ISD::SETCC ||
          Compare.getOpcode() == ISD::SELECT_CC) &&
         "An ISD::SETCC node required here.");

  // Don't convert this comparison to a GPR sequence because there are uses
  // of the i1 result (i.e. uses that require the result in the CR).
  if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
    return SDValue();

  SDValue LHS = Compare.getOperand(0);
  SDValue RHS = Compare.getOperand(1);

  // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
  int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
  ISD::CondCode CC =
    cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
  EVT InputVT = LHS.getValueType();
  if (InputVT != MVT::i32 && InputVT != MVT::i64)
    return SDValue();

  // The *Invert options ask for the logically inverted comparison, so flip
  // the condition code up front.
  if (ConvOpts == SetccInGPROpts::ZExtInvert ||
      ConvOpts == SetccInGPROpts::SExtInvert)
    CC = ISD::getSetCCInverse(CC, InputVT);

  bool Inputs32Bit = InputVT == MVT::i32;

  SDLoc dl(Compare);
  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  // INT64_MAX serves as the "RHS is not a constant" sentinel for the
  // get{32,64}Bit{S,Z}ExtCompare helpers.
  int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
  bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
    ConvOpts == SetccInGPROpts::SExtInvert;

  // Dispatch on input width and requested extension kind.
  if (IsSext && Inputs32Bit)
    return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
  else if (Inputs32Bit)
    return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
  else if (IsSext)
    return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
  return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
}

} // end anonymous namespace

bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
  // Only i32/i64 results are candidates for the GPR compare expansion.
  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return false;

  // This optimization will emit code that assumes 64-bit registers
  // so we don't want to run it in 32-bit mode. Also don't run it
  // on functions that are not to be optimized.
  if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
    return false;

  // For POWER10, it is more profitable to use the set boolean extension
  // instructions rather than the integer compare elimination codegen.
  // Users can override this via the command line option, `--ppc-gpr-icmps`.
3677 if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1()) 3678 return false; 3679 3680 switch (N->getOpcode()) { 3681 default: break; 3682 case ISD::ZERO_EXTEND: 3683 case ISD::SIGN_EXTEND: 3684 case ISD::AND: 3685 case ISD::OR: 3686 case ISD::XOR: { 3687 IntegerCompareEliminator ICmpElim(CurDAG, this); 3688 if (SDNode *New = ICmpElim.Select(N)) { 3689 ReplaceNode(N, New); 3690 return true; 3691 } 3692 } 3693 } 3694 return false; 3695 } 3696 3697 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { 3698 if (N->getValueType(0) != MVT::i32 && 3699 N->getValueType(0) != MVT::i64) 3700 return false; 3701 3702 if (!UseBitPermRewriter) 3703 return false; 3704 3705 switch (N->getOpcode()) { 3706 default: break; 3707 case ISD::ROTL: 3708 case ISD::SHL: 3709 case ISD::SRL: 3710 case ISD::AND: 3711 case ISD::OR: { 3712 BitPermutationSelector BPS(CurDAG); 3713 if (SDNode *New = BPS.Select(N)) { 3714 ReplaceNode(N, New); 3715 return true; 3716 } 3717 return false; 3718 } 3719 } 3720 3721 return false; 3722 } 3723 3724 /// SelectCC - Select a comparison of the specified values with the specified 3725 /// condition code, returning the CR# of the expression. 3726 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, 3727 const SDLoc &dl, SDValue Chain) { 3728 // Always select the LHS. 3729 unsigned Opc; 3730 3731 if (LHS.getValueType() == MVT::i32) { 3732 unsigned Imm; 3733 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 3734 if (isInt32Immediate(RHS, Imm)) { 3735 // SETEQ/SETNE comparison with 16-bit immediate, fold it. 3736 if (isUInt<16>(Imm)) 3737 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, 3738 getI32Imm(Imm & 0xFFFF, dl)), 3739 0); 3740 // If this is a 16-bit signed immediate, fold it. 
        if (isInt<16>((int)Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);

        // For non-equality comparisons, the default code would materialize the
        // constant, then compare against it, like this:
        //   lis r2, 4660
        //   ori r2, r2, 22136
        //   cmpw cr0, r3, r2
        // Since we are just comparing for equality, we can emit this instead:
        //   xoris r0,r3,0x1234
        //   cmplwi cr0,r0,0x5678
        //   beq cr0,L6
        SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
                                           getI32Imm(Imm >> 16, dl)), 0);
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
      }
      Opc = PPC::CMPLW;
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned comparison: fold a 16-bit unsigned immediate into cmplwi.
      if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
      Opc = PPC::CMPLW;
    } else {
      // Signed comparison: fold a 16-bit signed immediate into cmpwi.
      int16_t SImm;
      if (isIntS16Immediate(RHS, SImm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
                                              getI32Imm((int)SImm & 0xFFFF,
                                                        dl)),
                       0);
      Opc = PPC::CMPW;
    }
  } else if (LHS.getValueType() == MVT::i64) {
    uint64_t Imm;
    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
      if (isInt64Immediate(RHS.getNode(), Imm)) {
        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
        if (isUInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);
        // If this is a 16-bit signed immediate, fold it.
        if (isInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);

        // For non-equality comparisons, the default code would materialize the
        // constant, then compare against it, like this:
        //   lis r2, 4660
        //   ori r2, r2, 22136
        //   cmpd cr0, r3, r2
        // Since we are just comparing for equality, we can emit this instead:
        //   xoris r0,r3,0x1234
        //   cmpldi cr0,r0,0x5678
        //   beq cr0,L6
        // The xoris trick only works if the constant fits in 32 bits.
        if (isUInt<32>(Imm)) {
          SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
                                             getI64Imm(Imm >> 16, dl)), 0);
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
                                                getI64Imm(Imm & 0xFFFF, dl)),
                         0);
        }
      }
      Opc = PPC::CMPLD;
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
                                              getI64Imm(Imm & 0xFFFF, dl)), 0);
      Opc = PPC::CMPLD;
    } else {
      int16_t SImm;
      if (isIntS16Immediate(RHS, SImm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
                                              getI64Imm(SImm & 0xFFFF, dl)),
                       0);
      Opc = PPC::CMPD;
    }
  } else if (LHS.getValueType() == MVT::f32) {
    if (Subtarget->hasSPE()) {
      // SPE float compares: pick the SPE compare whose GT bit answers CC
      // (the predicate mapping is handled by getPredicateForSetCC).
      switch (CC) {
        default:
        case ISD::SETEQ:
        case ISD::SETNE:
          Opc = PPC::EFSCMPEQ;
          break;
        case ISD::SETLT:
        case ISD::SETGE:
        case ISD::SETOLT:
        case ISD::SETOGE:
        case ISD::SETULT:
        case ISD::SETUGE:
          Opc = PPC::EFSCMPLT;
          break;
        case ISD::SETGT:
        case ISD::SETLE:
        case ISD::SETOGT:
        case ISD::SETOLE:
        case ISD::SETUGT:
        case ISD::SETULE:
          Opc = PPC::EFSCMPGT;
          break;
      }
    } else
      Opc = PPC::FCMPUS;
  } else if (LHS.getValueType() == MVT::f64) {
    if (Subtarget->hasSPE()) {
      switch (CC) {
        default:
        case ISD::SETEQ:
        case ISD::SETNE:
          Opc = PPC::EFDCMPEQ;
          break;
        case ISD::SETLT:
        case ISD::SETGE:
        case ISD::SETOLT:
        case ISD::SETOGE:
        case ISD::SETULT:
        case ISD::SETUGE:
          Opc = PPC::EFDCMPLT;
          break;
        case ISD::SETGT:
        case ISD::SETLE:
        case ISD::SETOGT:
        case ISD::SETOLE:
        case ISD::SETUGT:
        case ISD::SETULE:
          Opc = PPC::EFDCMPGT;
          break;
      }
    } else
      Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
  } else {
    assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
    assert(Subtarget->hasVSX() && "__float128 requires VSX");
    Opc = PPC::XSCMPUQP;
  }
  // With a chain (strict FP compare) the node also produces a chain result.
  if (Chain)
    return SDValue(
        CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
        0);
  else
    return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}

/// Map a SETCC condition code to the PPC branch predicate that tests the
/// corresponding CR bit produced by SelectCC.
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT,
                                           const PPCSubtarget *Subtarget) {
  // For SPE instructions, the result is in GT bit of the CR
  bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();

  switch (CC) {
  case ISD::SETUEQ:
  case ISD::SETONE:
  case ISD::SETOLE:
  case ISD::SETOGE:
    llvm_unreachable("Should be lowered by legalize!");
  default: llvm_unreachable("Unknown condition!");
  case ISD::SETOEQ:
  case ISD::SETEQ:
    return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
  case ISD::SETUNE:
  case ISD::SETNE:
    return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
  case ISD::SETOLT:
  case ISD::SETLT:
    return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
  case ISD::SETULE:
  case ISD::SETLE:
    return PPC::PRED_LE;
  case ISD::SETOGT:
  case ISD::SETGT:
    return PPC::PRED_GT;
  case ISD::SETUGE:
  case ISD::SETGE:
    return UseSPE ?
PPC::PRED_LE : PPC::PRED_GE; 3918 case ISD::SETO: return PPC::PRED_NU; 3919 case ISD::SETUO: return PPC::PRED_UN; 3920 // These two are invalid for floating point. Assume we have int. 3921 case ISD::SETULT: return PPC::PRED_LT; 3922 case ISD::SETUGT: return PPC::PRED_GT; 3923 } 3924 } 3925 3926 /// getCRIdxForSetCC - Return the index of the condition register field 3927 /// associated with the SetCC condition, and whether or not the field is 3928 /// treated as inverted. That is, lt = 0; ge = 0 inverted. 3929 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { 3930 Invert = false; 3931 switch (CC) { 3932 default: llvm_unreachable("Unknown condition!"); 3933 case ISD::SETOLT: 3934 case ISD::SETLT: return 0; // Bit #0 = SETOLT 3935 case ISD::SETOGT: 3936 case ISD::SETGT: return 1; // Bit #1 = SETOGT 3937 case ISD::SETOEQ: 3938 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ 3939 case ISD::SETUO: return 3; // Bit #3 = SETUO 3940 case ISD::SETUGE: 3941 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE 3942 case ISD::SETULE: 3943 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE 3944 case ISD::SETUNE: 3945 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE 3946 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO 3947 case ISD::SETUEQ: 3948 case ISD::SETOGE: 3949 case ISD::SETOLE: 3950 case ISD::SETONE: 3951 llvm_unreachable("Invalid branch code: should be expanded by legalize"); 3952 // These are invalid for floating point. Assume integer. 3953 case ISD::SETULT: return 0; 3954 case ISD::SETUGT: return 1; 3955 } 3956 } 3957 3958 // getVCmpInst: return the vector compare instruction for the specified 3959 // vector type and condition code. Since this is for altivec specific code, 3960 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128, 3961 // and v4f32). 
// On return, \p Swap tells the caller to swap the compare operands and
// \p Negate tells it to complement the compare's result.
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
                                bool HasVSX, bool &Swap, bool &Negate) {
  Swap = false;
  Negate = false;

  if (VecVT.isFloatingPoint()) {
    /* Handle some cases by swapping input operands.  */
    switch (CC) {
      case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
      case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
      case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
      case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
      default: break;
    }
    /* Handle some cases by negating the result.  */
    switch (CC) {
      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
      case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
      case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
      case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
      default: break;
    }
    /* We have instructions implementing the remaining cases.  */
    switch (CC) {
      case ISD::SETEQ:
      case ISD::SETOEQ:
        if (VecVT == MVT::v4f32)
          return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
        else if (VecVT == MVT::v2f64)
          return PPC::XVCMPEQDP;
        break;
      case ISD::SETGT:
      case ISD::SETOGT:
        if (VecVT == MVT::v4f32)
          return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
        else if (VecVT == MVT::v2f64)
          return PPC::XVCMPGTDP;
        break;
      case ISD::SETGE:
      case ISD::SETOGE:
        if (VecVT == MVT::v4f32)
          return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
        else if (VecVT == MVT::v2f64)
          return PPC::XVCMPGEDP;
        break;
      default:
        break;
    }
    llvm_unreachable("Invalid floating-point vector compare condition");
  } else {
    /* Handle some cases by swapping input operands.  */
    switch (CC) {
      case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
      case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
      default: break;
    }
    /* Handle some cases by negating the result.  */
    switch (CC) {
      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
      case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
      case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
      case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
      default: break;
    }
    /* We have instructions implementing the remaining cases.  */
    switch (CC) {
      case ISD::SETEQ:
      case ISD::SETUEQ:
        if (VecVT == MVT::v16i8)
          return PPC::VCMPEQUB;
        else if (VecVT == MVT::v8i16)
          return PPC::VCMPEQUH;
        else if (VecVT == MVT::v4i32)
          return PPC::VCMPEQUW;
        else if (VecVT == MVT::v2i64)
          return PPC::VCMPEQUD;
        else if (VecVT == MVT::v1i128)
          return PPC::VCMPEQUQ;
        break;
      case ISD::SETGT:
        if (VecVT == MVT::v16i8)
          return PPC::VCMPGTSB;
        else if (VecVT == MVT::v8i16)
          return PPC::VCMPGTSH;
        else if (VecVT == MVT::v4i32)
          return PPC::VCMPGTSW;
        else if (VecVT == MVT::v2i64)
          return PPC::VCMPGTSD;
        else if (VecVT == MVT::v1i128)
          return PPC::VCMPGTSQ;
        break;
      case ISD::SETUGT:
        if (VecVT == MVT::v16i8)
          return PPC::VCMPGTUB;
        else if (VecVT == MVT::v8i16)
          return PPC::VCMPGTUH;
        else if (VecVT == MVT::v4i32)
          return PPC::VCMPGTUW;
        else if (VecVT == MVT::v2i64)
          return PPC::VCMPGTUD;
        else if (VecVT == MVT::v1i128)
          return PPC::VCMPGTUQ;
        break;
      default:
        break;
    }
    llvm_unreachable("Invalid integer vector compare condition");
  }
}

/// Try to select a SETCC (or STRICT_FSETCC[S]) node. Returns true (and
/// replaces \p N) on success.
bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  SDLoc dl(N);
  unsigned Imm;
  bool IsStrict =
      N->isStrictFPOpcode();
  // Strict FP setcc nodes carry a chain in operand 0, shifting the
  // LHS/RHS/CC operand positions by one.
  ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
  EVT PtrVT =
      CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);
  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();

  SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
  SDValue RHS = N->getOperand(IsStrict ? 2 : 1);

  if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
    // We can codegen setcc op, imm very efficiently compared to a brcond.
    // Check for those cases here.
    // setcc op, 0
    if (Imm == 0) {
      SDValue Op = LHS;
      switch (CC) {
      default: break;
      case ISD::SETEQ: {
        // (op == 0) -> cntlzw(op) >> 5: only op == 0 has 32 leading zeros.
        Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
        SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETNE: {
        if (isPPC64) break;
        SDValue AD =
          SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                         Op, getI32Imm(~0U, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
        return true;
      }
      case ISD::SETLT: {
        // (op < 0) -> sign bit of op.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // (op > 0) -> sign bit of (-op & ~op).
        SDValue T =
          SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
        T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
        SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      }
    } else if (Imm == ~0U) {        // setcc op, -1
      SDValue Op = LHS;
      switch (CC) {
      default: break;
      case ISD::SETEQ:
        if (isPPC64) break;
        // (op == -1) -> carry out of (op + 1), via addze of a zero.
        Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(1, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
                             SDValue(CurDAG->getMachineNode(PPC::LI, dl,
                                                            MVT::i32,
                                                            getI32Imm(0, dl)),
                                     0), Op.getValue(1));
        return true;
      case ISD::SETNE: {
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
        SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(~0U, dl));
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
                             SDValue(AD, 1));
        return true;
      }
      case ISD::SETLT: {
        // (op < -1) -> sign bit of ((op + 1) & op).
        SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
                                                    getI32Imm(1, dl)), 0);
        SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
                                                    Op), 0);
        SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // (op > -1) -> complement of op's sign bit.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
        CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
        return true;
      }
      }
    }
  }

  // Altivec Vector compare instructions do not set any CR register by default and
  // vector compare operations return the same type as the operands.
  if (!IsStrict && LHS.getValueType().isVector()) {
    if (Subtarget->hasSPE())
      return false;

    EVT VecVT = LHS.getValueType();
    bool Swap, Negate;
    unsigned int VCmpInst =
        getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
    if (Swap)
      std::swap(LHS, RHS);

    EVT ResVT = VecVT.changeVectorElementTypeToInteger();
    if (Negate) {
      // Emit the compare and complement it (vnor of the result with itself).
      SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
      CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
                           ResVT, VCmp, VCmp);
      return true;
    }

    CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
    return true;
  }

  if (Subtarget->useCRBits())
    return false;

  bool Inv;
  unsigned Idx = getCRIdxForSetCC(CC, Inv);
  SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
  // For strict compares, forward the chain result to the original node's users.
  if (IsStrict)
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
  SDValue IntCR;

  // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
  // The correct compare instruction is already set by SelectCC()
  if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
    Idx = 1;
  }

  // Force the ccreg into CR7.
  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);

  SDValue InFlag(nullptr, 0);  // Null incoming flag value.
  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
                               InFlag).getValue(1);

  IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
                                         CCReg), 0);

  // Rotate the desired CR bit into bit 31 and mask everything else off.
  SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
                    getI32Imm(31, dl), getI32Imm(31, dl) };
  if (!Inv) {
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // Get the specified bit.
  // Inverted predicate: extract the CR bit with rlwinm as above, then flip
  // it with xori 1 so the i32 result is the logical negation of the bit.
  SDValue Tmp =
    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
  return true;
}

/// Does this node represent a load/store node whose address can be represented
/// with a register plus an immediate that's a multiple of \p Val:
/// Returns false for nodes that are neither loads nor stores (AddrOp stays
/// empty and fails every check below).
bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
  LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
  StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
  SDValue AddrOp;
  // For a load, operand 1 is the address; for a store the address is
  // operand 2 (operand 1 is the value being stored).
  if (LDN)
    AddrOp = LDN->getOperand(1);
  else if (STN)
    AddrOp = STN->getOperand(2);

  // If the address points to a frame object or a frame object with an offset,
  // we need to check the object alignment.
  short Imm = 0;
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
          AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
                                           AddrOp)) {
    // If op0 is a frame index that is under aligned, we can't do it either,
    // because it is translated to r31 or r1 + slot + offset. We won't know the
    // slot number until the stack frame is finalized.
    const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
    unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
    if ((SlotAlign % Val) != 0)
      return false;

    // If we have an offset, we need further check on the offset.
    if (AddrOp.getOpcode() != ISD::ADD)
      return true;
  }

  // Frame-index-plus-immediate or register-plus-immediate: the offset itself
  // must be a signed 16-bit immediate that is a multiple of Val.
  if (AddrOp.getOpcode() == ISD::ADD)
    return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);

  // If the address comes from the outside, the offset will be zero.
  return AddrOp.getOpcode() == ISD::CopyFromReg;
}

void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

/// Check whether the SELECT_CC node \p N matches one of the patterns that can
/// be selected as a single Power9 setb instruction (see the pattern comments
/// below). On success, \p NeedSwapOps is set when the comparison operands must
/// be swapped before emitting the compare, and \p IsUnCmp is set when an
/// unsigned comparison must be used.
/// NOTE(review): the \p DAG parameter is unused in this body.
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
                         bool &NeedSwapOps, bool &IsUnCmp) {

  assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue TrueRes = N->getOperand(2);
  SDValue FalseRes = N->getOperand(3);
  // The true value must be a constant, and the result must be i32 or i64.
  ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
  if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
                     N->getSimpleValueType(0) != MVT::i32))
    return false;

  // We are looking for any of:
  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
  // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
  int64_t TrueResVal = TrueConst->getSExtValue();
  if ((TrueResVal < -1 || TrueResVal > 1) ||
      (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
      (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
      (TrueResVal == 0 &&
       (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
    return false;

  // The inner comparison: either the false value itself (select_cc form) or
  // the operand of the sext/zext (setcc form).
  SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
                           ? FalseRes
                           : FalseRes.getOperand(0);
  bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
  if (SetOrSelCC.getOpcode() != ISD::SETCC &&
      SetOrSelCC.getOpcode() != ISD::SELECT_CC)
    return false;

  // Without this setb optimization, the outer SELECT_CC will be manually
  // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
  // transforms pseudo instruction to isel instruction. When there are more than
  // one use for result like zext/sext, with current optimization we only see
  // isel is replaced by setb but can't see any significant gain. Since
  // setb has longer latency than original isel, we should avoid this. Another
  // point is that setb requires comparison always kept, it can break the
  // opportunity to get the comparison away if we have in future.
  if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
    return false;

  SDValue InnerLHS = SetOrSelCC.getOperand(0);
  SDValue InnerRHS = SetOrSelCC.getOperand(1);
  // Condition code operand index: 4 for select_cc, 2 for setcc.
  ISD::CondCode InnerCC =
      cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
  // If the inner comparison is a select_cc, make sure the true/false values are
  // 1/-1 and canonicalize it if needed.
  if (InnerIsSel) {
    ConstantSDNode *SelCCTrueConst =
        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
    ConstantSDNode *SelCCFalseConst =
        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
    if (!SelCCTrueConst || !SelCCFalseConst)
      return false;
    int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
    int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
    // The values must be -1/1 (requiring a swap) or 1/-1.
    if (SelCCTVal == -1 && SelCCFVal == 1) {
      std::swap(InnerLHS, InnerRHS);
    } else if (SelCCTVal != 1 || SelCCFVal != -1)
      return false;
  }

  // Canonicalize unsigned case: record unsignedness in IsUnCmp and continue
  // with the signed condition code.
  if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
    IsUnCmp = true;
    InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
  }

  // The inner comparison must compare the same two values as the outer one,
  // either in the same order or swapped.
  bool InnerSwapped = false;
  if (LHS == InnerRHS && RHS == InnerLHS)
    InnerSwapped = true;
  else if (LHS != InnerLHS || RHS != InnerRHS)
    return false;

  switch (CC) {
  // (select_cc lhs, rhs, 0, \
  //     (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
  case ISD::SETEQ:
    if (!InnerIsSel)
      return false;
    if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
      return false;
    NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
    break;

  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
  case ISD::SETULT:
    if (!IsUnCmp && InnerCC != ISD::SETNE)
      return false;
    IsUnCmp = true;
    LLVM_FALLTHROUGH;
  case ISD::SETLT:
    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
        (InnerCC == ISD::SETLT && InnerSwapped))
      NeedSwapOps = (TrueResVal == 1);
    else
      return false;
    break;

  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
  case ISD::SETUGT:
    if (!IsUnCmp && InnerCC != ISD::SETNE)
      return false;
    IsUnCmp = true;
    LLVM_FALLTHROUGH;
  case ISD::SETGT:
    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
        (InnerCC == ISD::SETGT && InnerSwapped))
      NeedSwapOps = (TrueResVal == -1);
    else
      return false;
    break;

  default:
    return false;
  }

  LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
  LLVM_DEBUG(N->dump());

  return true;
}

// Return true if it's a software square-root/divide operand.
// Matches either an FTSQRT node directly, or an intrinsic node whose
// operand 0 holds one of the VSX test-for-software-divide/sqrt intrinsic IDs.
static bool isSWTestOp(SDValue N) {
  if (N.getOpcode() == PPCISD::FTSQRT)
    return true;
  if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)))
    return false;
  switch (N.getConstantOperandVal(0)) {
  case Intrinsic::ppc_vsx_xvtdivdp:
  case Intrinsic::ppc_vsx_xvtdivsp:
  case Intrinsic::ppc_vsx_xvtsqrtdp:
  case Intrinsic::ppc_vsx_xvtsqrtsp:
    return true;
  }
  return false;
}

bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
  assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
  // We are looking for following patterns, where `truncate to i1` actually has
  // the same semantic with `and 1`.
  // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
  // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
  // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
  // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
  // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
  // Only equality comparisons against zero are handled.
  if (CC != ISD::SETEQ && CC != ISD::SETNE)
    return false;

  SDValue CmpRHS = N->getOperand(3);
  if (!isa<ConstantSDNode>(CmpRHS) ||
      cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
    return false;

  // The compared value must wrap a software test op (see isSWTestOp).
  SDValue CmpLHS = N->getOperand(2);
  if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
    return false;

  // PCC is the PPC branch predicate selected from the tested bit; 0 means
  // "no match found".
  unsigned PCC = 0;
  bool IsCCNE = CC == ISD::SETNE;
  if (CmpLHS.getOpcode() == ISD::AND &&
      isa<ConstantSDNode>(CmpLHS.getOperand(1)))
    switch (CmpLHS.getConstantOperandVal(1)) {
    case 1:
      PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
      break;
    case 2:
      PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
      break;
    case 4:
      PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
      break;
    case 8:
      PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
      break;
    default:
      return false;
    }
  else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
           CmpLHS.getValueType() == MVT::i1)
    // truncate-to-i1 tests the low bit, same as (and x, 1) above.
    PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;

  if (PCC) {
    SDLoc dl(N);
    // BCC operands: predicate, CR-setting test op, branch target, chain.
    SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
                     N->getOperand(0)};
    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
    return true;
  }
  return false;
}

/// Try to select an i32 AND-with-immediate as a single rlwinm
/// (rotate-left-word-immediate-then-AND-with-mask).
bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  unsigned Imm;
  if (!isInt32Immediate(N->getOperand(1), Imm))
    return false;

  SDLoc dl(N);
  SDValue Val = N->getOperand(0);
  unsigned SH, MB, ME;
  // If this is an and of a value rotated between 0 and 31 bits and then and'd
  // with a mask, emit rlwinm
  if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
    Val = Val.getOperand(0);
    SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
                     getI32Imm(ME, dl)};
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // If this is just a masked value where the input is not handled, and
  // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
  // with a zero shift amount.
  if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
    SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
                     getI32Imm(ME, dl)};
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // AND X, 0 -> 0, not "rlwinm 32".
  if (Imm == 0) {
    // Replace with the zero constant operand directly.
    ReplaceUses(SDValue(N, 0), N->getOperand(1));
    return true;
  }

  return false;
}

/// Try to select an i64 AND-with-immediate as a single rlwinm8 when the mask
/// is a run of ones confined to the low 32 bits.
bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  uint64_t Imm64;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
    return false;

  unsigned MB, ME;
  if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
    //                MB          ME
    // +----------------------+
    // |xxxxxxxxxxx00011111000|
    // +----------------------+
    //  0         32         64
    // We can only do it if MB >= 32 and MB <= ME
    // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
    // we didn't rotate it.
    SDLoc dl(N);
    // rlwinm operates on the low word, so the mask bounds shift down by 32.
    SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
                     getI64Imm(ME - 32, dl)};
    CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
    return true;
  }

  return false;
}

/// Try to select an i64 AND with a "wrapped" run-of-ones mask as a pair of
/// rldicl instructions (see the bit diagrams below).
bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  uint64_t Imm64;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
    return false;

  // Do nothing if it is 16-bit imm as the pattern in the .td file handle
  // it well with "andi.".
  if (isUInt<16>(Imm64))
    return false;

  SDLoc Loc(N);
  SDValue Val = N->getOperand(0);

  // Optimized with two rldicl's as follows:
  // Add missing bits on left to the mask and check that the mask is a
  // wrapped run of ones, i.e.
  // Change pattern |0001111100000011111111|
  //             to |1111111100000011111111|.
  unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
  if (NumOfLeadingZeros != 0)
    // Fill the leading zeros with ones so the mask becomes a (possibly
    // wrapped) run of ones; the added ones are cleared again at the end.
    Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);

  unsigned MB, ME;
  if (!isRunOfOnes64(Imm64, MB, ME))
    return false;

  //         ME     MB                       MB-ME+63
  // +----------------------+     +----------------------+
  // |1111111100000011111111| ->  |0000001111111111111111|
  // +----------------------+     +----------------------+
  //  0                    63      0                    63
  // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
  unsigned OnesOnLeft = ME + 1;
  unsigned ZerosInBetween = (MB - ME + 63) & 63;
  // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
  // on the left the bits that are already zeros in the mask.
  Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
                                       getI64Imm(OnesOnLeft, Loc),
                                       getI64Imm(ZerosInBetween, Loc)),
                0);
  //        MB-ME+63                   ME     MB
  // +----------------------+     +----------------------+
  // |0000001111111111111111| ->  |0001111100000011111111|
  // +----------------------+     +----------------------+
  //  0                    63      0                    63
  // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
  // left the number of ones we previously added.
  SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
                   getI64Imm(NumOfLeadingZeros, Loc)};
  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  return true;
}

/// Try to select (and (or x, c1), c2) as a single rlwimi (bitfield insert)
/// when the combined masks form a run of ones (see the analysis below).
bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  unsigned Imm;
  if (!isInt32Immediate(N->getOperand(1), Imm))
    return false;

  SDValue Val = N->getOperand(0);
  unsigned Imm2;
  // ISD::OR doesn't get all the bitfield insertion fun.
  // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
  // bitfield insert.
  if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
    return false;

  // The idea here is to check whether this is equivalent to:
  //   (c1 & m) | (x & ~m)
  // where m is a run-of-ones mask. The logic here is that, for each bit in
  // c1 and c2:
  //  - if both are 1, then the output will be 1.
  //  - if both are 0, then the output will be 0.
  //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
  //    come from x.
  //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
  //    be 0.
  // If that last condition is never the case, then we can form m from the
  // bits that are the same between c1 and c2.
  unsigned MB, ME;
  if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
    SDLoc dl(N);
    // rlwimi x, c1, 0, MB, ME: insert bits [MB, ME] of c1 into x.
    SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
                     getI32Imm(MB, dl), getI32Imm(ME, dl)};
    ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
    return true;
  }

  return false;
}

/// Try to select an i64 AND with a trailing-ones (zero-extension) mask as a
/// single rldicl, folding a preceding logical right shift when possible.
bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  uint64_t Imm64;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
    return false;

  // If this is a 64-bit zero-extension mask, emit rldicl.
  // The mask clears the top MB bits.
  unsigned MB = 64 - countTrailingOnes(Imm64);
  unsigned SH = 0;
  unsigned Imm;
  SDValue Val = N->getOperand(0);
  SDLoc dl(N);

  if (Val.getOpcode() == ISD::ANY_EXTEND) {
    auto Op0 = Val.getOperand(0);
    if (Op0.getOpcode() == ISD::SRL &&
        isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {

      // Widen the narrow shifted value into a 64-bit register via
      // IMPLICIT_DEF + INSERT_SUBREG so the rldicl below can consume it.
      auto ResultType = Val.getNode()->getValueType(0);
      auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
      SDValue IDVal(ImDef, 0);

      // NOTE(review): getI32Imm(1, dl) is the target subregister index
      // (presumably the low-32-bit subreg) — confirm against PPCRegisterInfo.
      Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
                                           IDVal, Op0.getOperand(0),
                                           getI32Imm(1, dl)),
                    0);
      SH = 64 - Imm;
    }
  }

  // If the operand is a logical right shift, we can fold it into this
  // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
  // for n <= mb. The right shift is really a left rotate followed by a
  // mask, and this mask is a more-restrictive sub-mask of the mask implied
  // by the shift.
  if (Val.getOpcode() == ISD::SRL &&
      isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
    assert(Imm < 64 && "Illegal shift amount");
    Val = Val.getOperand(0);
    SH = 64 - Imm;
  }

  SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  return true;
}

/// Try to select an i64 AND with a leading-ones mask as a single rldicr.
bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  uint64_t Imm64;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
      !isMask_64(~Imm64))
    return false;

  // If this is a negated 64-bit zero-extension mask,
  // i.e. the immediate is a sequence of ones from most significant side
  // and all zero for the remainder, we should use rldicr.
  // The mask keeps bits [0, MB]; rldicr clears everything after MB.
  unsigned MB = 63 - countTrailingOnes(~Imm64);
  unsigned SH = 0;
  SDLoc dl(N);
  SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
  CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
  return true;
}

/// Try to select an i64 OR-with-immediate as rldimi, inserting a run of ones
/// (from a materialized -1) into the first operand.
bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
  assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
  uint64_t Imm64;
  unsigned MB, ME;
  SDValue N0 = N->getOperand(0);

  // We won't get fewer instructions if the imm is 32-bit integer.
  // rldimi requires the imm to have consecutive ones with both sides zero.
  // Also, make sure the first Op has only one use, otherwise this may increase
  // register pressure since rldimi is destructive.
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
      isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
    return false;

  unsigned SH = 63 - ME;
  SDLoc Dl(N);
  // Use select64Imm for making LI instr instead of directly putting Imm64
  SDValue Ops[] = {
      N->getOperand(0),
      SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
      getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
  CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
  return true;
}

// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
void PPCDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  // In case any misguided DAG-level optimizations form an ADD with a
  // TargetConstant operand, crash here instead of miscompiling (by selecting
  // an r+r add instead of some kind of r+i add).
4752 if (N->getOpcode() == ISD::ADD && 4753 N->getOperand(1).getOpcode() == ISD::TargetConstant) 4754 llvm_unreachable("Invalid ADD with TargetConstant operand"); 4755 4756 // Try matching complex bit permutations before doing anything else. 4757 if (tryBitPermutation(N)) 4758 return; 4759 4760 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR). 4761 if (tryIntCompareInGPR(N)) 4762 return; 4763 4764 switch (N->getOpcode()) { 4765 default: break; 4766 4767 case ISD::Constant: 4768 if (N->getValueType(0) == MVT::i64) { 4769 SDNode *ResNode = selectI64Imm(CurDAG, N); 4770 if (!isa<ConstantSDNode>(ResNode)) { 4771 ReplaceNode(N, ResNode); 4772 return; 4773 } 4774 } 4775 break; 4776 4777 case ISD::INTRINSIC_WO_CHAIN: { 4778 if (!Subtarget->isISA3_1()) 4779 break; 4780 unsigned Opcode = 0; 4781 switch (N->getConstantOperandVal(0)) { 4782 default: 4783 break; 4784 case Intrinsic::ppc_altivec_vstribr_p: 4785 Opcode = PPC::VSTRIBR_rec; 4786 break; 4787 case Intrinsic::ppc_altivec_vstribl_p: 4788 Opcode = PPC::VSTRIBL_rec; 4789 break; 4790 case Intrinsic::ppc_altivec_vstrihr_p: 4791 Opcode = PPC::VSTRIHR_rec; 4792 break; 4793 case Intrinsic::ppc_altivec_vstrihl_p: 4794 Opcode = PPC::VSTRIHL_rec; 4795 break; 4796 } 4797 if (!Opcode) 4798 break; 4799 4800 // Generate the appropriate vector string isolate intrinsic to match. 4801 EVT VTs[] = {MVT::v16i8, MVT::Glue}; 4802 SDValue VecStrOp = 4803 SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0); 4804 // Vector string isolate instructions update the EQ bit of CR6. 4805 // Generate a SETBC instruction to extract the bit and place it in a GPR. 
4806 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32); 4807 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32); 4808 SDValue CRBit = SDValue( 4809 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1, 4810 CR6Reg, SubRegIdx, VecStrOp.getValue(1)), 4811 0); 4812 CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit); 4813 return; 4814 } 4815 4816 case ISD::SETCC: 4817 case ISD::STRICT_FSETCC: 4818 case ISD::STRICT_FSETCCS: 4819 if (trySETCC(N)) 4820 return; 4821 break; 4822 // These nodes will be transformed into GETtlsADDR32 node, which 4823 // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT 4824 case PPCISD::ADDI_TLSLD_L_ADDR: 4825 case PPCISD::ADDI_TLSGD_L_ADDR: { 4826 const Module *Mod = MF->getFunction().getParent(); 4827 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || 4828 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() || 4829 Mod->getPICLevel() == PICLevel::SmallPIC) 4830 break; 4831 // Attach global base pointer on GETtlsADDR32 node in order to 4832 // generate secure plt code for TLS symbols. 
4833 getGlobalBaseReg(); 4834 } break; 4835 case PPCISD::CALL: { 4836 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || 4837 !TM.isPositionIndependent() || !Subtarget->isSecurePlt() || 4838 !Subtarget->isTargetELF()) 4839 break; 4840 4841 SDValue Op = N->getOperand(1); 4842 4843 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) { 4844 if (GA->getTargetFlags() == PPCII::MO_PLT) 4845 getGlobalBaseReg(); 4846 } 4847 else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { 4848 if (ES->getTargetFlags() == PPCII::MO_PLT) 4849 getGlobalBaseReg(); 4850 } 4851 } 4852 break; 4853 4854 case PPCISD::GlobalBaseReg: 4855 ReplaceNode(N, getGlobalBaseReg()); 4856 return; 4857 4858 case ISD::FrameIndex: 4859 selectFrameIndex(N, N); 4860 return; 4861 4862 case PPCISD::MFOCRF: { 4863 SDValue InFlag = N->getOperand(1); 4864 ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, 4865 N->getOperand(0), InFlag)); 4866 return; 4867 } 4868 4869 case PPCISD::READ_TIME_BASE: 4870 ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, 4871 MVT::Other, N->getOperand(0))); 4872 return; 4873 4874 case PPCISD::SRA_ADDZE: { 4875 SDValue N0 = N->getOperand(0); 4876 SDValue ShiftAmt = 4877 CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))-> 4878 getConstantIntValue(), dl, 4879 N->getValueType(0)); 4880 if (N->getValueType(0) == MVT::i64) { 4881 SDNode *Op = 4882 CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue, 4883 N0, ShiftAmt); 4884 CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0), 4885 SDValue(Op, 1)); 4886 return; 4887 } else { 4888 assert(N->getValueType(0) == MVT::i32 && 4889 "Expecting i64 or i32 in PPCISD::SRA_ADDZE"); 4890 SDNode *Op = 4891 CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue, 4892 N0, ShiftAmt); 4893 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0), 4894 SDValue(Op, 1)); 4895 return; 4896 } 4897 } 4898 4899 case ISD::STORE: { 4900 // 
Change TLS initial-exec D-form stores to X-form stores. 4901 StoreSDNode *ST = cast<StoreSDNode>(N); 4902 if (EnableTLSOpt && Subtarget->isELFv2ABI() && 4903 ST->getAddressingMode() != ISD::PRE_INC) 4904 if (tryTLSXFormStore(ST)) 4905 return; 4906 break; 4907 } 4908 case ISD::LOAD: { 4909 // Handle preincrement loads. 4910 LoadSDNode *LD = cast<LoadSDNode>(N); 4911 EVT LoadedVT = LD->getMemoryVT(); 4912 4913 // Normal loads are handled by code generated from the .td file. 4914 if (LD->getAddressingMode() != ISD::PRE_INC) { 4915 // Change TLS initial-exec D-form loads to X-form loads. 4916 if (EnableTLSOpt && Subtarget->isELFv2ABI()) 4917 if (tryTLSXFormLoad(LD)) 4918 return; 4919 break; 4920 } 4921 4922 SDValue Offset = LD->getOffset(); 4923 if (Offset.getOpcode() == ISD::TargetConstant || 4924 Offset.getOpcode() == ISD::TargetGlobalAddress) { 4925 4926 unsigned Opcode; 4927 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; 4928 if (LD->getValueType(0) != MVT::i64) { 4929 // Handle PPC32 integer and normal FP loads. 4930 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 4931 switch (LoadedVT.getSimpleVT().SimpleTy) { 4932 default: llvm_unreachable("Invalid PPC load type!"); 4933 case MVT::f64: Opcode = PPC::LFDU; break; 4934 case MVT::f32: Opcode = PPC::LFSU; break; 4935 case MVT::i32: Opcode = PPC::LWZU; break; 4936 case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break; 4937 case MVT::i1: 4938 case MVT::i8: Opcode = PPC::LBZU; break; 4939 } 4940 } else { 4941 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); 4942 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 4943 switch (LoadedVT.getSimpleVT().SimpleTy) { 4944 default: llvm_unreachable("Invalid PPC load type!"); 4945 case MVT::i64: Opcode = PPC::LDU; break; 4946 case MVT::i32: Opcode = PPC::LWZU8; break; 4947 case MVT::i16: Opcode = isSExt ? 
PPC::LHAU8 : PPC::LHZU8; break; 4948 case MVT::i1: 4949 case MVT::i8: Opcode = PPC::LBZU8; break; 4950 } 4951 } 4952 4953 SDValue Chain = LD->getChain(); 4954 SDValue Base = LD->getBasePtr(); 4955 SDValue Ops[] = { Offset, Base, Chain }; 4956 SDNode *MN = CurDAG->getMachineNode( 4957 Opcode, dl, LD->getValueType(0), 4958 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); 4959 transferMemOperands(N, MN); 4960 ReplaceNode(N, MN); 4961 return; 4962 } else { 4963 unsigned Opcode; 4964 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; 4965 if (LD->getValueType(0) != MVT::i64) { 4966 // Handle PPC32 integer and normal FP loads. 4967 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 4968 switch (LoadedVT.getSimpleVT().SimpleTy) { 4969 default: llvm_unreachable("Invalid PPC load type!"); 4970 case MVT::f64: Opcode = PPC::LFDUX; break; 4971 case MVT::f32: Opcode = PPC::LFSUX; break; 4972 case MVT::i32: Opcode = PPC::LWZUX; break; 4973 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break; 4974 case MVT::i1: 4975 case MVT::i8: Opcode = PPC::LBZUX; break; 4976 } 4977 } else { 4978 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); 4979 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && 4980 "Invalid sext update load"); 4981 switch (LoadedVT.getSimpleVT().SimpleTy) { 4982 default: llvm_unreachable("Invalid PPC load type!"); 4983 case MVT::i64: Opcode = PPC::LDUX; break; 4984 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break; 4985 case MVT::i16: Opcode = isSExt ? 
PPC::LHAUX8 : PPC::LHZUX8; break; 4986 case MVT::i1: 4987 case MVT::i8: Opcode = PPC::LBZUX8; break; 4988 } 4989 } 4990 4991 SDValue Chain = LD->getChain(); 4992 SDValue Base = LD->getBasePtr(); 4993 SDValue Ops[] = { Base, Offset, Chain }; 4994 SDNode *MN = CurDAG->getMachineNode( 4995 Opcode, dl, LD->getValueType(0), 4996 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); 4997 transferMemOperands(N, MN); 4998 ReplaceNode(N, MN); 4999 return; 5000 } 5001 } 5002 5003 case ISD::AND: 5004 // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr 5005 if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) || 5006 tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N)) 5007 return; 5008 5009 // Other cases are autogenerated. 5010 break; 5011 case ISD::OR: { 5012 if (N->getValueType(0) == MVT::i32) 5013 if (tryBitfieldInsert(N)) 5014 return; 5015 5016 int16_t Imm; 5017 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && 5018 isIntS16Immediate(N->getOperand(1), Imm)) { 5019 KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0)); 5020 5021 // If this is equivalent to an add, then we can fold it with the 5022 // FrameIndex calculation. 5023 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { 5024 selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); 5025 return; 5026 } 5027 } 5028 5029 // If this is 'or' against an imm with consecutive ones and both sides zero, 5030 // try to emit rldimi 5031 if (tryAsSingleRLDIMI(N)) 5032 return; 5033 5034 // OR with a 32-bit immediate can be handled by ori + oris 5035 // without creating an immediate in a GPR. 5036 uint64_t Imm64 = 0; 5037 bool IsPPC64 = Subtarget->isPPC64(); 5038 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && 5039 (Imm64 & ~0xFFFFFFFFuLL) == 0) { 5040 // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later. 
5041 uint64_t ImmHi = Imm64 >> 16; 5042 uint64_t ImmLo = Imm64 & 0xFFFF; 5043 if (ImmHi != 0 && ImmLo != 0) { 5044 SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, 5045 N->getOperand(0), 5046 getI16Imm(ImmLo, dl)); 5047 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; 5048 CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1); 5049 return; 5050 } 5051 } 5052 5053 // Other cases are autogenerated. 5054 break; 5055 } 5056 case ISD::XOR: { 5057 // XOR with a 32-bit immediate can be handled by xori + xoris 5058 // without creating an immediate in a GPR. 5059 uint64_t Imm64 = 0; 5060 bool IsPPC64 = Subtarget->isPPC64(); 5061 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && 5062 (Imm64 & ~0xFFFFFFFFuLL) == 0) { 5063 // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later. 5064 uint64_t ImmHi = Imm64 >> 16; 5065 uint64_t ImmLo = Imm64 & 0xFFFF; 5066 if (ImmHi != 0 && ImmLo != 0) { 5067 SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, 5068 N->getOperand(0), 5069 getI16Imm(ImmLo, dl)); 5070 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; 5071 CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1); 5072 return; 5073 } 5074 } 5075 5076 break; 5077 } 5078 case ISD::ADD: { 5079 int16_t Imm; 5080 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && 5081 isIntS16Immediate(N->getOperand(1), Imm)) { 5082 selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); 5083 return; 5084 } 5085 5086 break; 5087 } 5088 case ISD::SHL: { 5089 unsigned Imm, SH, MB, ME; 5090 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && 5091 isRotateAndMask(N, Imm, true, SH, MB, ME)) { 5092 SDValue Ops[] = { N->getOperand(0).getOperand(0), 5093 getI32Imm(SH, dl), getI32Imm(MB, dl), 5094 getI32Imm(ME, dl) }; 5095 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 5096 return; 5097 } 5098 5099 // Other cases are autogenerated. 
5100 break; 5101 } 5102 case ISD::SRL: { 5103 unsigned Imm, SH, MB, ME; 5104 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && 5105 isRotateAndMask(N, Imm, true, SH, MB, ME)) { 5106 SDValue Ops[] = { N->getOperand(0).getOperand(0), 5107 getI32Imm(SH, dl), getI32Imm(MB, dl), 5108 getI32Imm(ME, dl) }; 5109 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 5110 return; 5111 } 5112 5113 // Other cases are autogenerated. 5114 break; 5115 } 5116 case ISD::MUL: { 5117 SDValue Op1 = N->getOperand(1); 5118 if (Op1.getOpcode() != ISD::Constant || Op1.getValueType() != MVT::i64) 5119 break; 5120 5121 // If the multiplier fits int16, we can handle it with mulli. 5122 int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue(); 5123 unsigned Shift = countTrailingZeros<uint64_t>(Imm); 5124 if (isInt<16>(Imm) || !Shift) 5125 break; 5126 5127 // If the shifted value fits int16, we can do this transformation: 5128 // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to 5129 // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2). 5130 uint64_t ImmSh = Imm >> Shift; 5131 if (isInt<16>(ImmSh)) { 5132 uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16); 5133 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); 5134 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64, 5135 N->getOperand(0), SDImm); 5136 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, SDValue(MulNode, 0), 5137 getI32Imm(Shift, dl), getI32Imm(63 - Shift, dl)); 5138 return; 5139 } 5140 break; 5141 } 5142 // FIXME: Remove this once the ANDI glue bug is fixed: 5143 case PPCISD::ANDI_rec_1_EQ_BIT: 5144 case PPCISD::ANDI_rec_1_GT_BIT: { 5145 if (!ANDIGlueBug) 5146 break; 5147 5148 EVT InVT = N->getOperand(0).getValueType(); 5149 assert((InVT == MVT::i64 || InVT == MVT::i32) && 5150 "Invalid input type for ANDI_rec_1_EQ_BIT"); 5151 5152 unsigned Opcode = (InVT == MVT::i64) ? 
PPC::ANDI8_rec : PPC::ANDI_rec; 5153 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue, 5154 N->getOperand(0), 5155 CurDAG->getTargetConstant(1, dl, InVT)), 5156 0); 5157 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); 5158 SDValue SRIdxVal = CurDAG->getTargetConstant( 5159 N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt, 5160 dl, MVT::i32); 5161 5162 CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg, 5163 SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */); 5164 return; 5165 } 5166 case ISD::SELECT_CC: { 5167 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 5168 EVT PtrVT = 5169 CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); 5170 bool isPPC64 = (PtrVT == MVT::i64); 5171 5172 // If this is a select of i1 operands, we'll pattern match it. 5173 if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1) 5174 break; 5175 5176 if (Subtarget->isISA3_0() && Subtarget->isPPC64()) { 5177 bool NeedSwapOps = false; 5178 bool IsUnCmp = false; 5179 if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) { 5180 SDValue LHS = N->getOperand(0); 5181 SDValue RHS = N->getOperand(1); 5182 if (NeedSwapOps) 5183 std::swap(LHS, RHS); 5184 5185 // Make use of SelectCC to generate the comparison to set CR bits, for 5186 // equality comparisons having one literal operand, SelectCC probably 5187 // doesn't need to materialize the whole literal and just use xoris to 5188 // check it first, it leads the following comparison result can't 5189 // exactly represent GT/LT relationship. So to avoid this we specify 5190 // SETGT/SETUGT here instead of SETEQ. 5191 SDValue GenCC = 5192 SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl); 5193 CurDAG->SelectNodeTo( 5194 N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB, 5195 N->getValueType(0), GenCC); 5196 NumP9Setb++; 5197 return; 5198 } 5199 } 5200 5201 // Handle the setcc cases here. 
select_cc lhs, 0, 1, 0, cc 5202 if (!isPPC64) 5203 if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 5204 if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2))) 5205 if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3))) 5206 if (N1C->isNullValue() && N3C->isNullValue() && 5207 N2C->getZExtValue() == 1ULL && CC == ISD::SETNE && 5208 // FIXME: Implement this optzn for PPC64. 5209 N->getValueType(0) == MVT::i32) { 5210 SDNode *Tmp = 5211 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 5212 N->getOperand(0), getI32Imm(~0U, dl)); 5213 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0), 5214 N->getOperand(0), SDValue(Tmp, 1)); 5215 return; 5216 } 5217 5218 SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); 5219 5220 if (N->getValueType(0) == MVT::i1) { 5221 // An i1 select is: (c & t) | (!c & f). 5222 bool Inv; 5223 unsigned Idx = getCRIdxForSetCC(CC, Inv); 5224 5225 unsigned SRI; 5226 switch (Idx) { 5227 default: llvm_unreachable("Invalid CC index"); 5228 case 0: SRI = PPC::sub_lt; break; 5229 case 1: SRI = PPC::sub_gt; break; 5230 case 2: SRI = PPC::sub_eq; break; 5231 case 3: SRI = PPC::sub_un; break; 5232 } 5233 5234 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg); 5235 5236 SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1, 5237 CCBit, CCBit), 0); 5238 SDValue C = Inv ? NotCCBit : CCBit, 5239 NotC = Inv ? 
CCBit : NotCCBit; 5240 5241 SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, 5242 C, N->getOperand(2)), 0); 5243 SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, 5244 NotC, N->getOperand(3)), 0); 5245 5246 CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); 5247 return; 5248 } 5249 5250 unsigned BROpc = 5251 getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget); 5252 5253 unsigned SelectCCOp; 5254 if (N->getValueType(0) == MVT::i32) 5255 SelectCCOp = PPC::SELECT_CC_I4; 5256 else if (N->getValueType(0) == MVT::i64) 5257 SelectCCOp = PPC::SELECT_CC_I8; 5258 else if (N->getValueType(0) == MVT::f32) { 5259 if (Subtarget->hasP8Vector()) 5260 SelectCCOp = PPC::SELECT_CC_VSSRC; 5261 else if (Subtarget->hasSPE()) 5262 SelectCCOp = PPC::SELECT_CC_SPE4; 5263 else 5264 SelectCCOp = PPC::SELECT_CC_F4; 5265 } else if (N->getValueType(0) == MVT::f64) { 5266 if (Subtarget->hasVSX()) 5267 SelectCCOp = PPC::SELECT_CC_VSFRC; 5268 else if (Subtarget->hasSPE()) 5269 SelectCCOp = PPC::SELECT_CC_SPE; 5270 else 5271 SelectCCOp = PPC::SELECT_CC_F8; 5272 } else if (N->getValueType(0) == MVT::f128) 5273 SelectCCOp = PPC::SELECT_CC_F16; 5274 else if (Subtarget->hasSPE()) 5275 SelectCCOp = PPC::SELECT_CC_SPE; 5276 else if (N->getValueType(0) == MVT::v2f64 || 5277 N->getValueType(0) == MVT::v2i64) 5278 SelectCCOp = PPC::SELECT_CC_VSRC; 5279 else 5280 SelectCCOp = PPC::SELECT_CC_VRRC; 5281 5282 SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3), 5283 getI32Imm(BROpc, dl) }; 5284 CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); 5285 return; 5286 } 5287 case ISD::VECTOR_SHUFFLE: 5288 if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || 5289 N->getValueType(0) == MVT::v2i64)) { 5290 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 5291 5292 SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1), 5293 Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 
0 : 1); 5294 unsigned DM[2]; 5295 5296 for (int i = 0; i < 2; ++i) 5297 if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2) 5298 DM[i] = 0; 5299 else 5300 DM[i] = 1; 5301 5302 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 && 5303 Op1.getOpcode() == ISD::SCALAR_TO_VECTOR && 5304 isa<LoadSDNode>(Op1.getOperand(0))) { 5305 LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0)); 5306 SDValue Base, Offset; 5307 5308 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() && 5309 (LD->getMemoryVT() == MVT::f64 || 5310 LD->getMemoryVT() == MVT::i64) && 5311 SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { 5312 SDValue Chain = LD->getChain(); 5313 SDValue Ops[] = { Base, Offset, Chain }; 5314 MachineMemOperand *MemOp = LD->getMemOperand(); 5315 SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, 5316 N->getValueType(0), Ops); 5317 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp}); 5318 return; 5319 } 5320 } 5321 5322 // For little endian, we must swap the input operands and adjust 5323 // the mask elements (reverse and invert them). 5324 if (Subtarget->isLittleEndian()) { 5325 std::swap(Op1, Op2); 5326 unsigned tmp = DM[0]; 5327 DM[0] = 1 - DM[1]; 5328 DM[1] = 1 - tmp; 5329 } 5330 5331 SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl, 5332 MVT::i32); 5333 SDValue Ops[] = { Op1, Op2, DMV }; 5334 CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); 5335 return; 5336 } 5337 5338 break; 5339 case PPCISD::BDNZ: 5340 case PPCISD::BDZ: { 5341 bool IsPPC64 = Subtarget->isPPC64(); 5342 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; 5343 CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ 5344 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) 5345 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), 5346 MVT::Other, Ops); 5347 return; 5348 } 5349 case PPCISD::COND_BRANCH: { 5350 // Op #0 is the Chain. 5351 // Op #1 is the PPC::PRED_* number. 5352 // Op #2 is the CR# 5353 // Op #3 is the Dest MBB 5354 // Op #4 is the Flag. 
5355 // Prevent PPC::PRED_* from being selected into LI. 5356 unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 5357 if (EnableBranchHint) 5358 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3)); 5359 5360 SDValue Pred = getI32Imm(PCC, dl); 5361 SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), 5362 N->getOperand(0), N->getOperand(4) }; 5363 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); 5364 return; 5365 } 5366 case ISD::BR_CC: { 5367 if (tryFoldSWTestBRCC(N)) 5368 return; 5369 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 5370 unsigned PCC = 5371 getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget); 5372 5373 if (N->getOperand(2).getValueType() == MVT::i1) { 5374 unsigned Opc; 5375 bool Swap; 5376 switch (PCC) { 5377 default: llvm_unreachable("Unexpected Boolean-operand predicate"); 5378 case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break; 5379 case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break; 5380 case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break; 5381 case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break; 5382 case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break; 5383 case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; 5384 } 5385 5386 // A signed comparison of i1 values produces the opposite result to an 5387 // unsigned one if the condition code includes less-than or greater-than. 5388 // This is because 1 is the most negative signed i1 number and the most 5389 // positive unsigned i1 number. The CR-logical operations used for such 5390 // comparisons are non-commutative so for signed comparisons vs. unsigned 5391 // ones, the input operands just need to be swapped. 5392 if (ISD::isSignedIntSetCC(CC)) 5393 Swap = !Swap; 5394 5395 SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, 5396 N->getOperand(Swap ? 3 : 2), 5397 N->getOperand(Swap ? 
2 : 3)), 0); 5398 CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4), 5399 N->getOperand(0)); 5400 return; 5401 } 5402 5403 if (EnableBranchHint) 5404 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4)); 5405 5406 SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); 5407 SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, 5408 N->getOperand(4), N->getOperand(0) }; 5409 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); 5410 return; 5411 } 5412 case ISD::BRIND: { 5413 // FIXME: Should custom lower this. 5414 SDValue Chain = N->getOperand(0); 5415 SDValue Target = N->getOperand(1); 5416 unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8; 5417 unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8; 5418 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, 5419 Chain), 0); 5420 CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); 5421 return; 5422 } 5423 case PPCISD::TOC_ENTRY: { 5424 const bool isPPC64 = Subtarget->isPPC64(); 5425 const bool isELFABI = Subtarget->isSVR4ABI(); 5426 const bool isAIXABI = Subtarget->isAIXABI(); 5427 5428 // PowerPC only support small, medium and large code model. 5429 const CodeModel::Model CModel = TM.getCodeModel(); 5430 assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && 5431 "PowerPC doesn't support tiny or kernel code models."); 5432 5433 if (isAIXABI && CModel == CodeModel::Medium) 5434 report_fatal_error("Medium code model is not supported on AIX."); 5435 5436 // For 64-bit small code model, we allow SelectCodeCommon to handle this, 5437 // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. 5438 if (isPPC64 && CModel == CodeModel::Small) 5439 break; 5440 5441 // Handle 32-bit small code model. 5442 if (!isPPC64) { 5443 // Transforms the ISD::TOC_ENTRY node to a PPCISD::LWZtoc. 
5444 auto replaceWithLWZtoc = [this, &dl](SDNode *TocEntry) { 5445 SDValue GA = TocEntry->getOperand(0); 5446 SDValue TocBase = TocEntry->getOperand(1); 5447 SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, 5448 TocBase); 5449 transferMemOperands(TocEntry, MN); 5450 ReplaceNode(TocEntry, MN); 5451 }; 5452 5453 if (isELFABI) { 5454 assert(TM.isPositionIndependent() && 5455 "32-bit ELF can only have TOC entries in position independent" 5456 " code."); 5457 // 32-bit ELF always uses a small code model toc access. 5458 replaceWithLWZtoc(N); 5459 return; 5460 } 5461 5462 if (isAIXABI && CModel == CodeModel::Small) { 5463 replaceWithLWZtoc(N); 5464 return; 5465 } 5466 } 5467 5468 assert(CModel != CodeModel::Small && "All small code models handled."); 5469 5470 assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit" 5471 " ELF/AIX or 32-bit AIX in the following."); 5472 5473 // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode 5474 // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We 5475 // generate two instructions as described below. The first source operand 5476 // is a symbol reference. If it must be toc-referenced according to 5477 // Subtarget, we generate: 5478 // [32-bit AIX] 5479 // LWZtocL(@sym, ADDIStocHA(%r2, @sym)) 5480 // [64-bit ELF/AIX] 5481 // LDtocL(@sym, ADDIStocHA8(%x2, @sym)) 5482 // Otherwise we generate: 5483 // ADDItocL(ADDIStocHA8(%x2, @sym), @sym) 5484 SDValue GA = N->getOperand(0); 5485 SDValue TOCbase = N->getOperand(1); 5486 5487 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 5488 SDNode *Tmp = CurDAG->getMachineNode( 5489 isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA); 5490 5491 if (PPCLowering->isAccessedAsGotIndirect(GA)) { 5492 // If it is accessed as got-indirect, we need an extra LWZ/LD to load 5493 // the address. 5494 SDNode *MN = CurDAG->getMachineNode( 5495 isPPC64 ? 
PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0)); 5496 5497 transferMemOperands(N, MN); 5498 ReplaceNode(N, MN); 5499 return; 5500 } 5501 5502 // Build the address relative to the TOC-pointer. 5503 ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, 5504 SDValue(Tmp, 0), GA)); 5505 return; 5506 } 5507 case PPCISD::PPC32_PICGOT: 5508 // Generate a PIC-safe GOT reference. 5509 assert(Subtarget->is32BitELFABI() && 5510 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); 5511 CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, 5512 PPCLowering->getPointerTy(CurDAG->getDataLayout()), 5513 MVT::i32); 5514 return; 5515 5516 case PPCISD::VADD_SPLAT: { 5517 // This expands into one of three sequences, depending on whether 5518 // the first operand is odd or even, positive or negative. 5519 assert(isa<ConstantSDNode>(N->getOperand(0)) && 5520 isa<ConstantSDNode>(N->getOperand(1)) && 5521 "Invalid operand on VADD_SPLAT!"); 5522 5523 int Elt = N->getConstantOperandVal(0); 5524 int EltSize = N->getConstantOperandVal(1); 5525 unsigned Opc1, Opc2, Opc3; 5526 EVT VT; 5527 5528 if (EltSize == 1) { 5529 Opc1 = PPC::VSPLTISB; 5530 Opc2 = PPC::VADDUBM; 5531 Opc3 = PPC::VSUBUBM; 5532 VT = MVT::v16i8; 5533 } else if (EltSize == 2) { 5534 Opc1 = PPC::VSPLTISH; 5535 Opc2 = PPC::VADDUHM; 5536 Opc3 = PPC::VSUBUHM; 5537 VT = MVT::v8i16; 5538 } else { 5539 assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!"); 5540 Opc1 = PPC::VSPLTISW; 5541 Opc2 = PPC::VADDUWM; 5542 Opc3 = PPC::VSUBUWM; 5543 VT = MVT::v4i32; 5544 } 5545 5546 if ((Elt & 1) == 0) { 5547 // Elt is even, in the range [-32,-18] + [16,30]. 
5548 // 5549 // Convert: VADD_SPLAT elt, size 5550 // Into: tmp = VSPLTIS[BHW] elt 5551 // VADDU[BHW]M tmp, tmp 5552 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 5553 SDValue EltVal = getI32Imm(Elt >> 1, dl); 5554 SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 5555 SDValue TmpVal = SDValue(Tmp, 0); 5556 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal)); 5557 return; 5558 } else if (Elt > 0) { 5559 // Elt is odd and positive, in the range [17,31]. 5560 // 5561 // Convert: VADD_SPLAT elt, size 5562 // Into: tmp1 = VSPLTIS[BHW] elt-16 5563 // tmp2 = VSPLTIS[BHW] -16 5564 // VSUBU[BHW]M tmp1, tmp2 5565 SDValue EltVal = getI32Imm(Elt - 16, dl); 5566 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 5567 EltVal = getI32Imm(-16, dl); 5568 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 5569 ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), 5570 SDValue(Tmp2, 0))); 5571 return; 5572 } else { 5573 // Elt is odd and negative, in the range [-31,-17]. 5574 // 5575 // Convert: VADD_SPLAT elt, size 5576 // Into: tmp1 = VSPLTIS[BHW] elt+16 5577 // tmp2 = VSPLTIS[BHW] -16 5578 // VADDU[BHW]M tmp1, tmp2 5579 SDValue EltVal = getI32Imm(Elt + 16, dl); 5580 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 5581 EltVal = getI32Imm(-16, dl); 5582 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 5583 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), 5584 SDValue(Tmp2, 0))); 5585 return; 5586 } 5587 } 5588 } 5589 5590 SelectCode(N); 5591 } 5592 5593 // If the target supports the cmpb instruction, do the idiom recognition here. 5594 // We don't do this as a DAG combine because we don't want to do it as nodes 5595 // are being combined (because we might miss part of the eventual idiom). 
We 5596 // don't want to do it during instruction selection because we want to reuse 5597 // the logic for lowering the masking operations already part of the 5598 // instruction selector. 5599 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { 5600 SDLoc dl(N); 5601 5602 assert(N->getOpcode() == ISD::OR && 5603 "Only OR nodes are supported for CMPB"); 5604 5605 SDValue Res; 5606 if (!Subtarget->hasCMPB()) 5607 return Res; 5608 5609 if (N->getValueType(0) != MVT::i32 && 5610 N->getValueType(0) != MVT::i64) 5611 return Res; 5612 5613 EVT VT = N->getValueType(0); 5614 5615 SDValue RHS, LHS; 5616 bool BytesFound[8] = {false, false, false, false, false, false, false, false}; 5617 uint64_t Mask = 0, Alt = 0; 5618 5619 auto IsByteSelectCC = [this](SDValue O, unsigned &b, 5620 uint64_t &Mask, uint64_t &Alt, 5621 SDValue &LHS, SDValue &RHS) { 5622 if (O.getOpcode() != ISD::SELECT_CC) 5623 return false; 5624 ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get(); 5625 5626 if (!isa<ConstantSDNode>(O.getOperand(2)) || 5627 !isa<ConstantSDNode>(O.getOperand(3))) 5628 return false; 5629 5630 uint64_t PM = O.getConstantOperandVal(2); 5631 uint64_t PAlt = O.getConstantOperandVal(3); 5632 for (b = 0; b < 8; ++b) { 5633 uint64_t Mask = UINT64_C(0xFF) << (8*b); 5634 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt) 5635 break; 5636 } 5637 5638 if (b == 8) 5639 return false; 5640 Mask |= PM; 5641 Alt |= PAlt; 5642 5643 if (!isa<ConstantSDNode>(O.getOperand(1)) || 5644 O.getConstantOperandVal(1) != 0) { 5645 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1); 5646 if (Op0.getOpcode() == ISD::TRUNCATE) 5647 Op0 = Op0.getOperand(0); 5648 if (Op1.getOpcode() == ISD::TRUNCATE) 5649 Op1 = Op1.getOperand(0); 5650 5651 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL && 5652 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ && 5653 isa<ConstantSDNode>(Op0.getOperand(1))) { 5654 5655 unsigned Bits = Op0.getValueSizeInBits(); 5656 if (b != Bits/8-1) 5657 
return false; 5658 if (Op0.getConstantOperandVal(1) != Bits-8) 5659 return false; 5660 5661 LHS = Op0.getOperand(0); 5662 RHS = Op1.getOperand(0); 5663 return true; 5664 } 5665 5666 // When we have small integers (i16 to be specific), the form present 5667 // post-legalization uses SETULT in the SELECT_CC for the 5668 // higher-order byte, depending on the fact that the 5669 // even-higher-order bytes are known to all be zero, for example: 5670 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult 5671 // (so when the second byte is the same, because all higher-order 5672 // bits from bytes 3 and 4 are known to be zero, the result of the 5673 // xor can be at most 255) 5674 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT && 5675 isa<ConstantSDNode>(O.getOperand(1))) { 5676 5677 uint64_t ULim = O.getConstantOperandVal(1); 5678 if (ULim != (UINT64_C(1) << b*8)) 5679 return false; 5680 5681 // Now we need to make sure that the upper bytes are known to be 5682 // zero. 5683 unsigned Bits = Op0.getValueSizeInBits(); 5684 if (!CurDAG->MaskedValueIsZero( 5685 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8))) 5686 return false; 5687 5688 LHS = Op0.getOperand(0); 5689 RHS = Op0.getOperand(1); 5690 return true; 5691 } 5692 5693 return false; 5694 } 5695 5696 if (CC != ISD::SETEQ) 5697 return false; 5698 5699 SDValue Op = O.getOperand(0); 5700 if (Op.getOpcode() == ISD::AND) { 5701 if (!isa<ConstantSDNode>(Op.getOperand(1))) 5702 return false; 5703 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b))) 5704 return false; 5705 5706 SDValue XOR = Op.getOperand(0); 5707 if (XOR.getOpcode() == ISD::TRUNCATE) 5708 XOR = XOR.getOperand(0); 5709 if (XOR.getOpcode() != ISD::XOR) 5710 return false; 5711 5712 LHS = XOR.getOperand(0); 5713 RHS = XOR.getOperand(1); 5714 return true; 5715 } else if (Op.getOpcode() == ISD::SRL) { 5716 if (!isa<ConstantSDNode>(Op.getOperand(1))) 5717 return false; 5718 unsigned Bits = Op.getValueSizeInBits(); 5719 if (b != Bits/8-1) 5720 
return false; 5721 if (Op.getConstantOperandVal(1) != Bits-8) 5722 return false; 5723 5724 SDValue XOR = Op.getOperand(0); 5725 if (XOR.getOpcode() == ISD::TRUNCATE) 5726 XOR = XOR.getOperand(0); 5727 if (XOR.getOpcode() != ISD::XOR) 5728 return false; 5729 5730 LHS = XOR.getOperand(0); 5731 RHS = XOR.getOperand(1); 5732 return true; 5733 } 5734 5735 return false; 5736 }; 5737 5738 SmallVector<SDValue, 8> Queue(1, SDValue(N, 0)); 5739 while (!Queue.empty()) { 5740 SDValue V = Queue.pop_back_val(); 5741 5742 for (const SDValue &O : V.getNode()->ops()) { 5743 unsigned b = 0; 5744 uint64_t M = 0, A = 0; 5745 SDValue OLHS, ORHS; 5746 if (O.getOpcode() == ISD::OR) { 5747 Queue.push_back(O); 5748 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) { 5749 if (!LHS) { 5750 LHS = OLHS; 5751 RHS = ORHS; 5752 BytesFound[b] = true; 5753 Mask |= M; 5754 Alt |= A; 5755 } else if ((LHS == ORHS && RHS == OLHS) || 5756 (RHS == ORHS && LHS == OLHS)) { 5757 BytesFound[b] = true; 5758 Mask |= M; 5759 Alt |= A; 5760 } else { 5761 return Res; 5762 } 5763 } else { 5764 return Res; 5765 } 5766 } 5767 } 5768 5769 unsigned LastB = 0, BCnt = 0; 5770 for (unsigned i = 0; i < 8; ++i) 5771 if (BytesFound[LastB]) { 5772 ++BCnt; 5773 LastB = i; 5774 } 5775 5776 if (!LastB || BCnt < 2) 5777 return Res; 5778 5779 // Because we'll be zero-extending the output anyway if don't have a specific 5780 // value for each input byte (via the Mask), we can 'anyext' the inputs. 
5781 if (LHS.getValueType() != VT) { 5782 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT); 5783 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT); 5784 } 5785 5786 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS); 5787 5788 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1); 5789 if (NonTrivialMask && !Alt) { 5790 // Res = Mask & CMPB 5791 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 5792 CurDAG->getConstant(Mask, dl, VT)); 5793 } else if (Alt) { 5794 // Res = (CMPB & Mask) | (~CMPB & Alt) 5795 // Which, as suggested here: 5796 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge 5797 // can be written as: 5798 // Res = Alt ^ ((Alt ^ Mask) & CMPB) 5799 // useful because the (Alt ^ Mask) can be pre-computed. 5800 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 5801 CurDAG->getConstant(Mask ^ Alt, dl, VT)); 5802 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, 5803 CurDAG->getConstant(Alt, dl, VT)); 5804 } 5805 5806 return Res; 5807 } 5808 5809 // When CR bit registers are enabled, an extension of an i1 variable to a i32 5810 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus 5811 // involves constant materialization of a 0 or a 1 or both. If the result of 5812 // the extension is then operated upon by some operator that can be constant 5813 // folded with a constant 0 or 1, and that constant can be materialized using 5814 // only one instruction (like a zero or one), then we should fold in those 5815 // operations with the select. 
// Folds an i1 extension into constant-foldable users: Res receives the
// replacement value for the *user* node, and N is reseated to that user so
// PreprocessISelDAG replaces the right node. Iterates as long as each new
// select has a single foldable user.
void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
  if (!Subtarget->useCRBits())
    return;

  // Only the three integer-extension opcodes are candidates.
  if (N->getOpcode() != ISD::ZERO_EXTEND &&
      N->getOpcode() != ISD::SIGN_EXTEND &&
      N->getOpcode() != ISD::ANY_EXTEND)
    return;

  if (N->getOperand(0).getValueType() != MVT::i1)
    return;

  // A single use is required so replacing the user is safe.
  if (!N->hasOneUse())
    return;

  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Cond = N->getOperand(0);
  // sext(i1 true) is -1; zext/anyext use 1.
  SDValue ConstTrue =
    CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
  SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);

  do {
    SDNode *User = *N->use_begin();
    // Only binary users can be folded by FoldConstantArithmetic below.
    if (User->getNumOperands() != 2)
      break;

    // Substitutes Val for N in the user's operand list and attempts to
    // constant-fold the user's operation.
    auto TryFold = [this, N, User, dl](SDValue Val) {
      SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
      SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
      SDValue O1 = UserO1.getNode() == N ? Val : UserO1;

      return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
                                            User->getValueType(0), {O0, O1});
    };

    // FIXME: When the semantics of the interaction between select and undef
    // are clearly defined, it may turn out to be unnecessary to break here.
    SDValue TrueRes = TryFold(ConstTrue);
    if (!TrueRes || TrueRes.isUndef())
      break;
    SDValue FalseRes = TryFold(ConstFalse);
    if (!FalseRes || FalseRes.isUndef())
      break;

    // For us to materialize these using one instruction, we must be able to
    // represent them as signed 16-bit integers.
    uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
             False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
    if (!isInt<16>(True) || !isInt<16>(False))
      break;

    // We can replace User with a new SELECT node, and try again to see if we
    // can fold the select with its user.
    Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
    N = User;
    ConstTrue = TrueRes;
    ConstFalse = FalseRes;
  } while (N->hasOneUse());
}

// Pre-selection DAG pass: recognizes the CMPB idiom on OR trees and folds
// boolean extensions, replacing nodes in place.
void PPCDAGToDAGISel::PreprocessISelDAG() {
  // Walk all nodes in reverse allocation order so replacements don't
  // invalidate the iteration position.
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    SDValue Res;
    switch (N->getOpcode()) {
    default: break;
    case ISD::OR:
      Res = combineToCMPB(N);
      break;
    }

    // foldBoolExts may reseat N to one of its users; Res is the replacement
    // for whatever node N ends up pointing at.
    if (!Res)
      foldBoolExts(Res, N);

    if (Res) {
      LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Res.getNode()->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

/// PostprocessISelDAG - Perform some late peephole optimizations
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() {
  // Skip peepholes at -O0.
  if (TM.getOptLevel() == CodeGenOpt::None)
    return;

  PeepholePPC64();
  PeepholeCROps();
  PeepholePPC64ZExt();
}

// Check if all users of this node will become isel where the second operand
// is the constant zero. If this is so, and if we can negate the condition,
// then we can flip the true and false operands. This will allow the zero to
// be folded with the isel so that we don't need to materialize a register
// containing zero.
// Return true iff every user of N is a SELECT_I4/SELECT_I8 machine node whose
// third operand (the "false" value) is an LI/LI8 of constant zero. When that
// holds, inverting N and swapping each select's true/false operands is a
// behavior-preserving transformation (used by PeepholeCROps below).
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    // Every user must be a machine-opcode select over i32/i64.
    if (!User->isMachineOpcode())
      return false;
    if (User->getMachineOpcode() != PPC::SELECT_I4 &&
        User->getMachineOpcode() != PPC::SELECT_I8)
      return false;

    // The select's false operand must be a load-immediate of zero.
    SDNode *Op2 = User->getOperand(2).getNode();
    if (!Op2->isMachineOpcode())
      return false;

    if (Op2->getMachineOpcode() != PPC::LI &&
        Op2->getMachineOpcode() != PPC::LI8)
      return false;

    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
    if (!C)
      return false;

    if (!C->isNullValue())
      return false;
  }

  return true;
}

// Rebuild every user of N (all of which must be SELECT_I4/SELECT_I8, as
// checked by AllUsersSelectZero) with its true and false operands swapped.
// Called when the condition feeding the selects is about to be inverted.
void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
  // Collect the users first: replacing nodes while walking N's use list
  // would invalidate the iterators.
  SmallVector<SDNode *, 4> ToReplace;
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
            User->getMachineOpcode() == PPC::SELECT_I8) &&
           "Must have all select users");
    ToReplace.push_back(User);
  }

  for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
       UE = ToReplace.end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Same select opcode, but with operands 1 and 2 exchanged.
    SDNode *ResNode =
      CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
                             User->getValueType(0), User->getOperand(0),
                             User->getOperand(2),
                             User->getOperand(1));

    LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
    LLVM_DEBUG(User->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nNew: ");
    LLVM_DEBUG(ResNode->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    ReplaceUses(User, ResNode);
  }
}

// Peephole-simplify CR (condition-register) logical operations whose inputs
// are known constants (CRSET/CRUNSET) or a NOT (CRNOR x, x). Runs to a fixed
// point: each pass scans all nodes, applies one rewrite per node where
// possible, and repeats while anything changed.
void PPCDAGToDAGISel::PeepholeCROps() {
  bool IsModified;
  do {
    IsModified = false;
    for (SDNode &Node : CurDAG->allnodes()) {
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
      if (!MachineNode || MachineNode->use_empty())
        continue;
      SDNode *ResNode = MachineNode;

      // Classification of the node's CR inputs:
      //   OpNSet   - operand is CRSET (constant 1)
      //   OpNUnset - operand is CRUNSET (constant 0)
      //   OpNNot   - operand is CRNOR(x, x), i.e. a logical NOT of x
      bool Op1Set   = false, Op1Unset = false,
           Op1Not   = false,
           Op2Set   = false, Op2Unset = false,
           Op2Not   = false;

      unsigned Opcode = MachineNode->getMachineOpcode();
      switch (Opcode) {
      default: break;
      case PPC::CRAND:
      case PPC::CRNAND:
      case PPC::CROR:
      case PPC::CRXOR:
      case PPC::CRNOR:
      case PPC::CREQV:
      case PPC::CRANDC:
      case PPC::CRORC: {
        // Two-input CR logical ops: classify operand 1 here, then fall
        // through to classify operand 0 with the single-condition ops below.
        SDValue Op = MachineNode->getOperand(1);
        if (Op.isMachineOpcode()) {
          if (Op.getMachineOpcode() == PPC::CRSET)
            Op2Set = true;
          else if (Op.getMachineOpcode() == PPC::CRUNSET)
            Op2Unset = true;
          else if (Op.getMachineOpcode() == PPC::CRNOR &&
                   Op.getOperand(0) == Op.getOperand(1))
            Op2Not = true;
        }
        LLVM_FALLTHROUGH;
      }
      case PPC::BC:
      case PPC::BCn:
      case PPC::SELECT_I4:
      case PPC::SELECT_I8:
      case PPC::SELECT_F4:
      case PPC::SELECT_F8:
      case PPC::SELECT_SPE:
      case PPC::SELECT_SPE4:
      case PPC::SELECT_VRRC:
      case PPC::SELECT_VSFRC:
      case PPC::SELECT_VSSRC:
      case PPC::SELECT_VSRC: {
        // Classify operand 0 (the condition for branches/selects, the first
        // input for CR logical ops).
        SDValue Op = MachineNode->getOperand(0);
        if (Op.isMachineOpcode()) {
          if (Op.getMachineOpcode() == PPC::CRSET)
            Op1Set = true;
          else if (Op.getMachineOpcode() == PPC::CRUNSET)
            Op1Unset = true;
          else if (Op.getMachineOpcode() == PPC::CRNOR &&
                   Op.getOperand(0) == Op.getOperand(1))
            Op1Not = true;
        }
      }
      break;
      }

      // SelectSwap is set when the node is replaced by its logical inverse
      // because all users are selects-of-zero; the selects' operands are
      // swapped below to compensate.
      bool SelectSwap = false;
      switch (Opcode) {
      default: break;
      case PPC::CRAND:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // x & x = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Set)
          // 1 & y = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Set)
          // x & 1 = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset || Op2Unset)
          // x & 0 = 0 & y = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Not)
          // ~x & y = andc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0).
                                             getOperand(0));
        else if (Op2Not)
          // x & ~y = andc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          // Invert to CRNAND; SwapAllSelectUsers compensates in the users.
          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRNAND:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // nand(x, x) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Set)
          // nand(1, y) -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Set)
          // nand(x, 1) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Unset || Op2Unset)
          // nand(x, 0) = nand(0, y) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Not)
          // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // nand(x, ~y) = ~x | y = orc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1).
                                                      getOperand(0),
                                           MachineNode->getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CROR:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // x | x = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Set || Op2Set)
          // x | 1 = 1 | y = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Unset)
          // 0 | y = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Unset)
          // x | 0 = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // ~x | y = orc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0).
                                             getOperand(0));
        else if (Op2Not)
          // x | ~y = orc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRXOR:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // xor(x, x) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set)
          // xor(1, y) -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Set)
          // xor(x, 1) -> nor(x, x)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Unset)
          // xor(0, y) = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Unset)
          // xor(x, 0) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // xor(~x, y) = eqv(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // xor(x, ~y) = eqv(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRNOR:
        if (Op1Set || Op2Set)
          // nor(1, y) -> 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Unset)
          // nor(0, y) = ~y -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Unset)
          // nor(x, 0) = ~x
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Not)
          // nor(~x, y) = andc(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // nor(x, ~y) = andc(y, x)
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1).
                                                      getOperand(0),
                                           MachineNode->getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CREQV:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // eqv(x, x) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set)
          // eqv(1, y) = y
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op2Set)
          // eqv(x, 1) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset)
          // eqv(0, y) = ~y -> nor(y, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op2Unset)
          // eqv(x, 0) = ~x
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(0));
        else if (Op1Not)
          // eqv(~x, y) = xor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // eqv(x, ~y) = xor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1));
          SelectSwap = true;
        }
        break;
      case PPC::CRANDC:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // andc(x, x) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set)
          // andc(1, y) = ~y
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op1Unset || Op2Set)
          // andc(0, y) = andc(x, 1) = 0
          ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op2Unset)
          // andc(x, 0) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Not)
          // andc(~x, y) = ~(x | y) = nor(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // andc(x, ~y) = x & y
          ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          // ~andc(x, y) = orc(y, x) — note the swapped operands.
          ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0));
          SelectSwap = true;
        }
        break;
      case PPC::CRORC:
        if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
          // orc(x, x) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op1Set || Op2Unset)
          // orc(1, y) = orc(x, 0) = 1
          ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
                                           MVT::i1);
        else if (Op2Set)
          // orc(x, 1) = x
          ResNode = MachineNode->getOperand(0).getNode();
        else if (Op1Unset)
          // orc(0, y) = ~y
          ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(1));
        else if (Op1Not)
          // orc(~x, y) = ~(x & y) = nand(x, y)
          ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0).
                                                      getOperand(0),
                                           MachineNode->getOperand(1));
        else if (Op2Not)
          // orc(x, ~y) = x | y
          ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(0),
                                           MachineNode->getOperand(1).
                                             getOperand(0));
        else if (AllUsersSelectZero(MachineNode)) {
          // ~orc(x, y) = andc(y, x) — note the swapped operands.
          ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
                                           MVT::i1, MachineNode->getOperand(1),
                                           MachineNode->getOperand(0));
          SelectSwap = true;
        }
        break;
      case PPC::SELECT_I4:
      case PPC::SELECT_I8:
      case PPC::SELECT_F4:
      case PPC::SELECT_F8:
      case PPC::SELECT_SPE:
      case PPC::SELECT_SPE4:
      case PPC::SELECT_VRRC:
      case PPC::SELECT_VSFRC:
      case PPC::SELECT_VSSRC:
      case PPC::SELECT_VSRC:
        if (Op1Set)
          // select(1, t, f) = t
          ResNode = MachineNode->getOperand(1).getNode();
        else if (Op1Unset)
          // select(0, t, f) = f
          ResNode = MachineNode->getOperand(2).getNode();
        else if (Op1Not)
          // select(~c, t, f) = select(c, f, t)
          ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
                                           SDLoc(MachineNode),
                                           MachineNode->getValueType(0),
                                           MachineNode->getOperand(0).
                                             getOperand(0),
                                           MachineNode->getOperand(2),
                                           MachineNode->getOperand(1));
        break;
      case PPC::BC:
      case PPC::BCn:
        if (Op1Not)
          // Branch on a negated condition: flip BC <-> BCn and use the
          // un-negated condition directly.
          ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
                                                               PPC::BC,
                                           SDLoc(MachineNode),
                                           MVT::Other,
                                           MachineNode->getOperand(0).
                                             getOperand(0),
                                           MachineNode->getOperand(1),
                                           MachineNode->getOperand(2));
        // FIXME: Handle Op1Set, Op1Unset here too.
        break;
      }

      // If we're inverting this node because it is used only by selects that
      // we'd like to swap, then swap the selects before the node replacement.
      if (SelectSwap)
        SwapAllSelectUsers(MachineNode);

      if (ResNode != MachineNode) {
        LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
        LLVM_DEBUG(MachineNode->dump(CurDAG));
        LLVM_DEBUG(dbgs() << "\nNew: ");
        LLVM_DEBUG(ResNode->dump(CurDAG));
        LLVM_DEBUG(dbgs() << "\n");

        ReplaceUses(MachineNode, ResNode);
        IsModified = true;
      }
    }
    if (IsModified)
      CurDAG->RemoveDeadNodes();
  } while (IsModified);
}

// Gather the set of 32-bit operations that are known to have their
// higher-order 32 bits zero, where ToPromote contains all such operations.
static bool PeepholePPC64ZExtGather(SDValue Op32,
                                    SmallPtrSetImpl<SDNode *> &ToPromote) {
  if (!Op32.isMachineOpcode())
    return false;

  // First, check for the "frontier" instructions (those that will clear the
  // higher-order 32 bits).

  // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
  // around. If it does not, then these instructions will clear the
  // higher-order bits.
  if ((Op32.getMachineOpcode() == PPC::RLWINM ||
       Op32.getMachineOpcode() == PPC::RLWNM) &&
      Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // SLW and SRW always clear the higher-order bits.
  if (Op32.getMachineOpcode() == PPC::SLW ||
      Op32.getMachineOpcode() == PPC::SRW) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // For LI and LIS, we need the immediate to be positive (so that it is not
  // sign extended).
  if (Op32.getMachineOpcode() == PPC::LI ||
      Op32.getMachineOpcode() == PPC::LIS) {
    if (!isUInt<15>(Op32.getConstantOperandVal(0)))
      return false;

    ToPromote.insert(Op32.getNode());
    return true;
  }

  // LHBRX and LWBRX always clear the higher-order bits.
  if (Op32.getMachineOpcode() == PPC::LHBRX ||
      Op32.getMachineOpcode() == PPC::LWBRX) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
  if (Op32.getMachineOpcode() == PPC::CNTLZW ||
      Op32.getMachineOpcode() == PPC::CNTTZW) {
    ToPromote.insert(Op32.getNode());
    return true;
  }

  // Next, check for those instructions we can look through.

  // Assuming the mask does not wrap around, then the higher-order bits are
  // taken directly from the first operand.
  if (Op32.getMachineOpcode() == PPC::RLWIMI &&
      Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // For OR, the higher-order bits are zero if that is true for both operands.
  // For SELECT_I4, the same is true (but the relevant operand numbers are
  // shifted by 1).
  if (Op32.getMachineOpcode() == PPC::OR ||
      Op32.getMachineOpcode() == PPC::SELECT_I4) {
    unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
      return false;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // For ORI and ORIS, we need the higher-order bits of the first operand to be
  // zero, and also for the constant to be positive (so that it is not sign
  // extended).
  if (Op32.getMachineOpcode() == PPC::ORI ||
      Op32.getMachineOpcode() == PPC::ORIS) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
      return false;
    if (!isUInt<15>(Op32.getConstantOperandVal(1)))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // The higher-order bits of AND are zero if that is true for at least one of
  // the operands.
  if (Op32.getMachineOpcode() == PPC::AND) {
    SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
    bool Op0OK =
      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK =
      PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());

    // Only promote the operand chains that actually proved zero-extension;
    // the other operand can keep its existing (possibly sign-extending) form.
    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

    if (Op1OK)
      ToPromote.insert(ToPromote2.begin(), ToPromote2.end());

    return true;
  }

  // For ANDI and ANDIS, the higher-order bits are zero if either that is true
  // of the first operand, or if the second operand is positive (so that it is
  // not sign extended).
  if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
      Op32.getMachineOpcode() == PPC::ANDIS_rec) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    bool Op0OK =
      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());

    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

    return true;
  }

  return false;
}

// Remove redundant zero extensions (RLDICL of INSERT_SUBREG of IMPLICIT_DEF)
// whose 32-bit input provably already has zero high bits, by promoting the
// feeding 32-bit operations to their 64-bit forms. PPC64 only.
void PPCDAGToDAGISel::PeepholePPC64ZExt() {
  if (!Subtarget->isPPC64())
    return;

  // When we zero-extend from i32 to i64, we use a pattern like this:
  // def : Pat<(i64 (zext i32:$in)),
  //           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
  //                   0, 32)>;
  // There are several 32-bit shift/rotate instructions, however, that will
  // clear the higher-order bits of their output, rendering the RLDICL
  // unnecessary. When that happens, we remove it here, and redefine the
  // relevant 32-bit operation to be a 64-bit operation.

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (N->getMachineOpcode() != PPC::RLDICL)
      continue;

    // Match exactly the zext pattern above: rotate by 0, clear the top 32.
    if (N->getConstantOperandVal(1) != 0 ||
        N->getConstantOperandVal(2) != 32)
      continue;

    SDValue ISR = N->getOperand(0);
    if (!ISR.isMachineOpcode() ||
        ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
      continue;

    if (!ISR.hasOneUse())
      continue;

    if (ISR.getConstantOperandVal(2) != PPC::sub_32)
      continue;

    SDValue IDef = ISR.getOperand(0);
    if (!IDef.isMachineOpcode() ||
        IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
      continue;

    // We now know that we're looking at a canonical i32 -> i64 zext. See if we
    // can get rid of it.

    SDValue Op32 = ISR->getOperand(1);
    if (!Op32.isMachineOpcode())
      continue;

    // There are some 32-bit instructions that always clear the high-order 32
    // bits, there are also some instructions (like AND) that we can look
    // through.
    SmallPtrSet<SDNode *, 16> ToPromote;
    if (!PeepholePPC64ZExtGather(Op32, ToPromote))
      continue;

    // If the ToPromote set contains nodes that have uses outside of the set
    // (except for the original INSERT_SUBREG), then abort the transformation.
    bool OutsideUse = false;
    for (SDNode *PN : ToPromote) {
      for (SDNode *UN : PN->uses()) {
        if (!ToPromote.count(UN) && UN != ISR.getNode()) {
          OutsideUse = true;
          break;
        }
      }

      if (OutsideUse)
        break;
    }
    if (OutsideUse)
      continue;

    MadeChange = true;

    // We now know that this zero extension can be removed by promoting the
    // nodes in ToPromote to 64-bit operations, where for operations in the
    // frontier of the set, we need to insert INSERT_SUBREGs for their
    // operands.
    for (SDNode *PN : ToPromote) {
      unsigned NewOpcode;
      switch (PN->getMachineOpcode()) {
      default:
        llvm_unreachable("Don't know the 64-bit variant of this instruction");
      case PPC::RLWINM:    NewOpcode = PPC::RLWINM8; break;
      case PPC::RLWNM:     NewOpcode = PPC::RLWNM8; break;
      case PPC::SLW:       NewOpcode = PPC::SLW8; break;
      case PPC::SRW:       NewOpcode = PPC::SRW8; break;
      case PPC::LI:        NewOpcode = PPC::LI8; break;
      case PPC::LIS:       NewOpcode = PPC::LIS8; break;
      case PPC::LHBRX:     NewOpcode = PPC::LHBRX8; break;
      case PPC::LWBRX:     NewOpcode = PPC::LWBRX8; break;
      case PPC::CNTLZW:    NewOpcode = PPC::CNTLZW8; break;
      case PPC::CNTTZW:    NewOpcode = PPC::CNTTZW8; break;
      case PPC::RLWIMI:    NewOpcode = PPC::RLWIMI8; break;
      case PPC::OR:        NewOpcode = PPC::OR8; break;
      case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
      case PPC::ORI:       NewOpcode = PPC::ORI8; break;
      case PPC::ORIS:      NewOpcode = PPC::ORIS8; break;
      case PPC::AND:       NewOpcode = PPC::AND8; break;
      case PPC::ANDI_rec:
        NewOpcode = PPC::ANDI8_rec;
        break;
      case PPC::ANDIS_rec:
        NewOpcode = PPC::ANDIS8_rec;
        break;
      }

      // Note: During the replacement process, the nodes will be in an
      // inconsistent state (some instructions will have operands with values
      // of the wrong type). Once done, however, everything should be right
      // again.

      SmallVector<SDValue, 4> Ops;
      for (const SDValue &V : PN->ops()) {
        // i32 operands coming from outside the promoted set must be wrapped
        // in a fresh INSERT_SUBREG to produce an i64; constants are left
        // as-is, and in-set operands will themselves be promoted.
        if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
            !isa<ConstantSDNode>(V)) {
          SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
          SDNode *ReplOp =
            CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
                                   ISR.getNode()->getVTList(), ReplOpOps);
          Ops.push_back(SDValue(ReplOp, 0));
        } else {
          Ops.push_back(V);
        }
      }

      // Because all to-be-promoted nodes only have users that are other
      // promoted nodes (or the original INSERT_SUBREG), we can safely replace
      // the i32 result value type with i64.

      SmallVector<EVT, 2> NewVTs;
      SDVTList VTs = PN->getVTList();
      for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
        if (VTs.VTs[i] == MVT::i32)
          NewVTs.push_back(MVT::i64);
        else
          NewVTs.push_back(VTs.VTs[i]);

      LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld:    ");
      LLVM_DEBUG(PN->dump(CurDAG));

      CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);

      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(PN->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");
    }

    // Now we replace the original zero extend and its associated INSERT_SUBREG
    // with the value feeding the INSERT_SUBREG (which has now been promoted to
    // return an i64).

    LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld:    ");
    LLVM_DEBUG(N->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nNew: ");
    LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    ReplaceUses(N, Op32.getNode());
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

// Fold an add-immediate (ADDI/ADDI8 or the TLS/TOC ADDI variants) feeding the
// base operand of a D/DS-form load or store into the memory operation's
// displacement, subject to the DS-form mod-4 and alignment constraints.
void PPCDAGToDAGISel::PeepholePPC64() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    // FirstOp is the index of the displacement operand: 0 for loads,
    // 1 for stores (operand 0 of a store is the value being stored).
    unsigned FirstOp;
    unsigned StorageOpcode = N->getMachineOpcode();
    // DS-form instructions encode the displacement in 14 bits shifted left
    // by 2, so the offset must be a multiple of 4.
    bool RequiresMod4Offset = false;

    switch (StorageOpcode) {
    default: continue;

    case PPC::LWA:
    case PPC::LD:
    case PPC::DFLOADf64:
    case PPC::DFLOADf32:
      RequiresMod4Offset = true;
      LLVM_FALLTHROUGH;
    case PPC::LBZ:
    case PPC::LBZ8:
    case PPC::LFD:
    case PPC::LFS:
    case PPC::LHA:
    case PPC::LHA8:
    case PPC::LHZ:
    case PPC::LHZ8:
    case PPC::LWZ:
    case PPC::LWZ8:
      FirstOp = 0;
      break;

    case PPC::STD:
    case PPC::DFSTOREf64:
    case PPC::DFSTOREf32:
      RequiresMod4Offset = true;
      LLVM_FALLTHROUGH;
    case PPC::STB:
    case PPC::STB8:
    case PPC::STFD:
    case PPC::STFS:
    case PPC::STH:
    case PPC::STH8:
    case PPC::STW:
    case PPC::STW8:
      FirstOp = 1;
      break;
    }

    // If this is a load or store with a zero offset, or within the alignment,
    // we may be able to fold an add-immediate into the memory operation.
    // The check against alignment is below, as it can't occur until we check
    // the arguments to N
    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
      continue;

    SDValue Base = N->getOperand(FirstOp + 1);
    if (!Base.isMachineOpcode())
      continue;

    unsigned Flags = 0;
    bool ReplaceFlags = true;

    // When the feeding operation is an add-immediate of some sort,
    // determine whether we need to add relocation information to the
    // target flags on the immediate operand when we fold it into the
    // load instruction.
    //
    // For something like ADDItocL, the relocation information is
    // inferred from the opcode; when we process it in the AsmPrinter,
    // we add the necessary relocation there. A load, though, can receive
    // relocation from various flavors of ADDIxxx, so we need to carry
    // the relocation information in the target flags.
    switch (Base.getMachineOpcode()) {
    default: continue;

    case PPC::ADDI8:
    case PPC::ADDI:
      // In some cases (such as TLS) the relocation information
      // is already in place on the operand, so copying the operand
      // is sufficient.
      ReplaceFlags = false;
      // For these cases, the immediate may not be divisible by 4, in
      // which case the fold is illegal for DS-form instructions. (The
      // other cases provide aligned addresses and are always safe.)
      if (RequiresMod4Offset &&
          (!isa<ConstantSDNode>(Base.getOperand(1)) ||
           Base.getConstantOperandVal(1) % 4 != 0))
        continue;
      break;
    case PPC::ADDIdtprelL:
      Flags = PPCII::MO_DTPREL_LO;
      break;
    case PPC::ADDItlsldL:
      Flags = PPCII::MO_TLSLD_LO;
      break;
    case PPC::ADDItocL:
      Flags = PPCII::MO_TOC_LO;
      break;
    }

    SDValue ImmOpnd = Base.getOperand(1);

    // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
    // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
    // we might have needed different @ha relocation values for the offset
    // pointers).
    int MaxDisplacement = 7;
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
      const GlobalValue *GV = GA->getGlobal();
      Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
      MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
    }

    bool UpdateHBase = false;
    SDValue HBase = Base.getOperand(0);

    int Offset = N->getConstantOperandVal(FirstOp);
    if (ReplaceFlags) {
      if (Offset < 0 || Offset > MaxDisplacement) {
        // If we have an addi(toc@l)/addis(toc@ha) pair, and the addis has only
        // one use, then we can do this for any offset, we just need to also
        // update the offset (i.e. the symbol addend) on the addis also.
        if (Base.getMachineOpcode() != PPC::ADDItocL)
          continue;

        if (!HBase.isMachineOpcode() ||
            HBase.getMachineOpcode() != PPC::ADDIStocHA8)
          continue;

        if (!Base.hasOneUse() || !HBase.hasOneUse())
          continue;

        SDValue HImmOpnd = HBase.getOperand(1);
        if (HImmOpnd != ImmOpnd)
          continue;

        UpdateHBase = true;
      }
    } else {
      // If we're directly folding the addend from an addi instruction, then:
      //  1. In general, the offset on the memory access must be zero.
      //  2. If the addend is a constant, then it can be combined with a
      //     non-zero offset, but only if the result meets the encoding
      //     requirements.
      if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
        Offset += C->getSExtValue();

        if (RequiresMod4Offset && (Offset % 4) != 0)
          continue;

        // D-form displacements are signed 16-bit immediates.
        if (!isInt<16>(Offset))
          continue;

        ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
                                            ImmOpnd.getValueType());
      } else if (Offset != 0) {
        continue;
      }
    }

    // We found an opportunity. Reverse the operands from the add
    // immediate and substitute them into the load or store. If
    // needed, update the target flags for the immediate operand to
    // reflect the necessary relocation information.
    LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
    LLVM_DEBUG(Base->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nN: ");
    LLVM_DEBUG(N->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    // If the relocation information isn't already present on the
    // immediate operand, add it now.
    if (ReplaceFlags) {
      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
        SDLoc dl(GA);
        const GlobalValue *GV = GA->getGlobal();
        Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
        // We can't perform this optimization for data whose alignment
        // is insufficient for the instruction encoding.
        if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
          LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
          continue;
        }
        ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
      } else if (ConstantPoolSDNode *CP =
                   dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
        const Constant *C = CP->getConstVal();
        ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
                                                Offset, Flags);
      }
    }

    if (FirstOp == 1) // Store
      (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
                                       Base.getOperand(0), N->getOperand(3));
    else // Load
      (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
                                       N->getOperand(2));

    if (UpdateHBase)
      (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
                                       ImmOpnd);

    // The add-immediate may now be dead, in which case remove it.
    if (Base.getNode()->use_empty())
      CurDAG->RemoveDeadNode(Base.getNode());
  }
}

/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new PPCDAGToDAGISel(TM, OptLevel);
}