1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines a pattern matching instruction selector for PowerPC, 11 // converting from a legalized dag to a PPC dag. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "MCTargetDesc/PPCMCTargetDesc.h" 16 #include "MCTargetDesc/PPCPredicates.h" 17 #include "PPC.h" 18 #include "PPCISelLowering.h" 19 #include "PPCMachineFunctionInfo.h" 20 #include "PPCSubtarget.h" 21 #include "PPCTargetMachine.h" 22 #include "llvm/ADT/APInt.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/ADT/Statistic.h" 28 #include "llvm/Analysis/BranchProbabilityInfo.h" 29 #include "llvm/CodeGen/FunctionLoweringInfo.h" 30 #include "llvm/CodeGen/ISDOpcodes.h" 31 #include "llvm/CodeGen/MachineBasicBlock.h" 32 #include "llvm/CodeGen/MachineFunction.h" 33 #include "llvm/CodeGen/MachineInstrBuilder.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/MachineValueType.h" 36 #include "llvm/CodeGen/SelectionDAG.h" 37 #include "llvm/CodeGen/SelectionDAGISel.h" 38 #include "llvm/CodeGen/SelectionDAGNodes.h" 39 #include "llvm/CodeGen/ValueTypes.h" 40 #include "llvm/IR/BasicBlock.h" 41 #include "llvm/IR/DebugLoc.h" 42 #include "llvm/IR/Function.h" 43 #include "llvm/IR/GlobalValue.h" 44 #include "llvm/IR/InlineAsm.h" 45 #include "llvm/IR/InstrTypes.h" 46 #include "llvm/IR/Module.h" 47 #include "llvm/Support/Casting.h" 48 #include "llvm/Support/CodeGen.h" 49 #include "llvm/Support/CommandLine.h" 50 #include "llvm/Support/Compiler.h" 51 #include "llvm/Support/Debug.h" 52 #include "llvm/Support/ErrorHandling.h" 53 #include "llvm/Support/KnownBits.h" 54 #include "llvm/Support/MathExtras.h" 55 #include "llvm/Support/raw_ostream.h" 56 #include "llvm/Target/TargetInstrInfo.h" 57 #include "llvm/Target/TargetRegisterInfo.h" 58 #include <algorithm> 59 #include <cassert> 60 #include <cstdint> 61 #include <iterator> 62 #include <limits> 63 #include <memory> 64 #include <new> 65 #include <tuple> 66 #include <utility> 67 68 using namespace llvm; 69 70 #define DEBUG_TYPE "ppc-codegen" 71 72 STATISTIC(NumSextSetcc, 73 "Number of (sext(setcc)) nodes expanded into GPR sequence."); 74 STATISTIC(NumZextSetcc, 75 "Number of (zext(setcc)) nodes expanded into GPR sequence."); 76 STATISTIC(SignExtensionsAdded, 77 "Number of sign extensions for compare inputs added."); 78 STATISTIC(ZeroExtensionsAdded, 79 "Number of zero extensions for compare inputs added."); 80 STATISTIC(NumLogicOpsOnComparison, 81 "Number of logical ops on i1 values calculated in GPR."); 82 STATISTIC(OmittedForNonExtendUses, 83 "Number of compares not eliminated as they have non-extending uses."); 84 85 // FIXME: Remove this once the bug has been fixed! 86 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", 87 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); 88 89 static cl::opt<bool> 90 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), 91 cl::desc("use aggressive ppc isel for bit permutations"), 92 cl::Hidden); 93 static cl::opt<bool> BPermRewriterNoMasking( 94 "ppc-bit-perm-rewriter-stress-rotates", 95 cl::desc("stress rotate selection in aggressive ppc isel for " 96 "bit permutations"), 97 cl::Hidden); 98 99 static cl::opt<bool> EnableBranchHint( 100 "ppc-use-branch-hint", cl::init(true), 101 cl::desc("Enable static hinting of branches on ppc"), 102 cl::Hidden); 103 104 namespace { 105 106 //===--------------------------------------------------------------------===// 107 /// PPCDAGToDAGISel - PPC specific code to select PPC machine 108 /// instructions for SelectionDAG operations. 109 /// 110 class PPCDAGToDAGISel : public SelectionDAGISel { 111 const PPCTargetMachine &TM; 112 const PPCSubtarget *PPCSubTarget; 113 const PPCTargetLowering *PPCLowering; 114 unsigned GlobalBaseReg; 115 116 public: 117 explicit PPCDAGToDAGISel(PPCTargetMachine &tm) 118 : SelectionDAGISel(tm), TM(tm) {} 119 120 bool runOnMachineFunction(MachineFunction &MF) override { 121 // Make sure we re-emit a set of the global base reg if necessary 122 GlobalBaseReg = 0; 123 PPCSubTarget = &MF.getSubtarget<PPCSubtarget>(); 124 PPCLowering = PPCSubTarget->getTargetLowering(); 125 SelectionDAGISel::runOnMachineFunction(MF); 126 127 if (!PPCSubTarget->isSVR4ABI()) 128 InsertVRSaveCode(MF); 129 130 return true; 131 } 132 133 void PreprocessISelDAG() override; 134 void PostprocessISelDAG() override; 135 136 /// getI32Imm - Return a target constant with the specified value, of type 137 /// i32. 138 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 139 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 140 } 141 142 /// getI64Imm - Return a target constant with the specified value, of type 143 /// i64. 144 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) { 145 return CurDAG->getTargetConstant(Imm, dl, MVT::i64); 146 } 147 148 /// getSmallIPtrImm - Return a target constant of pointer type. 149 inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) { 150 return CurDAG->getTargetConstant( 151 Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); 152 } 153 154 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a 155 /// rotate and mask opcode and mask operation. 156 static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, 157 unsigned &SH, unsigned &MB, unsigned &ME); 158 159 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC 160 /// base register. Return the virtual register that holds this value. 161 SDNode *getGlobalBaseReg(); 162 163 void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); 164 165 // Select - Convert the specified operand from a target-independent to a 166 // target-specific node if it hasn't already been changed. 167 void Select(SDNode *N) override; 168 169 bool tryBitfieldInsert(SDNode *N); 170 bool tryBitPermutation(SDNode *N); 171 172 /// SelectCC - Select a comparison of the specified values with the 173 /// specified condition code, returning the CR# of the expression. 174 SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, 175 const SDLoc &dl); 176 177 /// SelectAddrImm - Returns true if the address N can be represented by 178 /// a base register plus a signed 16-bit displacement [r+imm]. 179 bool SelectAddrImm(SDValue N, SDValue &Disp, 180 SDValue &Base) { 181 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false); 182 } 183 184 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc 185 /// immediate field. Note that the operand at this point is already the 186 /// result of a prior SelectAddressRegImm call. 187 bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { 188 if (N.getOpcode() == ISD::TargetConstant || 189 N.getOpcode() == ISD::TargetGlobalAddress) { 190 Out = N; 191 return true; 192 } 193 194 return false; 195 } 196 197 /// SelectAddrIdx - Given the specified addressed, check to see if it can be 198 /// represented as an indexed [r+r] operation. Returns false if it can 199 /// be represented by [r+imm], which are preferred. 200 bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { 201 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG); 202 } 203 204 /// SelectAddrIdxOnly - Given the specified addressed, force it to be 205 /// represented as an indexed [r+r] operation. 206 bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { 207 return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG); 208 } 209 210 /// SelectAddrImmX4 - Returns true if the address N can be represented by 211 /// a base register plus a signed 16-bit displacement that is a multiple of 4. 212 /// Suitable for use by STD and friends. 213 bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { 214 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true); 215 } 216 217 // Select an address into a single register. 218 bool SelectAddr(SDValue N, SDValue &Base) { 219 Base = N; 220 return true; 221 } 222 223 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 224 /// inline asm expressions. It is always correct to compute the value into 225 /// a register. The case of adding a (possibly relocatable) constant to a 226 /// register can be improved, but it is wrong to substitute Reg+Reg for 227 /// Reg in an asm, because the load or store opcode would have to change. 228 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 229 unsigned ConstraintID, 230 std::vector<SDValue> &OutOps) override { 231 switch(ConstraintID) { 232 default: 233 errs() << "ConstraintID: " << ConstraintID << "\n"; 234 llvm_unreachable("Unexpected asm memory constraint"); 235 case InlineAsm::Constraint_es: 236 case InlineAsm::Constraint_i: 237 case InlineAsm::Constraint_m: 238 case InlineAsm::Constraint_o: 239 case InlineAsm::Constraint_Q: 240 case InlineAsm::Constraint_Z: 241 case InlineAsm::Constraint_Zy: 242 // We need to make sure that this one operand does not end up in r0 243 // (because we might end up lowering this as 0(%op)). 244 const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo(); 245 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); 246 SDLoc dl(Op); 247 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); 248 SDValue NewOp = 249 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 250 dl, Op.getValueType(), 251 Op, RC), 0); 252 253 OutOps.push_back(NewOp); 254 return false; 255 } 256 return true; 257 } 258 259 void InsertVRSaveCode(MachineFunction &MF); 260 261 StringRef getPassName() const override { 262 return "PowerPC DAG->DAG Pattern Instruction Selection"; 263 } 264 265 // Include the pieces autogenerated from the target description. 266 #include "PPCGenDAGISel.inc" 267 268 private: 269 // Conversion type for interpreting results of a 32-bit instruction as 270 // a 64-bit value or vice versa. 271 enum ExtOrTruncConversion { Ext, Trunc }; 272 273 // Modifiers to guide how an ISD::SETCC node's result is to be computed 274 // in a GPR. 275 // ZExtOrig - use the original condition code, zero-extend value 276 // ZExtInvert - invert the condition code, zero-extend value 277 // SExtOrig - use the original condition code, sign-extend value 278 // SExtInvert - invert the condition code, sign-extend value 279 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert }; 280 281 bool trySETCC(SDNode *N); 282 bool tryEXTEND(SDNode *N); 283 bool tryLogicOpOfCompares(SDNode *N); 284 SDValue computeLogicOpInGPR(SDValue LogicOp); 285 SDValue signExtendInputIfNeeded(SDValue Input); 286 SDValue zeroExtendInputIfNeeded(SDValue Input); 287 SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); 288 SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 289 int64_t RHSValue, SDLoc dl); 290 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 291 int64_t RHSValue, SDLoc dl); 292 SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 293 int64_t RHSValue, SDLoc dl); 294 SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 295 int64_t RHSValue, SDLoc dl); 296 SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); 297 298 void PeepholePPC64(); 299 void PeepholePPC64ZExt(); 300 void PeepholeCROps(); 301 302 SDValue combineToCMPB(SDNode *N); 303 void foldBoolExts(SDValue &Res, SDNode *&N); 304 305 bool AllUsersSelectZero(SDNode *N); 306 void SwapAllSelectUsers(SDNode *N); 307 308 void transferMemOperands(SDNode *N, SDNode *Result); 309 }; 310 311 } // end anonymous namespace 312 313 /// InsertVRSaveCode - Once the entire function has been instruction selected, 314 /// all virtual registers are created and all machine instructions are built, 315 /// check to see if we need to save/restore VRSAVE. If so, do it. 316 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { 317 // Check to see if this function uses vector registers, which means we have to 318 // save and restore the VRSAVE register and update it with the regs we use. 319 // 320 // In this case, there will be virtual registers of vector type created 321 // by the scheduler. Detect them now. 322 bool HasVectorVReg = false; 323 for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) { 324 unsigned Reg = TargetRegisterInfo::index2VirtReg(i); 325 if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) { 326 HasVectorVReg = true; 327 break; 328 } 329 } 330 if (!HasVectorVReg) return; // nothing to do. 331 332 // If we have a vector register, we want to emit code into the entry and exit 333 // blocks to save and restore the VRSAVE register. We do this here (instead 334 // of marking all vector instructions as clobbering VRSAVE) for two reasons: 335 // 336 // 1. This (trivially) reduces the load on the register allocator, by not 337 // having to represent the live range of the VRSAVE register. 338 // 2. This (more significantly) allows us to create a temporary virtual 339 // register to hold the saved VRSAVE value, allowing this temporary to be 340 // register allocated, instead of forcing it to be spilled to the stack. 341 342 // Create two vregs - one to hold the VRSAVE register that is live-in to the 343 // function and one for the value after having bits or'd into it. 344 unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); 345 unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); 346 347 const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); 348 MachineBasicBlock &EntryBB = *Fn.begin(); 349 DebugLoc dl; 350 // Emit the following code into the entry block: 351 // InVRSAVE = MFVRSAVE 352 // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE 353 // MTVRSAVE UpdatedVRSAVE 354 MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point 355 BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE); 356 BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE), 357 UpdatedVRSAVE).addReg(InVRSAVE); 358 BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE); 359 360 // Find all return blocks, outputting a restore in each epilog. 361 for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { 362 if (BB->isReturnBlock()) { 363 IP = BB->end(); --IP; 364 365 // Skip over all terminator instructions, which are part of the return 366 // sequence. 367 MachineBasicBlock::iterator I2 = IP; 368 while (I2 != BB->begin() && (--I2)->isTerminator()) 369 IP = I2; 370 371 // Emit: MTVRSAVE InVRSave 372 BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE); 373 } 374 } 375 } 376 377 /// getGlobalBaseReg - Output the instructions required to put the 378 /// base address to use for accessing globals into a register. 379 /// 380 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { 381 if (!GlobalBaseReg) { 382 const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); 383 // Insert the set of GlobalBaseReg into the first MBB of the function 384 MachineBasicBlock &FirstMBB = MF->front(); 385 MachineBasicBlock::iterator MBBI = FirstMBB.begin(); 386 const Module *M = MF->getFunction()->getParent(); 387 DebugLoc dl; 388 389 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { 390 if (PPCSubTarget->isTargetELF()) { 391 GlobalBaseReg = PPC::R30; 392 if (M->getPICLevel() == PICLevel::SmallPIC) { 393 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR)); 394 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); 395 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); 396 } else { 397 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); 398 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); 399 unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); 400 BuildMI(FirstMBB, MBBI, dl, 401 TII.get(PPC::UpdateGBR), GlobalBaseReg) 402 .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg); 403 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); 404 } 405 } else { 406 GlobalBaseReg = 407 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass); 408 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); 409 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); 410 } 411 } else { 412 GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); 413 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); 414 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); 415 } 416 } 417 return CurDAG->getRegister(GlobalBaseReg, 418 PPCLowering->getPointerTy(CurDAG->getDataLayout())) 419 .getNode(); 420 } 421 422 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit 423 /// or 64-bit immediate, and if the value can be accurately represented as a 424 /// sign extension from a 16-bit value. If so, this returns true and the 425 /// immediate. 426 static bool isIntS16Immediate(SDNode *N, short &Imm) { 427 if (N->getOpcode() != ISD::Constant) 428 return false; 429 430 Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); 431 if (N->getValueType(0) == MVT::i32) 432 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); 433 else 434 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); 435 } 436 437 static bool isIntS16Immediate(SDValue Op, short &Imm) { 438 return isIntS16Immediate(Op.getNode(), Imm); 439 } 440 441 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant 442 /// operand. If so Imm will receive the 32-bit value. 443 static bool isInt32Immediate(SDNode *N, unsigned &Imm) { 444 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { 445 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 446 return true; 447 } 448 return false; 449 } 450 451 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant 452 /// operand. If so Imm will receive the 64-bit value. 453 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) { 454 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) { 455 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 456 return true; 457 } 458 return false; 459 } 460 461 // isInt32Immediate - This method tests to see if a constant operand. 462 // If so Imm will receive the 32 bit value. 463 static bool isInt32Immediate(SDValue N, unsigned &Imm) { 464 return isInt32Immediate(N.getNode(), Imm); 465 } 466 467 static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, 468 const SDValue &DestMBB) { 469 assert(isa<BasicBlockSDNode>(DestMBB)); 470 471 if (!FuncInfo->BPI) return PPC::BR_NO_HINT; 472 473 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); 474 const TerminatorInst *BBTerm = BB->getTerminator(); 475 476 if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT; 477 478 const BasicBlock *TBB = BBTerm->getSuccessor(0); 479 const BasicBlock *FBB = BBTerm->getSuccessor(1); 480 481 auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB); 482 auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB); 483 484 // We only want to handle cases which are easy to predict at static time, e.g. 485 // C++ throw statement, that is very likely not taken, or calling never 486 // returned function, e.g. stdlib exit(). So we set Threshold to filter 487 // unwanted cases. 488 // 489 // Below is LLVM branch weight table, we only want to handle case 1, 2 490 // 491 // Case Taken:Nontaken Example 492 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(), 493 // 2. Invoke-terminating 1:1048575 494 // 3. Coldblock 4:64 __builtin_expect 495 // 4. Loop Branch 124:4 For loop 496 // 5. PH/ZH/FPH 20:12 497 const uint32_t Threshold = 10000; 498 499 if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb)) 500 return PPC::BR_NO_HINT; 501 502 DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::" 503 << BB->getName() << "'\n" 504 << " -> " << TBB->getName() << ": " << TProb << "\n" 505 << " -> " << FBB->getName() << ": " << FProb << "\n"); 506 507 const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB); 508 509 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities, 510 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock 511 if (BBDN->getBasicBlock()->getBasicBlock() != TBB) 512 std::swap(TProb, FProb); 513 514 return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT; 515 } 516 517 // isOpcWithIntImmediate - This method tests to see if the node is a specific 518 // opcode and that it has a immediate integer right operand. 519 // If so Imm will receive the 32 bit value. 520 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 521 return N->getOpcode() == Opc 522 && isInt32Immediate(N->getOperand(1).getNode(), Imm); 523 } 524 525 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { 526 SDLoc dl(SN); 527 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 528 SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); 529 unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8; 530 if (SN->hasOneUse()) 531 CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI, 532 getSmallIPtrImm(Offset, dl)); 533 else 534 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, 535 getSmallIPtrImm(Offset, dl))); 536 } 537 538 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, 539 bool isShiftMask, unsigned &SH, 540 unsigned &MB, unsigned &ME) { 541 // Don't even go down this path for i64, since different logic will be 542 // necessary for rldicl/rldicr/rldimi. 543 if (N->getValueType(0) != MVT::i32) 544 return false; 545 546 unsigned Shift = 32; 547 unsigned Indeterminant = ~0; // bit mask marking indeterminant results 548 unsigned Opcode = N->getOpcode(); 549 if (N->getNumOperands() != 2 || 550 !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31)) 551 return false; 552 553 if (Opcode == ISD::SHL) { 554 // apply shift left to mask if it comes first 555 if (isShiftMask) Mask = Mask << Shift; 556 // determine which bits are made indeterminant by shift 557 Indeterminant = ~(0xFFFFFFFFu << Shift); 558 } else if (Opcode == ISD::SRL) { 559 // apply shift right to mask if it comes first 560 if (isShiftMask) Mask = Mask >> Shift; 561 // determine which bits are made indeterminant by shift 562 Indeterminant = ~(0xFFFFFFFFu >> Shift); 563 // adjust for the left rotate 564 Shift = 32 - Shift; 565 } else if (Opcode == ISD::ROTL) { 566 Indeterminant = 0; 567 } else { 568 return false; 569 } 570 571 // if the mask doesn't intersect any Indeterminant bits 572 if (Mask && !(Mask & Indeterminant)) { 573 SH = Shift & 31; 574 // make sure the mask is still a mask (wrap arounds may not be) 575 return isRunOfOnes(Mask, MB, ME); 576 } 577 return false; 578 } 579 580 /// Turn an or of two masked values into the rotate left word immediate then 581 /// mask insert (rlwimi) instruction. 582 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { 583 SDValue Op0 = N->getOperand(0); 584 SDValue Op1 = N->getOperand(1); 585 SDLoc dl(N); 586 587 KnownBits LKnown, RKnown; 588 CurDAG->computeKnownBits(Op0, LKnown); 589 CurDAG->computeKnownBits(Op1, RKnown); 590 591 unsigned TargetMask = LKnown.Zero.getZExtValue(); 592 unsigned InsertMask = RKnown.Zero.getZExtValue(); 593 594 if ((TargetMask | InsertMask) == 0xFFFFFFFF) { 595 unsigned Op0Opc = Op0.getOpcode(); 596 unsigned Op1Opc = Op1.getOpcode(); 597 unsigned Value, SH = 0; 598 TargetMask = ~TargetMask; 599 InsertMask = ~InsertMask; 600 601 // If the LHS has a foldable shift and the RHS does not, then swap it to the 602 // RHS so that we can fold the shift into the insert. 603 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) { 604 if (Op0.getOperand(0).getOpcode() == ISD::SHL || 605 Op0.getOperand(0).getOpcode() == ISD::SRL) { 606 if (Op1.getOperand(0).getOpcode() != ISD::SHL && 607 Op1.getOperand(0).getOpcode() != ISD::SRL) { 608 std::swap(Op0, Op1); 609 std::swap(Op0Opc, Op1Opc); 610 std::swap(TargetMask, InsertMask); 611 } 612 } 613 } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) { 614 if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL && 615 Op1.getOperand(0).getOpcode() != ISD::SRL) { 616 std::swap(Op0, Op1); 617 std::swap(Op0Opc, Op1Opc); 618 std::swap(TargetMask, InsertMask); 619 } 620 } 621 622 unsigned MB, ME; 623 if (isRunOfOnes(InsertMask, MB, ME)) { 624 SDValue Tmp1, Tmp2; 625 626 if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && 627 isInt32Immediate(Op1.getOperand(1), Value)) { 628 Op1 = Op1.getOperand(0); 629 SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value; 630 } 631 if (Op1Opc == ISD::AND) { 632 // The AND mask might not be a constant, and we need to make sure that 633 // if we're going to fold the masking with the insert, all bits not 634 // know to be zero in the mask are known to be one. 635 KnownBits MKnown; 636 CurDAG->computeKnownBits(Op1.getOperand(1), MKnown); 637 bool CanFoldMask = InsertMask == MKnown.One.getZExtValue(); 638 639 unsigned SHOpc = Op1.getOperand(0).getOpcode(); 640 if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask && 641 isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) { 642 // Note that Value must be in range here (less than 32) because 643 // otherwise there would not be any bits set in InsertMask. 644 Op1 = Op1.getOperand(0).getOperand(0); 645 SH = (SHOpc == ISD::SHL) ? Value : 32 - Value; 646 } 647 } 648 649 SH &= 31; 650 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl), 651 getI32Imm(ME, dl) }; 652 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); 653 return true; 654 } 655 } 656 return false; 657 } 658 659 // Predict the number of instructions that would be generated by calling 660 // getInt64(N). 661 static unsigned getInt64CountDirect(int64_t Imm) { 662 // Assume no remaining bits. 663 unsigned Remainder = 0; 664 // Assume no shift required. 665 unsigned Shift = 0; 666 667 // If it can't be represented as a 32 bit value. 668 if (!isInt<32>(Imm)) { 669 Shift = countTrailingZeros<uint64_t>(Imm); 670 int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; 671 672 // If the shifted value fits 32 bits. 673 if (isInt<32>(ImmSh)) { 674 // Go with the shifted value. 675 Imm = ImmSh; 676 } else { 677 // Still stuck with a 64 bit value. 678 Remainder = Imm; 679 Shift = 32; 680 Imm >>= 32; 681 } 682 } 683 684 // Intermediate operand. 685 unsigned Result = 0; 686 687 // Handle first 32 bits. 688 unsigned Lo = Imm & 0xFFFF; 689 690 // Simple value. 691 if (isInt<16>(Imm)) { 692 // Just the Lo bits. 693 ++Result; 694 } else if (Lo) { 695 // Handle the Hi bits and Lo bits. 696 Result += 2; 697 } else { 698 // Just the Hi bits. 699 ++Result; 700 } 701 702 // If no shift, we're done. 703 if (!Shift) return Result; 704 705 // If Hi word == Lo word, 706 // we can use rldimi to insert the Lo word into Hi word. 707 if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { 708 ++Result; 709 return Result; 710 } 711 712 // Shift for next step if the upper 32-bits were not zero. 713 if (Imm) 714 ++Result; 715 716 // Add in the last bits as required. 717 if ((Remainder >> 16) & 0xFFFF) 718 ++Result; 719 if (Remainder & 0xFFFF) 720 ++Result; 721 722 return Result; 723 } 724 725 static uint64_t Rot64(uint64_t Imm, unsigned R) { 726 return (Imm << R) | (Imm >> (64 - R)); 727 } 728 729 static unsigned getInt64Count(int64_t Imm) { 730 unsigned Count = getInt64CountDirect(Imm); 731 if (Count == 1) 732 return Count; 733 734 for (unsigned r = 1; r < 63; ++r) { 735 uint64_t RImm = Rot64(Imm, r); 736 unsigned RCount = getInt64CountDirect(RImm) + 1; 737 Count = std::min(Count, RCount); 738 739 // See comments in getInt64 for an explanation of the logic below. 740 unsigned LS = findLastSet(RImm); 741 if (LS != r-1) 742 continue; 743 744 uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); 745 uint64_t RImmWithOnes = RImm | OnesMask; 746 747 RCount = getInt64CountDirect(RImmWithOnes) + 1; 748 Count = std::min(Count, RCount); 749 } 750 751 return Count; 752 } 753 754 // Select a 64-bit constant. For cost-modeling purposes, getInt64Count 755 // (above) needs to be kept in sync with this function. 756 static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl, 757 int64_t Imm) { 758 // Assume no remaining bits. 759 unsigned Remainder = 0; 760 // Assume no shift required. 761 unsigned Shift = 0; 762 763 // If it can't be represented as a 32 bit value. 764 if (!isInt<32>(Imm)) { 765 Shift = countTrailingZeros<uint64_t>(Imm); 766 int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; 767 768 // If the shifted value fits 32 bits. 769 if (isInt<32>(ImmSh)) { 770 // Go with the shifted value. 771 Imm = ImmSh; 772 } else { 773 // Still stuck with a 64 bit value. 774 Remainder = Imm; 775 Shift = 32; 776 Imm >>= 32; 777 } 778 } 779 780 // Intermediate operand. 781 SDNode *Result; 782 783 // Handle first 32 bits. 784 unsigned Lo = Imm & 0xFFFF; 785 unsigned Hi = (Imm >> 16) & 0xFFFF; 786 787 auto getI32Imm = [CurDAG, dl](unsigned Imm) { 788 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 789 }; 790 791 // Simple value. 792 if (isInt<16>(Imm)) { 793 // Just the Lo bits. 794 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo)); 795 } else if (Lo) { 796 // Handle the Hi bits. 797 unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8; 798 Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi)); 799 // And Lo bits. 800 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, 801 SDValue(Result, 0), getI32Imm(Lo)); 802 } else { 803 // Just the Hi bits. 804 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); 805 } 806 807 // If no shift, we're done. 808 if (!Shift) return Result; 809 810 // If Hi word == Lo word, 811 // we can use rldimi to insert the Lo word into Hi word. 812 if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { 813 SDValue Ops[] = 814 { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)}; 815 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); 816 } 817 818 // Shift for next step if the upper 32-bits were not zero. 819 if (Imm) { 820 Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, 821 SDValue(Result, 0), 822 getI32Imm(Shift), 823 getI32Imm(63 - Shift)); 824 } 825 826 // Add in the last bits as required. 827 if ((Hi = (Remainder >> 16) & 0xFFFF)) { 828 Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, 829 SDValue(Result, 0), getI32Imm(Hi)); 830 } 831 if ((Lo = Remainder & 0xFFFF)) { 832 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, 833 SDValue(Result, 0), getI32Imm(Lo)); 834 } 835 836 return Result; 837 } 838 839 static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) { 840 unsigned Count = getInt64CountDirect(Imm); 841 if (Count == 1) 842 return getInt64Direct(CurDAG, dl, Imm); 843 844 unsigned RMin = 0; 845 846 int64_t MatImm; 847 unsigned MaskEnd; 848 849 for (unsigned r = 1; r < 63; ++r) { 850 uint64_t RImm = Rot64(Imm, r); 851 unsigned RCount = getInt64CountDirect(RImm) + 1; 852 if (RCount < Count) { 853 Count = RCount; 854 RMin = r; 855 MatImm = RImm; 856 MaskEnd = 63; 857 } 858 859 // If the immediate to generate has many trailing zeros, it might be 860 // worthwhile to generate a rotated value with too many leading ones 861 // (because that's free with li/lis's sign-extension semantics), and then 862 // mask them off after rotation. 863 864 unsigned LS = findLastSet(RImm); 865 // We're adding (63-LS) higher-order ones, and we expect to mask them off 866 // after performing the inverse rotation by (64-r). So we need that: 867 // 63-LS == 64-r => LS == r-1 868 if (LS != r-1) 869 continue; 870 871 uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); 872 uint64_t RImmWithOnes = RImm | OnesMask; 873 874 RCount = getInt64CountDirect(RImmWithOnes) + 1; 875 if (RCount < Count) { 876 Count = RCount; 877 RMin = r; 878 MatImm = RImmWithOnes; 879 MaskEnd = LS; 880 } 881 } 882 883 if (!RMin) 884 return getInt64Direct(CurDAG, dl, Imm); 885 886 auto getI32Imm = [CurDAG, dl](unsigned Imm) { 887 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 888 }; 889 890 SDValue Val = SDValue(getInt64Direct(CurDAG, dl, MatImm), 0); 891 return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val, 892 getI32Imm(64 - RMin), getI32Imm(MaskEnd)); 893 } 894 895 // Select a 64-bit constant. 896 static SDNode *getInt64(SelectionDAG *CurDAG, SDNode *N) { 897 SDLoc dl(N); 898 899 // Get 64 bit value. 900 int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue(); 901 return getInt64(CurDAG, dl, Imm); 902 } 903 904 namespace { 905 906 class BitPermutationSelector { 907 struct ValueBit { 908 SDValue V; 909 910 // The bit number in the value, using a convention where bit 0 is the 911 // lowest-order bit. 912 unsigned Idx; 913 914 enum Kind { 915 ConstZero, 916 Variable 917 } K; 918 919 ValueBit(SDValue V, unsigned I, Kind K = Variable) 920 : V(V), Idx(I), K(K) {} 921 ValueBit(Kind K = Variable) 922 : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {} 923 924 bool isZero() const { 925 return K == ConstZero; 926 } 927 928 bool hasValue() const { 929 return K == Variable; 930 } 931 932 SDValue getValue() const { 933 assert(hasValue() && "Cannot get the value of a constant bit"); 934 return V; 935 } 936 937 unsigned getValueBitIndex() const { 938 assert(hasValue() && "Cannot get the value bit index of a constant bit"); 939 return Idx; 940 } 941 }; 942 943 // A bit group has the same underlying value and the same rotate factor. 944 struct BitGroup { 945 SDValue V; 946 unsigned RLAmt; 947 unsigned StartIdx, EndIdx; 948 949 // This rotation amount assumes that the lower 32 bits of the quantity are 950 // replicated in the high 32 bits by the rotation operator (which is done 951 // by rlwinm and friends in 64-bit mode). 952 bool Repl32; 953 // Did converting to Repl32 == true change the rotation factor? If it did, 954 // it decreased it by 32. 955 bool Repl32CR; 956 // Was this group coalesced after setting Repl32 to true? 957 bool Repl32Coalesced; 958 959 BitGroup(SDValue V, unsigned R, unsigned S, unsigned E) 960 : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false), 961 Repl32Coalesced(false) { 962 DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R << 963 " [" << S << ", " << E << "]\n"); 964 } 965 }; 966 967 // Information on each (Value, RLAmt) pair (like the number of groups 968 // associated with each) used to choose the lowering method. 969 struct ValueRotInfo { 970 SDValue V; 971 unsigned RLAmt = std::numeric_limits<unsigned>::max(); 972 unsigned NumGroups = 0; 973 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max(); 974 bool Repl32 = false; 975 976 ValueRotInfo() = default; 977 978 // For sorting (in reverse order) by NumGroups, and then by 979 // FirstGroupStartIdx. 980 bool operator < (const ValueRotInfo &Other) const { 981 // We need to sort so that the non-Repl32 come first because, when we're 982 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit 983 // masking operation. 984 if (Repl32 < Other.Repl32) 985 return true; 986 else if (Repl32 > Other.Repl32) 987 return false; 988 else if (NumGroups > Other.NumGroups) 989 return true; 990 else if (NumGroups < Other.NumGroups) 991 return false; 992 else if (FirstGroupStartIdx < Other.FirstGroupStartIdx) 993 return true; 994 return false; 995 } 996 }; 997 998 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>; 999 using ValueBitsMemoizer = 1000 DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>; 1001 ValueBitsMemoizer Memoizer; 1002 1003 // Return a pair of bool and a SmallVector pointer to a memoization entry. 1004 // The bool is true if something interesting was deduced, otherwise if we're 1005 // providing only a generic representation of V (or something else likewise 1006 // uninteresting for instruction selection) through the SmallVector. 1007 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V, 1008 unsigned NumBits) { 1009 auto &ValueEntry = Memoizer[V]; 1010 if (ValueEntry) 1011 return std::make_pair(ValueEntry->first, &ValueEntry->second); 1012 ValueEntry.reset(new ValueBitsMemoizedValue()); 1013 bool &Interesting = ValueEntry->first; 1014 SmallVector<ValueBit, 64> &Bits = ValueEntry->second; 1015 Bits.resize(NumBits); 1016 1017 switch (V.getOpcode()) { 1018 default: break; 1019 case ISD::ROTL: 1020 if (isa<ConstantSDNode>(V.getOperand(1))) { 1021 unsigned RotAmt = V.getConstantOperandVal(1); 1022 1023 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1024 1025 for (unsigned i = 0; i < NumBits; ++i) 1026 Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt]; 1027 1028 return std::make_pair(Interesting = true, &Bits); 1029 } 1030 break; 1031 case ISD::SHL: 1032 if (isa<ConstantSDNode>(V.getOperand(1))) { 1033 unsigned ShiftAmt = V.getConstantOperandVal(1); 1034 1035 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1036 1037 for (unsigned i = ShiftAmt; i < NumBits; ++i) 1038 Bits[i] = LHSBits[i - ShiftAmt]; 1039 1040 for (unsigned i = 0; i < ShiftAmt; ++i) 1041 Bits[i] = ValueBit(ValueBit::ConstZero); 1042 1043 return std::make_pair(Interesting = true, &Bits); 1044 } 1045 break; 1046 case ISD::SRL: 1047 if (isa<ConstantSDNode>(V.getOperand(1))) { 1048 unsigned ShiftAmt = V.getConstantOperandVal(1); 1049 1050 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1051 1052 for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) 1053 Bits[i] = LHSBits[i + ShiftAmt]; 1054 1055 for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) 1056 Bits[i] = ValueBit(ValueBit::ConstZero); 1057 1058 return std::make_pair(Interesting = true, &Bits); 1059 } 1060 break; 1061 case ISD::AND: 1062 if (isa<ConstantSDNode>(V.getOperand(1))) { 1063 uint64_t Mask = V.getConstantOperandVal(1); 1064 1065 const SmallVector<ValueBit, 64> *LHSBits; 1066 // Mark this as interesting, only if the LHS was also interesting. This 1067 // prevents the overall procedure from matching a single immediate 'and' 1068 // (which is non-optimal because such an and might be folded with other 1069 // things if we don't select it here). 1070 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits); 1071 1072 for (unsigned i = 0; i < NumBits; ++i) 1073 if (((Mask >> i) & 1) == 1) 1074 Bits[i] = (*LHSBits)[i]; 1075 else 1076 Bits[i] = ValueBit(ValueBit::ConstZero); 1077 1078 return std::make_pair(Interesting, &Bits); 1079 } 1080 break; 1081 case ISD::OR: { 1082 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1083 const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second; 1084 1085 bool AllDisjoint = true; 1086 for (unsigned i = 0; i < NumBits; ++i) 1087 if (LHSBits[i].isZero()) 1088 Bits[i] = RHSBits[i]; 1089 else if (RHSBits[i].isZero()) 1090 Bits[i] = LHSBits[i]; 1091 else { 1092 AllDisjoint = false; 1093 break; 1094 } 1095 1096 if (!AllDisjoint) 1097 break; 1098 1099 return std::make_pair(Interesting = true, &Bits); 1100 } 1101 } 1102 1103 for (unsigned i = 0; i < NumBits; ++i) 1104 Bits[i] = ValueBit(V, i); 1105 1106 return std::make_pair(Interesting = false, &Bits); 1107 } 1108 1109 // For each value (except the constant ones), compute the left-rotate amount 1110 // to get it from its original to final position. 1111 void computeRotationAmounts() { 1112 HasZeros = false; 1113 RLAmt.resize(Bits.size()); 1114 for (unsigned i = 0; i < Bits.size(); ++i) 1115 if (Bits[i].hasValue()) { 1116 unsigned VBI = Bits[i].getValueBitIndex(); 1117 if (i >= VBI) 1118 RLAmt[i] = i - VBI; 1119 else 1120 RLAmt[i] = Bits.size() - (VBI - i); 1121 } else if (Bits[i].isZero()) { 1122 HasZeros = true; 1123 RLAmt[i] = UINT32_MAX; 1124 } else { 1125 llvm_unreachable("Unknown value bit type"); 1126 } 1127 } 1128 1129 // Collect groups of consecutive bits with the same underlying value and 1130 // rotation factor. If we're doing late masking, we ignore zeros, otherwise 1131 // they break up groups. 1132 void collectBitGroups(bool LateMask) { 1133 BitGroups.clear(); 1134 1135 unsigned LastRLAmt = RLAmt[0]; 1136 SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue(); 1137 unsigned LastGroupStartIdx = 0; 1138 for (unsigned i = 1; i < Bits.size(); ++i) { 1139 unsigned ThisRLAmt = RLAmt[i]; 1140 SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue(); 1141 if (LateMask && !ThisValue) { 1142 ThisValue = LastValue; 1143 ThisRLAmt = LastRLAmt; 1144 // If we're doing late masking, then the first bit group always starts 1145 // at zero (even if the first bits were zero). 1146 if (BitGroups.empty()) 1147 LastGroupStartIdx = 0; 1148 } 1149 1150 // If this bit has the same underlying value and the same rotate factor as 1151 // the last one, then they're part of the same group. 1152 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue) 1153 continue; 1154 1155 if (LastValue.getNode()) 1156 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, 1157 i-1)); 1158 LastRLAmt = ThisRLAmt; 1159 LastValue = ThisValue; 1160 LastGroupStartIdx = i; 1161 } 1162 if (LastValue.getNode()) 1163 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, 1164 Bits.size()-1)); 1165 1166 if (BitGroups.empty()) 1167 return; 1168 1169 // We might be able to combine the first and last groups. 1170 if (BitGroups.size() > 1) { 1171 // If the first and last groups are the same, then remove the first group 1172 // in favor of the last group, making the ending index of the last group 1173 // equal to the ending index of the to-be-removed first group. 1174 if (BitGroups[0].StartIdx == 0 && 1175 BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 && 1176 BitGroups[0].V == BitGroups[BitGroups.size()-1].V && 1177 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) { 1178 DEBUG(dbgs() << "\tcombining final bit group with initial one\n"); 1179 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx; 1180 BitGroups.erase(BitGroups.begin()); 1181 } 1182 } 1183 } 1184 1185 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups 1186 // associated with each. If there is a degeneracy, pick the one that occurs 1187 // first (in the final value). 1188 void collectValueRotInfo() { 1189 ValueRots.clear(); 1190 1191 for (auto &BG : BitGroups) { 1192 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0); 1193 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)]; 1194 VRI.V = BG.V; 1195 VRI.RLAmt = BG.RLAmt; 1196 VRI.Repl32 = BG.Repl32; 1197 VRI.NumGroups += 1; 1198 VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx); 1199 } 1200 1201 // Now that we've collected the various ValueRotInfo instances, we need to 1202 // sort them. 1203 ValueRotsVec.clear(); 1204 for (auto &I : ValueRots) { 1205 ValueRotsVec.push_back(I.second); 1206 } 1207 std::sort(ValueRotsVec.begin(), ValueRotsVec.end()); 1208 } 1209 1210 // In 64-bit mode, rlwinm and friends have a rotation operator that 1211 // replicates the low-order 32 bits into the high-order 32-bits. The mask 1212 // indices of these instructions can only be in the lower 32 bits, so they 1213 // can only represent some 64-bit bit groups. However, when they can be used, 1214 // the 32-bit replication can be used to represent, as a single bit group, 1215 // otherwise separate bit groups. We'll convert to replicated-32-bit bit 1216 // groups when possible. Returns true if any of the bit groups were 1217 // converted. 1218 void assignRepl32BitGroups() { 1219 // If we have bits like this: 1220 // 1221 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 1222 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24 1223 // Groups: | RLAmt = 8 | RLAmt = 40 | 1224 // 1225 // But, making use of a 32-bit operation that replicates the low-order 32 1226 // bits into the high-order 32 bits, this can be one bit group with a RLAmt 1227 // of 8. 1228 1229 auto IsAllLow32 = [this](BitGroup & BG) { 1230 if (BG.StartIdx <= BG.EndIdx) { 1231 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) { 1232 if (!Bits[i].hasValue()) 1233 continue; 1234 if (Bits[i].getValueBitIndex() >= 32) 1235 return false; 1236 } 1237 } else { 1238 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) { 1239 if (!Bits[i].hasValue()) 1240 continue; 1241 if (Bits[i].getValueBitIndex() >= 32) 1242 return false; 1243 } 1244 for (unsigned i = 0; i <= BG.EndIdx; ++i) { 1245 if (!Bits[i].hasValue()) 1246 continue; 1247 if (Bits[i].getValueBitIndex() >= 32) 1248 return false; 1249 } 1250 } 1251 1252 return true; 1253 }; 1254 1255 for (auto &BG : BitGroups) { 1256 if (BG.StartIdx < 32 && BG.EndIdx < 32) { 1257 if (IsAllLow32(BG)) { 1258 if (BG.RLAmt >= 32) { 1259 BG.RLAmt -= 32; 1260 BG.Repl32CR = true; 1261 } 1262 1263 BG.Repl32 = true; 1264 1265 DEBUG(dbgs() << "\t32-bit replicated bit group for " << 1266 BG.V.getNode() << " RLAmt = " << BG.RLAmt << 1267 " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n"); 1268 } 1269 } 1270 } 1271 1272 // Now walk through the bit groups, consolidating where possible. 1273 for (auto I = BitGroups.begin(); I != BitGroups.end();) { 1274 // We might want to remove this bit group by merging it with the previous 1275 // group (which might be the ending group). 1276 auto IP = (I == BitGroups.begin()) ? 1277 std::prev(BitGroups.end()) : std::prev(I); 1278 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt && 1279 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) { 1280 1281 DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " << 1282 I->V.getNode() << " RLAmt = " << I->RLAmt << 1283 " [" << I->StartIdx << ", " << I->EndIdx << 1284 "] with group with range [" << 1285 IP->StartIdx << ", " << IP->EndIdx << "]\n"); 1286 1287 IP->EndIdx = I->EndIdx; 1288 IP->Repl32CR = IP->Repl32CR || I->Repl32CR; 1289 IP->Repl32Coalesced = true; 1290 I = BitGroups.erase(I); 1291 continue; 1292 } else { 1293 // There is a special case worth handling: If there is a single group 1294 // covering the entire upper 32 bits, and it can be merged with both 1295 // the next and previous groups (which might be the same group), then 1296 // do so. If it is the same group (so there will be only one group in 1297 // total), then we need to reverse the order of the range so that it 1298 // covers the entire 64 bits. 1299 if (I->StartIdx == 32 && I->EndIdx == 63) { 1300 assert(std::next(I) == BitGroups.end() && 1301 "bit group ends at index 63 but there is another?"); 1302 auto IN = BitGroups.begin(); 1303 1304 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V && 1305 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt && 1306 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP && 1307 IsAllLow32(*I)) { 1308 1309 DEBUG(dbgs() << "\tcombining bit group for " << 1310 I->V.getNode() << " RLAmt = " << I->RLAmt << 1311 " [" << I->StartIdx << ", " << I->EndIdx << 1312 "] with 32-bit replicated groups with ranges [" << 1313 IP->StartIdx << ", " << IP->EndIdx << "] and [" << 1314 IN->StartIdx << ", " << IN->EndIdx << "]\n"); 1315 1316 if (IP == IN) { 1317 // There is only one other group; change it to cover the whole 1318 // range (backward, so that it can still be Repl32 but cover the 1319 // whole 64-bit range). 1320 IP->StartIdx = 31; 1321 IP->EndIdx = 30; 1322 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32; 1323 IP->Repl32Coalesced = true; 1324 I = BitGroups.erase(I); 1325 } else { 1326 // There are two separate groups, one before this group and one 1327 // after us (at the beginning). We're going to remove this group, 1328 // but also the group at the very beginning. 1329 IP->EndIdx = IN->EndIdx; 1330 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32; 1331 IP->Repl32Coalesced = true; 1332 I = BitGroups.erase(I); 1333 BitGroups.erase(BitGroups.begin()); 1334 } 1335 1336 // This must be the last group in the vector (and we might have 1337 // just invalidated the iterator above), so break here. 1338 break; 1339 } 1340 } 1341 } 1342 1343 ++I; 1344 } 1345 } 1346 1347 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 1348 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 1349 } 1350 1351 uint64_t getZerosMask() { 1352 uint64_t Mask = 0; 1353 for (unsigned i = 0; i < Bits.size(); ++i) { 1354 if (Bits[i].hasValue()) 1355 continue; 1356 Mask |= (UINT64_C(1) << i); 1357 } 1358 1359 return ~Mask; 1360 } 1361 1362 // Depending on the number of groups for a particular value, it might be 1363 // better to rotate, mask explicitly (using andi/andis), and then or the 1364 // result. Select this part of the result first. 1365 void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { 1366 if (BPermRewriterNoMasking) 1367 return; 1368 1369 for (ValueRotInfo &VRI : ValueRotsVec) { 1370 unsigned Mask = 0; 1371 for (unsigned i = 0; i < Bits.size(); ++i) { 1372 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V) 1373 continue; 1374 if (RLAmt[i] != VRI.RLAmt) 1375 continue; 1376 Mask |= (1u << i); 1377 } 1378 1379 // Compute the masks for andi/andis that would be necessary. 1380 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16; 1381 assert((ANDIMask != 0 || ANDISMask != 0) && 1382 "No set bits in mask for value bit groups"); 1383 bool NeedsRotate = VRI.RLAmt != 0; 1384 1385 // We're trying to minimize the number of instructions. If we have one 1386 // group, using one of andi/andis can break even. If we have three 1387 // groups, we can use both andi and andis and break even (to use both 1388 // andi and andis we also need to or the results together). We need four 1389 // groups if we also need to rotate. To use andi/andis we need to do more 1390 // than break even because rotate-and-mask instructions tend to be easier 1391 // to schedule. 1392 1393 // FIXME: We've biased here against using andi/andis, which is right for 1394 // POWER cores, but not optimal everywhere. For example, on the A2, 1395 // andi/andis have single-cycle latency whereas the rotate-and-mask 1396 // instructions take two cycles, and it would be better to bias toward 1397 // andi/andis in break-even cases. 1398 1399 unsigned NumAndInsts = (unsigned) NeedsRotate + 1400 (unsigned) (ANDIMask != 0) + 1401 (unsigned) (ANDISMask != 0) + 1402 (unsigned) (ANDIMask != 0 && ANDISMask != 0) + 1403 (unsigned) (bool) Res; 1404 1405 DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() << 1406 " RL: " << VRI.RLAmt << ":" << 1407 "\n\t\t\tisel using masking: " << NumAndInsts << 1408 " using rotates: " << VRI.NumGroups << "\n"); 1409 1410 if (NumAndInsts >= VRI.NumGroups) 1411 continue; 1412 1413 DEBUG(dbgs() << "\t\t\t\tusing masking\n"); 1414 1415 if (InstCnt) *InstCnt += NumAndInsts; 1416 1417 SDValue VRot; 1418 if (VRI.RLAmt) { 1419 SDValue Ops[] = 1420 { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), 1421 getI32Imm(31, dl) }; 1422 VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, 1423 Ops), 0); 1424 } else { 1425 VRot = VRI.V; 1426 } 1427 1428 SDValue ANDIVal, ANDISVal; 1429 if (ANDIMask != 0) 1430 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32, 1431 VRot, getI32Imm(ANDIMask, dl)), 0); 1432 if (ANDISMask != 0) 1433 ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32, 1434 VRot, getI32Imm(ANDISMask, dl)), 0); 1435 1436 SDValue TotalVal; 1437 if (!ANDIVal) 1438 TotalVal = ANDISVal; 1439 else if (!ANDISVal) 1440 TotalVal = ANDIVal; 1441 else 1442 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, 1443 ANDIVal, ANDISVal), 0); 1444 1445 if (!Res) 1446 Res = TotalVal; 1447 else 1448 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, 1449 Res, TotalVal), 0); 1450 1451 // Now, remove all groups with this underlying value and rotation 1452 // factor. 1453 eraseMatchingBitGroups([VRI](const BitGroup &BG) { 1454 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; 1455 }); 1456 } 1457 } 1458 1459 // Instruction selection for the 32-bit case. 1460 SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) { 1461 SDLoc dl(N); 1462 SDValue Res; 1463 1464 if (InstCnt) *InstCnt = 0; 1465 1466 // Take care of cases that should use andi/andis first. 1467 SelectAndParts32(dl, Res, InstCnt); 1468 1469 // If we've not yet selected a 'starting' instruction, and we have no zeros 1470 // to fill in, select the (Value, RLAmt) with the highest priority (largest 1471 // number of groups), and start with this rotated value. 1472 if ((!HasZeros || LateMask) && !Res) { 1473 ValueRotInfo &VRI = ValueRotsVec[0]; 1474 if (VRI.RLAmt) { 1475 if (InstCnt) *InstCnt += 1; 1476 SDValue Ops[] = 1477 { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), 1478 getI32Imm(31, dl) }; 1479 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 1480 0); 1481 } else { 1482 Res = VRI.V; 1483 } 1484 1485 // Now, remove all groups with this underlying value and rotation factor. 1486 eraseMatchingBitGroups([VRI](const BitGroup &BG) { 1487 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; 1488 }); 1489 } 1490 1491 if (InstCnt) *InstCnt += BitGroups.size(); 1492 1493 // Insert the other groups (one at a time). 1494 for (auto &BG : BitGroups) { 1495 if (!Res) { 1496 SDValue Ops[] = 1497 { BG.V, getI32Imm(BG.RLAmt, dl), 1498 getI32Imm(Bits.size() - BG.EndIdx - 1, dl), 1499 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; 1500 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 1501 } else { 1502 SDValue Ops[] = 1503 { Res, BG.V, getI32Imm(BG.RLAmt, dl), 1504 getI32Imm(Bits.size() - BG.EndIdx - 1, dl), 1505 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; 1506 Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0); 1507 } 1508 } 1509 1510 if (LateMask) { 1511 unsigned Mask = (unsigned) getZerosMask(); 1512 1513 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16; 1514 assert((ANDIMask != 0 || ANDISMask != 0) && 1515 "No set bits in zeros mask?"); 1516 1517 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + 1518 (unsigned) (ANDISMask != 0) + 1519 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 1520 1521 SDValue ANDIVal, ANDISVal; 1522 if (ANDIMask != 0) 1523 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32, 1524 Res, getI32Imm(ANDIMask, dl)), 0); 1525 if (ANDISMask != 0) 1526 ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32, 1527 Res, getI32Imm(ANDISMask, dl)), 0); 1528 1529 if (!ANDIVal) 1530 Res = ANDISVal; 1531 else if (!ANDISVal) 1532 Res = ANDIVal; 1533 else 1534 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, 1535 ANDIVal, ANDISVal), 0); 1536 } 1537 1538 return Res.getNode(); 1539 } 1540 1541 unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32, 1542 unsigned MaskStart, unsigned MaskEnd, 1543 bool IsIns) { 1544 // In the notation used by the instructions, 'start' and 'end' are reversed 1545 // because bits are counted from high to low order. 1546 unsigned InstMaskStart = 64 - MaskEnd - 1, 1547 InstMaskEnd = 64 - MaskStart - 1; 1548 1549 if (Repl32) 1550 return 1; 1551 1552 if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) || 1553 InstMaskEnd == 63 - RLAmt) 1554 return 1; 1555 1556 return 2; 1557 } 1558 1559 // For 64-bit values, not all combinations of rotates and masks are 1560 // available. Produce one if it is available. 1561 SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt, 1562 bool Repl32, unsigned MaskStart, unsigned MaskEnd, 1563 unsigned *InstCnt = nullptr) { 1564 // In the notation used by the instructions, 'start' and 'end' are reversed 1565 // because bits are counted from high to low order. 1566 unsigned InstMaskStart = 64 - MaskEnd - 1, 1567 InstMaskEnd = 64 - MaskStart - 1; 1568 1569 if (InstCnt) *InstCnt += 1; 1570 1571 if (Repl32) { 1572 // This rotation amount assumes that the lower 32 bits of the quantity 1573 // are replicated in the high 32 bits by the rotation operator (which is 1574 // done by rlwinm and friends). 1575 assert(InstMaskStart >= 32 && "Mask cannot start out of range"); 1576 assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); 1577 SDValue Ops[] = 1578 { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), 1579 getI32Imm(InstMaskEnd - 32, dl) }; 1580 return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64, 1581 Ops), 0); 1582 } 1583 1584 if (InstMaskEnd == 63) { 1585 SDValue Ops[] = 1586 { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; 1587 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0); 1588 } 1589 1590 if (InstMaskStart == 0) { 1591 SDValue Ops[] = 1592 { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) }; 1593 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0); 1594 } 1595 1596 if (InstMaskEnd == 63 - RLAmt) { 1597 SDValue Ops[] = 1598 { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; 1599 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0); 1600 } 1601 1602 // We cannot do this with a single instruction, so we'll use two. The 1603 // problem is that we're not free to choose both a rotation amount and mask 1604 // start and end independently. We can choose an arbitrary mask start and 1605 // end, but then the rotation amount is fixed. Rotation, however, can be 1606 // inverted, and so by applying an "inverse" rotation first, we can get the 1607 // desired result. 1608 if (InstCnt) *InstCnt += 1; 1609 1610 // The rotation mask for the second instruction must be MaskStart. 1611 unsigned RLAmt2 = MaskStart; 1612 // The first instruction must rotate V so that the overall rotation amount 1613 // is RLAmt. 1614 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; 1615 if (RLAmt1) 1616 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); 1617 return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd); 1618 } 1619 1620 // For 64-bit values, not all combinations of rotates and masks are 1621 // available. Produce a rotate-mask-and-insert if one is available. 1622 SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl, 1623 unsigned RLAmt, bool Repl32, unsigned MaskStart, 1624 unsigned MaskEnd, unsigned *InstCnt = nullptr) { 1625 // In the notation used by the instructions, 'start' and 'end' are reversed 1626 // because bits are counted from high to low order. 1627 unsigned InstMaskStart = 64 - MaskEnd - 1, 1628 InstMaskEnd = 64 - MaskStart - 1; 1629 1630 if (InstCnt) *InstCnt += 1; 1631 1632 if (Repl32) { 1633 // This rotation amount assumes that the lower 32 bits of the quantity 1634 // are replicated in the high 32 bits by the rotation operator (which is 1635 // done by rlwinm and friends). 1636 assert(InstMaskStart >= 32 && "Mask cannot start out of range"); 1637 assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); 1638 SDValue Ops[] = 1639 { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), 1640 getI32Imm(InstMaskEnd - 32, dl) }; 1641 return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, 1642 Ops), 0); 1643 } 1644 1645 if (InstMaskEnd == 63 - RLAmt) { 1646 SDValue Ops[] = 1647 { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; 1648 return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0); 1649 } 1650 1651 // We cannot do this with a single instruction, so we'll use two. The 1652 // problem is that we're not free to choose both a rotation amount and mask 1653 // start and end independently. We can choose an arbitrary mask start and 1654 // end, but then the rotation amount is fixed. Rotation, however, can be 1655 // inverted, and so by applying an "inverse" rotation first, we can get the 1656 // desired result. 1657 if (InstCnt) *InstCnt += 1; 1658 1659 // The rotation mask for the second instruction must be MaskStart. 1660 unsigned RLAmt2 = MaskStart; 1661 // The first instruction must rotate V so that the overall rotation amount 1662 // is RLAmt. 1663 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; 1664 if (RLAmt1) 1665 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); 1666 return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd); 1667 } 1668 1669 void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { 1670 if (BPermRewriterNoMasking) 1671 return; 1672 1673 // The idea here is the same as in the 32-bit version, but with additional 1674 // complications from the fact that Repl32 might be true. Because we 1675 // aggressively convert bit groups to Repl32 form (which, for small 1676 // rotation factors, involves no other change), and then coalesce, it might 1677 // be the case that a single 64-bit masking operation could handle both 1678 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32 1679 // form allowed coalescing, then we must use a 32-bit rotaton in order to 1680 // completely capture the new combined bit group. 1681 1682 for (ValueRotInfo &VRI : ValueRotsVec) { 1683 uint64_t Mask = 0; 1684 1685 // We need to add to the mask all bits from the associated bit groups. 1686 // If Repl32 is false, we need to add bits from bit groups that have 1687 // Repl32 true, but are trivially convertable to Repl32 false. Such a 1688 // group is trivially convertable if it overlaps only with the lower 32 1689 // bits, and the group has not been coalesced. 1690 auto MatchingBG = [VRI](const BitGroup &BG) { 1691 if (VRI.V != BG.V) 1692 return false; 1693 1694 unsigned EffRLAmt = BG.RLAmt; 1695 if (!VRI.Repl32 && BG.Repl32) { 1696 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx && 1697 !BG.Repl32Coalesced) { 1698 if (BG.Repl32CR) 1699 EffRLAmt += 32; 1700 } else { 1701 return false; 1702 } 1703 } else if (VRI.Repl32 != BG.Repl32) { 1704 return false; 1705 } 1706 1707 return VRI.RLAmt == EffRLAmt; 1708 }; 1709 1710 for (auto &BG : BitGroups) { 1711 if (!MatchingBG(BG)) 1712 continue; 1713 1714 if (BG.StartIdx <= BG.EndIdx) { 1715 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) 1716 Mask |= (UINT64_C(1) << i); 1717 } else { 1718 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) 1719 Mask |= (UINT64_C(1) << i); 1720 for (unsigned i = 0; i <= BG.EndIdx; ++i) 1721 Mask |= (UINT64_C(1) << i); 1722 } 1723 } 1724 1725 // We can use the 32-bit andi/andis technique if the mask does not 1726 // require any higher-order bits. This can save an instruction compared 1727 // to always using the general 64-bit technique. 1728 bool Use32BitInsts = isUInt<32>(Mask); 1729 // Compute the masks for andi/andis that would be necessary. 1730 unsigned ANDIMask = (Mask & UINT16_MAX), 1731 ANDISMask = (Mask >> 16) & UINT16_MAX; 1732 1733 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)); 1734 1735 unsigned NumAndInsts = (unsigned) NeedsRotate + 1736 (unsigned) (bool) Res; 1737 if (Use32BitInsts) 1738 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + 1739 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 1740 else 1741 NumAndInsts += getInt64Count(Mask) + /* and */ 1; 1742 1743 unsigned NumRLInsts = 0; 1744 bool FirstBG = true; 1745 bool MoreBG = false; 1746 for (auto &BG : BitGroups) { 1747 if (!MatchingBG(BG)) { 1748 MoreBG = true; 1749 continue; 1750 } 1751 NumRLInsts += 1752 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, 1753 !FirstBG); 1754 FirstBG = false; 1755 } 1756 1757 DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() << 1758 " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << 1759 "\n\t\t\tisel using masking: " << NumAndInsts << 1760 " using rotates: " << NumRLInsts << "\n"); 1761 1762 // When we'd use andi/andis, we bias toward using the rotates (andi only 1763 // has a record form, and is cracked on POWER cores). However, when using 1764 // general 64-bit constant formation, bias toward the constant form, 1765 // because that exposes more opportunities for CSE. 1766 if (NumAndInsts > NumRLInsts) 1767 continue; 1768 // When merging multiple bit groups, instruction or is used. 1769 // But when rotate is used, rldimi can inert the rotated value into any 1770 // register, so instruction or can be avoided. 1771 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts) 1772 continue; 1773 1774 DEBUG(dbgs() << "\t\t\t\tusing masking\n"); 1775 1776 if (InstCnt) *InstCnt += NumAndInsts; 1777 1778 SDValue VRot; 1779 // We actually need to generate a rotation if we have a non-zero rotation 1780 // factor or, in the Repl32 case, if we care about any of the 1781 // higher-order replicated bits. In the latter case, we generate a mask 1782 // backward so that it actually includes the entire 64 bits. 1783 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask))) 1784 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, 1785 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63); 1786 else 1787 VRot = VRI.V; 1788 1789 SDValue TotalVal; 1790 if (Use32BitInsts) { 1791 assert((ANDIMask != 0 || ANDISMask != 0) && 1792 "No set bits in mask when using 32-bit ands for 64-bit value"); 1793 1794 SDValue ANDIVal, ANDISVal; 1795 if (ANDIMask != 0) 1796 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, 1797 VRot, getI32Imm(ANDIMask, dl)), 0); 1798 if (ANDISMask != 0) 1799 ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, 1800 VRot, getI32Imm(ANDISMask, dl)), 0); 1801 1802 if (!ANDIVal) 1803 TotalVal = ANDISVal; 1804 else if (!ANDISVal) 1805 TotalVal = ANDIVal; 1806 else 1807 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 1808 ANDIVal, ANDISVal), 0); 1809 } else { 1810 TotalVal = SDValue(getInt64(CurDAG, dl, Mask), 0); 1811 TotalVal = 1812 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, 1813 VRot, TotalVal), 0); 1814 } 1815 1816 if (!Res) 1817 Res = TotalVal; 1818 else 1819 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 1820 Res, TotalVal), 0); 1821 1822 // Now, remove all groups with this underlying value and rotation 1823 // factor. 1824 eraseMatchingBitGroups(MatchingBG); 1825 } 1826 } 1827 1828 // Instruction selection for the 64-bit case. 1829 SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) { 1830 SDLoc dl(N); 1831 SDValue Res; 1832 1833 if (InstCnt) *InstCnt = 0; 1834 1835 // Take care of cases that should use andi/andis first. 1836 SelectAndParts64(dl, Res, InstCnt); 1837 1838 // If we've not yet selected a 'starting' instruction, and we have no zeros 1839 // to fill in, select the (Value, RLAmt) with the highest priority (largest 1840 // number of groups), and start with this rotated value. 1841 if ((!HasZeros || LateMask) && !Res) { 1842 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32 1843 // groups will come first, and so the VRI representing the largest number 1844 // of groups might not be first (it might be the first Repl32 groups). 1845 unsigned MaxGroupsIdx = 0; 1846 if (!ValueRotsVec[0].Repl32) { 1847 for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i) 1848 if (ValueRotsVec[i].Repl32) { 1849 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups) 1850 MaxGroupsIdx = i; 1851 break; 1852 } 1853 } 1854 1855 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx]; 1856 bool NeedsRotate = false; 1857 if (VRI.RLAmt) { 1858 NeedsRotate = true; 1859 } else if (VRI.Repl32) { 1860 for (auto &BG : BitGroups) { 1861 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt || 1862 BG.Repl32 != VRI.Repl32) 1863 continue; 1864 1865 // We don't need a rotate if the bit group is confined to the lower 1866 // 32 bits. 1867 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx) 1868 continue; 1869 1870 NeedsRotate = true; 1871 break; 1872 } 1873 } 1874 1875 if (NeedsRotate) 1876 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, 1877 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63, 1878 InstCnt); 1879 else 1880 Res = VRI.V; 1881 1882 // Now, remove all groups with this underlying value and rotation factor. 1883 if (Res) 1884 eraseMatchingBitGroups([VRI](const BitGroup &BG) { 1885 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt && 1886 BG.Repl32 == VRI.Repl32; 1887 }); 1888 } 1889 1890 // Because 64-bit rotates are more flexible than inserts, we might have a 1891 // preference regarding which one we do first (to save one instruction). 1892 if (!Res) 1893 for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) { 1894 if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, 1895 false) < 1896 SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, 1897 true)) { 1898 if (I != BitGroups.begin()) { 1899 BitGroup BG = *I; 1900 BitGroups.erase(I); 1901 BitGroups.insert(BitGroups.begin(), BG); 1902 } 1903 1904 break; 1905 } 1906 } 1907 1908 // Insert the other groups (one at a time). 1909 for (auto &BG : BitGroups) { 1910 if (!Res) 1911 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx, 1912 BG.EndIdx, InstCnt); 1913 else 1914 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32, 1915 BG.StartIdx, BG.EndIdx, InstCnt); 1916 } 1917 1918 if (LateMask) { 1919 uint64_t Mask = getZerosMask(); 1920 1921 // We can use the 32-bit andi/andis technique if the mask does not 1922 // require any higher-order bits. This can save an instruction compared 1923 // to always using the general 64-bit technique. 1924 bool Use32BitInsts = isUInt<32>(Mask); 1925 // Compute the masks for andi/andis that would be necessary. 1926 unsigned ANDIMask = (Mask & UINT16_MAX), 1927 ANDISMask = (Mask >> 16) & UINT16_MAX; 1928 1929 if (Use32BitInsts) { 1930 assert((ANDIMask != 0 || ANDISMask != 0) && 1931 "No set bits in mask when using 32-bit ands for 64-bit value"); 1932 1933 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + 1934 (unsigned) (ANDISMask != 0) + 1935 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 1936 1937 SDValue ANDIVal, ANDISVal; 1938 if (ANDIMask != 0) 1939 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, 1940 Res, getI32Imm(ANDIMask, dl)), 0); 1941 if (ANDISMask != 0) 1942 ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, 1943 Res, getI32Imm(ANDISMask, dl)), 0); 1944 1945 if (!ANDIVal) 1946 Res = ANDISVal; 1947 else if (!ANDISVal) 1948 Res = ANDIVal; 1949 else 1950 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 1951 ANDIVal, ANDISVal), 0); 1952 } else { 1953 if (InstCnt) *InstCnt += getInt64Count(Mask) + /* and */ 1; 1954 1955 SDValue MaskVal = SDValue(getInt64(CurDAG, dl, Mask), 0); 1956 Res = 1957 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, 1958 Res, MaskVal), 0); 1959 } 1960 } 1961 1962 return Res.getNode(); 1963 } 1964 1965 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) { 1966 // Fill in BitGroups. 1967 collectBitGroups(LateMask); 1968 if (BitGroups.empty()) 1969 return nullptr; 1970 1971 // For 64-bit values, figure out when we can use 32-bit instructions. 1972 if (Bits.size() == 64) 1973 assignRepl32BitGroups(); 1974 1975 // Fill in ValueRotsVec. 1976 collectValueRotInfo(); 1977 1978 if (Bits.size() == 32) { 1979 return Select32(N, LateMask, InstCnt); 1980 } else { 1981 assert(Bits.size() == 64 && "Not 64 bits here?"); 1982 return Select64(N, LateMask, InstCnt); 1983 } 1984 1985 return nullptr; 1986 } 1987 1988 void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) { 1989 BitGroups.erase(remove_if(BitGroups, F), BitGroups.end()); 1990 } 1991 1992 SmallVector<ValueBit, 64> Bits; 1993 1994 bool HasZeros; 1995 SmallVector<unsigned, 64> RLAmt; 1996 1997 SmallVector<BitGroup, 16> BitGroups; 1998 1999 DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots; 2000 SmallVector<ValueRotInfo, 16> ValueRotsVec; 2001 2002 SelectionDAG *CurDAG; 2003 2004 public: 2005 BitPermutationSelector(SelectionDAG *DAG) 2006 : CurDAG(DAG) {} 2007 2008 // Here we try to match complex bit permutations into a set of 2009 // rotate-and-shift/shift/and/or instructions, using a set of heuristics 2010 // known to produce optimial code for common cases (like i32 byte swapping). 2011 SDNode *Select(SDNode *N) { 2012 Memoizer.clear(); 2013 auto Result = 2014 getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits()); 2015 if (!Result.first) 2016 return nullptr; 2017 Bits = std::move(*Result.second); 2018 2019 DEBUG(dbgs() << "Considering bit-permutation-based instruction" 2020 " selection for: "); 2021 DEBUG(N->dump(CurDAG)); 2022 2023 // Fill it RLAmt and set HasZeros. 2024 computeRotationAmounts(); 2025 2026 if (!HasZeros) 2027 return Select(N, false); 2028 2029 // We currently have two techniques for handling results with zeros: early 2030 // masking (the default) and late masking. Late masking is sometimes more 2031 // efficient, but because the structure of the bit groups is different, it 2032 // is hard to tell without generating both and comparing the results. With 2033 // late masking, we ignore zeros in the resulting value when inserting each 2034 // set of bit groups, and then mask in the zeros at the end. With early 2035 // masking, we only insert the non-zero parts of the result at every step. 2036 2037 unsigned InstCnt, InstCntLateMask; 2038 DEBUG(dbgs() << "\tEarly masking:\n"); 2039 SDNode *RN = Select(N, false, &InstCnt); 2040 DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n"); 2041 2042 DEBUG(dbgs() << "\tLate masking:\n"); 2043 SDNode *RNLM = Select(N, true, &InstCntLateMask); 2044 DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask << 2045 " instructions\n"); 2046 2047 if (InstCnt <= InstCntLateMask) { 2048 DEBUG(dbgs() << "\tUsing early-masking for isel\n"); 2049 return RN; 2050 } 2051 2052 DEBUG(dbgs() << "\tUsing late-masking for isel\n"); 2053 return RNLM; 2054 } 2055 }; 2056 2057 } // end anonymous namespace 2058 2059 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { 2060 if (N->getValueType(0) != MVT::i32 && 2061 N->getValueType(0) != MVT::i64) 2062 return false; 2063 2064 if (!UseBitPermRewriter) 2065 return false; 2066 2067 switch (N->getOpcode()) { 2068 default: break; 2069 case ISD::ROTL: 2070 case ISD::SHL: 2071 case ISD::SRL: 2072 case ISD::AND: 2073 case ISD::OR: { 2074 BitPermutationSelector BPS(CurDAG); 2075 if (SDNode *New = BPS.Select(N)) { 2076 ReplaceNode(N, New); 2077 return true; 2078 } 2079 return false; 2080 } 2081 } 2082 2083 return false; 2084 } 2085 2086 /// SelectCC - Select a comparison of the specified values with the specified 2087 /// condition code, returning the CR# of the expression. 2088 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2089 const SDLoc &dl) { 2090 // Always select the LHS. 2091 unsigned Opc; 2092 2093 if (LHS.getValueType() == MVT::i32) { 2094 unsigned Imm; 2095 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 2096 if (isInt32Immediate(RHS, Imm)) { 2097 // SETEQ/SETNE comparison with 16-bit immediate, fold it. 2098 if (isUInt<16>(Imm)) 2099 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, 2100 getI32Imm(Imm & 0xFFFF, dl)), 2101 0); 2102 // If this is a 16-bit signed immediate, fold it. 2103 if (isInt<16>((int)Imm)) 2104 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, 2105 getI32Imm(Imm & 0xFFFF, dl)), 2106 0); 2107 2108 // For non-equality comparisons, the default code would materialize the 2109 // constant, then compare against it, like this: 2110 // lis r2, 4660 2111 // ori r2, r2, 22136 2112 // cmpw cr0, r3, r2 2113 // Since we are just comparing for equality, we can emit this instead: 2114 // xoris r0,r3,0x1234 2115 // cmplwi cr0,r0,0x5678 2116 // beq cr0,L6 2117 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS, 2118 getI32Imm(Imm >> 16, dl)), 0); 2119 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor, 2120 getI32Imm(Imm & 0xFFFF, dl)), 0); 2121 } 2122 Opc = PPC::CMPLW; 2123 } else if (ISD::isUnsignedIntSetCC(CC)) { 2124 if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm)) 2125 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, 2126 getI32Imm(Imm & 0xFFFF, dl)), 0); 2127 Opc = PPC::CMPLW; 2128 } else { 2129 short SImm; 2130 if (isIntS16Immediate(RHS, SImm)) 2131 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, 2132 getI32Imm((int)SImm & 0xFFFF, 2133 dl)), 2134 0); 2135 Opc = PPC::CMPW; 2136 } 2137 } else if (LHS.getValueType() == MVT::i64) { 2138 uint64_t Imm; 2139 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 2140 if (isInt64Immediate(RHS.getNode(), Imm)) { 2141 // SETEQ/SETNE comparison with 16-bit immediate, fold it. 2142 if (isUInt<16>(Imm)) 2143 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, 2144 getI32Imm(Imm & 0xFFFF, dl)), 2145 0); 2146 // If this is a 16-bit signed immediate, fold it. 2147 if (isInt<16>(Imm)) 2148 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, 2149 getI32Imm(Imm & 0xFFFF, dl)), 2150 0); 2151 2152 // For non-equality comparisons, the default code would materialize the 2153 // constant, then compare against it, like this: 2154 // lis r2, 4660 2155 // ori r2, r2, 22136 2156 // cmpd cr0, r3, r2 2157 // Since we are just comparing for equality, we can emit this instead: 2158 // xoris r0,r3,0x1234 2159 // cmpldi cr0,r0,0x5678 2160 // beq cr0,L6 2161 if (isUInt<32>(Imm)) { 2162 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS, 2163 getI64Imm(Imm >> 16, dl)), 0); 2164 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor, 2165 getI64Imm(Imm & 0xFFFF, dl)), 2166 0); 2167 } 2168 } 2169 Opc = PPC::CMPLD; 2170 } else if (ISD::isUnsignedIntSetCC(CC)) { 2171 if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm)) 2172 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, 2173 getI64Imm(Imm & 0xFFFF, dl)), 0); 2174 Opc = PPC::CMPLD; 2175 } else { 2176 short SImm; 2177 if (isIntS16Immediate(RHS, SImm)) 2178 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, 2179 getI64Imm(SImm & 0xFFFF, dl)), 2180 0); 2181 Opc = PPC::CMPD; 2182 } 2183 } else if (LHS.getValueType() == MVT::f32) { 2184 Opc = PPC::FCMPUS; 2185 } else { 2186 assert(LHS.getValueType() == MVT::f64 && "Unknown vt!"); 2187 Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; 2188 } 2189 return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); 2190 } 2191 2192 static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) { 2193 switch (CC) { 2194 case ISD::SETUEQ: 2195 case ISD::SETONE: 2196 case ISD::SETOLE: 2197 case ISD::SETOGE: 2198 llvm_unreachable("Should be lowered by legalize!"); 2199 default: llvm_unreachable("Unknown condition!"); 2200 case ISD::SETOEQ: 2201 case ISD::SETEQ: return PPC::PRED_EQ; 2202 case ISD::SETUNE: 2203 case ISD::SETNE: return PPC::PRED_NE; 2204 case ISD::SETOLT: 2205 case ISD::SETLT: return PPC::PRED_LT; 2206 case ISD::SETULE: 2207 case ISD::SETLE: return PPC::PRED_LE; 2208 case ISD::SETOGT: 2209 case ISD::SETGT: return PPC::PRED_GT; 2210 case ISD::SETUGE: 2211 case ISD::SETGE: return PPC::PRED_GE; 2212 case ISD::SETO: return PPC::PRED_NU; 2213 case ISD::SETUO: return PPC::PRED_UN; 2214 // These two are invalid for floating point. Assume we have int. 2215 case ISD::SETULT: return PPC::PRED_LT; 2216 case ISD::SETUGT: return PPC::PRED_GT; 2217 } 2218 } 2219 2220 /// getCRIdxForSetCC - Return the index of the condition register field 2221 /// associated with the SetCC condition, and whether or not the field is 2222 /// treated as inverted. That is, lt = 0; ge = 0 inverted. 2223 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { 2224 Invert = false; 2225 switch (CC) { 2226 default: llvm_unreachable("Unknown condition!"); 2227 case ISD::SETOLT: 2228 case ISD::SETLT: return 0; // Bit #0 = SETOLT 2229 case ISD::SETOGT: 2230 case ISD::SETGT: return 1; // Bit #1 = SETOGT 2231 case ISD::SETOEQ: 2232 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ 2233 case ISD::SETUO: return 3; // Bit #3 = SETUO 2234 case ISD::SETUGE: 2235 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE 2236 case ISD::SETULE: 2237 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE 2238 case ISD::SETUNE: 2239 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE 2240 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO 2241 case ISD::SETUEQ: 2242 case ISD::SETOGE: 2243 case ISD::SETOLE: 2244 case ISD::SETONE: 2245 llvm_unreachable("Invalid branch code: should be expanded by legalize"); 2246 // These are invalid for floating point. Assume integer. 2247 case ISD::SETULT: return 0; 2248 case ISD::SETUGT: return 1; 2249 } 2250 } 2251 2252 // getVCmpInst: return the vector compare instruction for the specified 2253 // vector type and condition code. Since this is for altivec specific code, 2254 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32). 2255 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, 2256 bool HasVSX, bool &Swap, bool &Negate) { 2257 Swap = false; 2258 Negate = false; 2259 2260 if (VecVT.isFloatingPoint()) { 2261 /* Handle some cases by swapping input operands. */ 2262 switch (CC) { 2263 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break; 2264 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; 2265 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break; 2266 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break; 2267 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; 2268 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break; 2269 default: break; 2270 } 2271 /* Handle some cases by negating the result. */ 2272 switch (CC) { 2273 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; 2274 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break; 2275 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break; 2276 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break; 2277 default: break; 2278 } 2279 /* We have instructions implementing the remaining cases. */ 2280 switch (CC) { 2281 case ISD::SETEQ: 2282 case ISD::SETOEQ: 2283 if (VecVT == MVT::v4f32) 2284 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; 2285 else if (VecVT == MVT::v2f64) 2286 return PPC::XVCMPEQDP; 2287 break; 2288 case ISD::SETGT: 2289 case ISD::SETOGT: 2290 if (VecVT == MVT::v4f32) 2291 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; 2292 else if (VecVT == MVT::v2f64) 2293 return PPC::XVCMPGTDP; 2294 break; 2295 case ISD::SETGE: 2296 case ISD::SETOGE: 2297 if (VecVT == MVT::v4f32) 2298 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP; 2299 else if (VecVT == MVT::v2f64) 2300 return PPC::XVCMPGEDP; 2301 break; 2302 default: 2303 break; 2304 } 2305 llvm_unreachable("Invalid floating-point vector compare condition"); 2306 } else { 2307 /* Handle some cases by swapping input operands. */ 2308 switch (CC) { 2309 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break; 2310 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; 2311 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; 2312 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break; 2313 default: break; 2314 } 2315 /* Handle some cases by negating the result. */ 2316 switch (CC) { 2317 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; 2318 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break; 2319 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break; 2320 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break; 2321 default: break; 2322 } 2323 /* We have instructions implementing the remaining cases. */ 2324 switch (CC) { 2325 case ISD::SETEQ: 2326 case ISD::SETUEQ: 2327 if (VecVT == MVT::v16i8) 2328 return PPC::VCMPEQUB; 2329 else if (VecVT == MVT::v8i16) 2330 return PPC::VCMPEQUH; 2331 else if (VecVT == MVT::v4i32) 2332 return PPC::VCMPEQUW; 2333 else if (VecVT == MVT::v2i64) 2334 return PPC::VCMPEQUD; 2335 break; 2336 case ISD::SETGT: 2337 if (VecVT == MVT::v16i8) 2338 return PPC::VCMPGTSB; 2339 else if (VecVT == MVT::v8i16) 2340 return PPC::VCMPGTSH; 2341 else if (VecVT == MVT::v4i32) 2342 return PPC::VCMPGTSW; 2343 else if (VecVT == MVT::v2i64) 2344 return PPC::VCMPGTSD; 2345 break; 2346 case ISD::SETUGT: 2347 if (VecVT == MVT::v16i8) 2348 return PPC::VCMPGTUB; 2349 else if (VecVT == MVT::v8i16) 2350 return PPC::VCMPGTUH; 2351 else if (VecVT == MVT::v4i32) 2352 return PPC::VCMPGTUW; 2353 else if (VecVT == MVT::v2i64) 2354 return PPC::VCMPGTUD; 2355 break; 2356 default: 2357 break; 2358 } 2359 llvm_unreachable("Invalid integer vector compare condition"); 2360 } 2361 } 2362 2363 bool PPCDAGToDAGISel::trySETCC(SDNode *N) { 2364 SDLoc dl(N); 2365 unsigned Imm; 2366 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2367 EVT PtrVT = 2368 CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); 2369 bool isPPC64 = (PtrVT == MVT::i64); 2370 2371 if (!PPCSubTarget->useCRBits() && 2372 isInt32Immediate(N->getOperand(1), Imm)) { 2373 // We can codegen setcc op, imm very efficiently compared to a brcond. 2374 // Check for those cases here. 2375 // setcc op, 0 2376 if (Imm == 0) { 2377 SDValue Op = N->getOperand(0); 2378 switch (CC) { 2379 default: break; 2380 case ISD::SETEQ: { 2381 Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); 2382 SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl), 2383 getI32Imm(31, dl) }; 2384 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 2385 return true; 2386 } 2387 case ISD::SETNE: { 2388 if (isPPC64) break; 2389 SDValue AD = 2390 SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 2391 Op, getI32Imm(~0U, dl)), 0); 2392 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); 2393 return true; 2394 } 2395 case ISD::SETLT: { 2396 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), 2397 getI32Imm(31, dl) }; 2398 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 2399 return true; 2400 } 2401 case ISD::SETGT: { 2402 SDValue T = 2403 SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0); 2404 T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); 2405 SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl), 2406 getI32Imm(31, dl) }; 2407 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 2408 return true; 2409 } 2410 } 2411 } else if (Imm == ~0U) { // setcc op, -1 2412 SDValue Op = N->getOperand(0); 2413 switch (CC) { 2414 default: break; 2415 case ISD::SETEQ: 2416 if (isPPC64) break; 2417 Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 2418 Op, getI32Imm(1, dl)), 0); 2419 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, 2420 SDValue(CurDAG->getMachineNode(PPC::LI, dl, 2421 MVT::i32, 2422 getI32Imm(0, dl)), 2423 0), Op.getValue(1)); 2424 return true; 2425 case ISD::SETNE: { 2426 if (isPPC64) break; 2427 Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); 2428 SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 2429 Op, getI32Imm(~0U, dl)); 2430 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op, 2431 SDValue(AD, 1)); 2432 return true; 2433 } 2434 case ISD::SETLT: { 2435 SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op, 2436 getI32Imm(1, dl)), 0); 2437 SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD, 2438 Op), 0); 2439 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl), 2440 getI32Imm(31, dl) }; 2441 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 2442 return true; 2443 } 2444 case ISD::SETGT: { 2445 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), 2446 getI32Imm(31, dl) }; 2447 Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 2448 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl)); 2449 return true; 2450 } 2451 } 2452 } 2453 } 2454 2455 SDValue LHS = N->getOperand(0); 2456 SDValue RHS = N->getOperand(1); 2457 2458 // Altivec Vector compare instructions do not set any CR register by default and 2459 // vector compare operations return the same type as the operands. 2460 if (LHS.getValueType().isVector()) { 2461 if (PPCSubTarget->hasQPX()) 2462 return false; 2463 2464 EVT VecVT = LHS.getValueType(); 2465 bool Swap, Negate; 2466 unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, 2467 PPCSubTarget->hasVSX(), Swap, Negate); 2468 if (Swap) 2469 std::swap(LHS, RHS); 2470 2471 EVT ResVT = VecVT.changeVectorElementTypeToInteger(); 2472 if (Negate) { 2473 SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0); 2474 CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR, 2475 ResVT, VCmp, VCmp); 2476 return true; 2477 } 2478 2479 CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS); 2480 return true; 2481 } 2482 2483 if (PPCSubTarget->useCRBits()) 2484 return false; 2485 2486 bool Inv; 2487 unsigned Idx = getCRIdxForSetCC(CC, Inv); 2488 SDValue CCReg = SelectCC(LHS, RHS, CC, dl); 2489 SDValue IntCR; 2490 2491 // Force the ccreg into CR7. 2492 SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); 2493 2494 SDValue InFlag(nullptr, 0); // Null incoming flag value. 2495 CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, 2496 InFlag).getValue(1); 2497 2498 IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, 2499 CCReg), 0); 2500 2501 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl), 2502 getI32Imm(31, dl), getI32Imm(31, dl) }; 2503 if (!Inv) { 2504 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 2505 return true; 2506 } 2507 2508 // Get the specified bit. 2509 SDValue Tmp = 2510 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 2511 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); 2512 return true; 2513 } 2514 2515 // Is this opcode a bitwise logical operation? 2516 static bool isLogicOp(unsigned Opc) { 2517 return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; 2518 } 2519 2520 /// If this node is a sign/zero extension of an integer comparison, 2521 /// it can usually be computed in GPR's rather than using comparison 2522 /// instructions and ISEL. We only do this on 64-bit targets for now 2523 /// as the code is specialized for 64-bit (it uses 64-bit instructions 2524 /// and assumes 64-bit registers). 2525 bool PPCDAGToDAGISel::tryEXTEND(SDNode *N) { 2526 if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) 2527 return false; 2528 assert((N->getOpcode() == ISD::ZERO_EXTEND || 2529 N->getOpcode() == ISD::SIGN_EXTEND) && 2530 "Expecting a zero/sign extend node!"); 2531 2532 SDValue WideRes; 2533 // If we are zero-extending the result of a logical operation on i1 2534 // values, we can keep the values in GPRs. 2535 if (isLogicOp(N->getOperand(0).getOpcode()) && 2536 N->getOperand(0).getValueType() == MVT::i1 && 2537 N->getOpcode() == ISD::ZERO_EXTEND) 2538 WideRes = computeLogicOpInGPR(N->getOperand(0)); 2539 else if (N->getOperand(0).getOpcode() != ISD::SETCC) 2540 return false; 2541 else 2542 WideRes = 2543 getSETCCInGPR(N->getOperand(0), 2544 N->getOpcode() == ISD::SIGN_EXTEND ? 2545 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); 2546 2547 if (!WideRes) 2548 return false; 2549 2550 SDLoc dl(N); 2551 bool Inputs32Bit = N->getOperand(0).getOperand(0).getValueType() == MVT::i32; 2552 bool Output32Bit = N->getValueType(0) == MVT::i32; 2553 2554 NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; 2555 NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; 2556 2557 SDValue ConvOp = WideRes; 2558 if (Inputs32Bit != Output32Bit) 2559 ConvOp = addExtOrTrunc(WideRes, Inputs32Bit ? ExtOrTruncConversion::Ext : 2560 ExtOrTruncConversion::Trunc); 2561 ReplaceNode(N, ConvOp.getNode()); 2562 2563 return true; 2564 } 2565 2566 // Lower a logical operation on i1 values into a GPR sequence if possible. 2567 // The result can be kept in a GPR if requested. 2568 // Three types of inputs can be handled: 2569 // - SETCC 2570 // - TRUNCATE 2571 // - Logical operation (AND/OR/XOR) 2572 // There is also a special case that is handled (namely a complement operation 2573 // achieved with xor %a, -1). 2574 SDValue PPCDAGToDAGISel::computeLogicOpInGPR(SDValue LogicOp) { 2575 assert(isLogicOp(LogicOp.getOpcode()) && 2576 "Can only handle logic operations here."); 2577 assert(LogicOp.getValueType() == MVT::i1 && 2578 "Can only handle logic operations on i1 values here."); 2579 SDLoc dl(LogicOp); 2580 SDValue LHS, RHS; 2581 2582 // Special case: xor %a, -1 2583 bool IsBitwiseNegation = isBitwiseNot(LogicOp); 2584 2585 // Produces a GPR sequence for each operand of the binary logic operation. 2586 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates 2587 // the value in a GPR and for logic operations, it will recursively produce 2588 // a GPR sequence for the operation. 2589 auto getLogicOperand = [&] (SDValue Operand) -> SDValue { 2590 unsigned OperandOpcode = Operand.getOpcode(); 2591 if (OperandOpcode == ISD::SETCC) 2592 return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); 2593 else if (OperandOpcode == ISD::TRUNCATE) { 2594 SDValue InputOp = Operand.getOperand(0); 2595 EVT InVT = InputOp.getValueType(); 2596 return 2597 SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : 2598 PPC::RLDICL, dl, InVT, InputOp, 2599 getI64Imm(0, dl), getI64Imm(63, dl)), 0); 2600 } else if (isLogicOp(OperandOpcode)) 2601 return computeLogicOpInGPR(Operand); 2602 return SDValue(); 2603 }; 2604 LHS = getLogicOperand(LogicOp.getOperand(0)); 2605 RHS = getLogicOperand(LogicOp.getOperand(1)); 2606 2607 // If a GPR sequence can't be produced for the LHS we can't proceed. 2608 // Not producing a GPR sequence for the RHS is only a problem if this isn't 2609 // a bitwise negation operation. 2610 if (!LHS || (!RHS && !IsBitwiseNegation)) 2611 return SDValue(); 2612 2613 NumLogicOpsOnComparison++; 2614 2615 // We will use the inputs as 64-bit values. 2616 if (LHS.getValueType() == MVT::i32) 2617 LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); 2618 if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) 2619 RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); 2620 2621 unsigned NewOpc; 2622 switch (LogicOp.getOpcode()) { 2623 default: llvm_unreachable("Unknown logic operation."); 2624 case ISD::AND: NewOpc = PPC::AND8; break; 2625 case ISD::OR: NewOpc = PPC::OR8; break; 2626 case ISD::XOR: NewOpc = PPC::XOR8; break; 2627 } 2628 2629 if (IsBitwiseNegation) { 2630 RHS = getI64Imm(1, dl); 2631 NewOpc = PPC::XORI8; 2632 } 2633 2634 return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); 2635 2636 } 2637 2638 /// Try performing logical operations on results of comparisons in GPRs. 2639 /// It is typically preferred from a performance perspective over performing 2640 /// the operations on individual bits in the CR. We only do this on 64-bit 2641 /// targets for now as the code is specialized for 64-bit (it uses 64-bit 2642 /// instructions and assumes 64-bit registers). 2643 bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) { 2644 if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) 2645 return false; 2646 if (N->getValueType(0) != MVT::i1) 2647 return false; 2648 assert(isLogicOp(N->getOpcode()) && 2649 "Expected a logic operation on setcc results."); 2650 SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); 2651 if (!LoweredLogical) 2652 return false; 2653 2654 SDLoc dl(N); 2655 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; 2656 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; 2657 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); 2658 SDValue LHS = LoweredLogical.getOperand(0); 2659 SDValue RHS = LoweredLogical.getOperand(1); 2660 SDValue WideOp; 2661 SDValue OpToConvToRecForm; 2662 2663 // Look through any 32-bit to 64-bit implicit extend nodes to find the opcode 2664 // that is input to the XORI. 2665 if (IsBitwiseNegate && 2666 LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) 2667 OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); 2668 else if (IsBitwiseNegate) 2669 // If the input to the XORI isn't an extension, that's what we're after. 2670 OpToConvToRecForm = LoweredLogical.getOperand(0); 2671 else 2672 // If this is not an XORI, it is a reg-reg logical op and we can convert it 2673 // to record-form. 2674 OpToConvToRecForm = LoweredLogical; 2675 2676 // Get the record-form version of the node we're looking to use to get the 2677 // CR result from. 2678 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); 2679 int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); 2680 2681 // Convert the right node to record-form. This is either the logical we're 2682 // looking at or it is the input node to the negation (if we're looking at 2683 // a bitwise negation). 2684 if (NewOpc != -1 && IsBitwiseNegate) { 2685 // The input to the XORI has a record-form. Use it. 2686 assert(LoweredLogical.getConstantOperandVal(1) == 1 && 2687 "Expected a PPC::XORI8 only for bitwise negation."); 2688 // Emit the record-form instruction. 2689 std::vector<SDValue> Ops; 2690 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) 2691 Ops.push_back(OpToConvToRecForm.getOperand(i)); 2692 2693 WideOp = 2694 SDValue(CurDAG->getMachineNode(NewOpc, dl, 2695 OpToConvToRecForm.getValueType(), 2696 MVT::Glue, Ops), 0); 2697 } else { 2698 assert((NewOpc != -1 || !IsBitwiseNegate) && 2699 "No record form available for AND8/OR8/XOR8?"); 2700 WideOp = 2701 SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl, 2702 MVT::i64, MVT::Glue, LHS, RHS), 0); 2703 } 2704 2705 // Select this node to a single bit from CR0 set by the record-form node 2706 // just created. For bitwise negation, use the EQ bit which is the equivalent 2707 // of negating the result (i.e. it is a bit set when the result of the 2708 // operation is zero). 2709 SDValue SRIdxVal = 2710 CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); 2711 SDValue CRBit = 2712 SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, 2713 MVT::i1, CR0Reg, SRIdxVal, 2714 WideOp.getValue(1)), 0); 2715 ReplaceNode(N, CRBit.getNode()); 2716 return true; 2717 } 2718 2719 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. 2720 /// Useful when emitting comparison code for 32-bit values without using 2721 /// the compare instruction (which only considers the lower 32-bits). 2722 SDValue PPCDAGToDAGISel::signExtendInputIfNeeded(SDValue Input) { 2723 assert(Input.getValueType() == MVT::i32 && 2724 "Can only sign-extend 32-bit values here."); 2725 unsigned Opc = Input.getOpcode(); 2726 2727 // The value was sign extended and then truncated to 32-bits. No need to 2728 // sign extend it again. 2729 if (Opc == ISD::TRUNCATE && 2730 (Input.getOperand(0).getOpcode() == ISD::AssertSext || 2731 Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) 2732 return Input; 2733 2734 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); 2735 // The input is a sign-extending load. No reason to sign-extend. 2736 if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) 2737 return Input; 2738 2739 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); 2740 // We don't sign-extend constants and already sign-extended values. 2741 if (InputConst || Opc == ISD::AssertSext || Opc == ISD::SIGN_EXTEND_INREG || 2742 Opc == ISD::SIGN_EXTEND) 2743 return Input; 2744 2745 SDLoc dl(Input); 2746 SignExtensionsAdded++; 2747 return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32, dl, MVT::i32, Input), 0); 2748 } 2749 2750 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. 2751 /// Useful when emitting comparison code for 32-bit values without using 2752 /// the compare instruction (which only considers the lower 32-bits). 2753 SDValue PPCDAGToDAGISel::zeroExtendInputIfNeeded(SDValue Input) { 2754 assert(Input.getValueType() == MVT::i32 && 2755 "Can only zero-extend 32-bit values here."); 2756 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); 2757 unsigned Opc = Input.getOpcode(); 2758 2759 // No need to zero-extend loaded values (unless they're loaded with 2760 // a sign-extending load). 2761 if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) 2762 return Input; 2763 2764 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); 2765 bool InputZExtConst = InputConst && InputConst->getSExtValue() >= 0; 2766 // An ISD::TRUNCATE will be lowered to an EXTRACT_SUBREG so we have 2767 // to conservatively actually clear the high bits. We also don't need to 2768 // zero-extend constants or values that are already zero-extended. 2769 if (InputZExtConst || Opc == ISD::AssertZext || Opc == ISD::ZERO_EXTEND) 2770 return Input; 2771 2772 SDLoc dl(Input); 2773 ZeroExtensionsAdded++; 2774 return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32, dl, MVT::i32, Input, 2775 getI64Imm(0, dl), getI64Imm(32, dl)), 2776 0); 2777 } 2778 2779 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of 2780 // course not actual zero/sign extensions that will generate machine code, 2781 // they're just a way to reinterpret a 32 bit value in a register as a 2782 // 64 bit value and vice-versa. 2783 SDValue PPCDAGToDAGISel::addExtOrTrunc(SDValue NatWidthRes, 2784 ExtOrTruncConversion Conv) { 2785 SDLoc dl(NatWidthRes); 2786 2787 // For reinterpreting 32-bit values as 64 bit values, we generate 2788 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1> 2789 if (Conv == ExtOrTruncConversion::Ext) { 2790 SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); 2791 SDValue SubRegIdx = 2792 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); 2793 return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, 2794 ImDef, NatWidthRes, SubRegIdx), 0); 2795 } 2796 2797 assert(Conv == ExtOrTruncConversion::Trunc && 2798 "Unknown convertion between 32 and 64 bit values."); 2799 // For reinterpreting 64-bit values as 32-bit values, we just need to 2800 // EXTRACT_SUBREG (i.e. extract the low word). 2801 SDValue SubRegIdx = 2802 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); 2803 return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, 2804 NatWidthRes, SubRegIdx), 0); 2805 } 2806 2807 /// Produces a zero-extended result of comparing two 32-bit values according to 2808 /// the passed condition code. 2809 SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS, 2810 ISD::CondCode CC, 2811 int64_t RHSValue, SDLoc dl) { 2812 bool IsRHSZero = RHSValue == 0; 2813 switch (CC) { 2814 default: return SDValue(); 2815 case ISD::SETEQ: { 2816 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5) 2817 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5) 2818 SDValue Xor = IsRHSZero ? LHS : 2819 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 2820 SDValue Clz = 2821 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); 2822 SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl), 2823 getI32Imm(31, dl) }; 2824 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, 2825 ShiftOps), 0); 2826 } 2827 case ISD::SETNE: { 2828 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) 2829 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) 2830 SDValue Xor = IsRHSZero ? LHS : 2831 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 2832 SDValue Clz = 2833 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); 2834 SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl), 2835 getI32Imm(31, dl) }; 2836 SDValue Shift = 2837 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); 2838 return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, 2839 getI32Imm(1, dl)), 0); 2840 } 2841 } 2842 } 2843 2844 /// Produces a sign-extended result of comparing two 32-bit values according to 2845 /// the passed condition code. 2846 SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS, 2847 ISD::CondCode CC, 2848 int64_t RHSValue, SDLoc dl) { 2849 bool IsRHSZero = RHSValue == 0; 2850 switch (CC) { 2851 default: return SDValue(); 2852 case ISD::SETEQ: { 2853 // (sext (setcc %a, %b, seteq)) -> 2854 // (ashr (shl (ctlz (xor %a, %b)), 58), 63) 2855 // (sext (setcc %a, 0, seteq)) -> 2856 // (ashr (shl (ctlz %a), 58), 63) 2857 SDValue CountInput = IsRHSZero ? LHS : 2858 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 2859 SDValue Cntlzw = 2860 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0); 2861 SDValue SHLOps[] = { Cntlzw, getI32Imm(58, dl), getI32Imm(0, dl) }; 2862 SDValue Sldi = 2863 SDValue(CurDAG->getMachineNode(PPC::RLDICR_32, dl, MVT::i32, SHLOps), 0); 2864 return SDValue(CurDAG->getMachineNode(PPC::SRADI_32, dl, MVT::i32, Sldi, 2865 getI32Imm(63, dl)), 0); 2866 } 2867 case ISD::SETNE: { 2868 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and 2869 // flip the bit, finally take 2's complement. 2870 // (sext (setcc %a, %b, setne)) -> 2871 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) 2872 // Same as above, but the first xor is not needed. 2873 // (sext (setcc %a, 0, setne)) -> 2874 // (neg (xor (lshr (ctlz %a), 5), 1)) 2875 SDValue Xor = IsRHSZero ? LHS : 2876 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 2877 SDValue Clz = 2878 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); 2879 SDValue ShiftOps[] = 2880 { Clz, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) }; 2881 SDValue Shift = 2882 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); 2883 SDValue Xori = 2884 SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, 2885 getI32Imm(1, dl)), 0); 2886 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); 2887 } 2888 } 2889 } 2890 2891 /// Produces a zero-extended result of comparing two 64-bit values according to 2892 /// the passed condition code. 2893 SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS, 2894 ISD::CondCode CC, 2895 int64_t RHSValue, SDLoc dl) { 2896 bool IsRHSZero = RHSValue == 0; 2897 switch (CC) { 2898 default: return SDValue(); 2899 case ISD::SETEQ: { 2900 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) 2901 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) 2902 SDValue Xor = IsRHSZero ? LHS : 2903 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); 2904 SDValue Clz = 2905 SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); 2906 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, 2907 getI64Imm(58, dl), getI64Imm(63, dl)), 2908 0); 2909 } 2910 } 2911 } 2912 2913 /// Produces a sign-extended result of comparing two 64-bit values according to 2914 /// the passed condition code. 2915 SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS, 2916 ISD::CondCode CC, 2917 int64_t RHSValue, SDLoc dl) { 2918 bool IsRHSZero = RHSValue == 0; 2919 switch (CC) { 2920 default: return SDValue(); 2921 case ISD::SETEQ: { 2922 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) 2923 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) 2924 // {addcz.reg, addcz.CA} = (addcarry %a, -1) 2925 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) 2926 SDValue AddInput = IsRHSZero ? LHS : 2927 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); 2928 SDValue Addic = 2929 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, 2930 AddInput, getI32Imm(~0U, dl)), 0); 2931 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, 2932 Addic, Addic.getValue(1)), 0); 2933 } 2934 } 2935 } 2936 2937 /// Does this SDValue have any uses for which keeping the value in a GPR is 2938 /// appropriate. This is meant to be used on values that have type i1 since 2939 /// it is somewhat meaningless to ask if values of other types can be kept in 2940 /// GPR's. 2941 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { 2942 assert(Compare.getOpcode() == ISD::SETCC && 2943 "An ISD::SETCC node required here."); 2944 2945 // For values that have a single use, the caller should obviously already have 2946 // checked if that use is an extending use. We check the other uses here. 2947 if (Compare.hasOneUse()) 2948 return true; 2949 // We want the value in a GPR if it is being extended, used for a select, or 2950 // used in logical operations. 2951 for (auto CompareUse : Compare.getNode()->uses()) 2952 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && 2953 CompareUse->getOpcode() != ISD::ZERO_EXTEND && 2954 CompareUse->getOpcode() != ISD::SELECT && 2955 !isLogicOp(CompareUse->getOpcode())) { 2956 OmittedForNonExtendUses++; 2957 return false; 2958 } 2959 return true; 2960 } 2961 2962 /// Returns an equivalent of a SETCC node but with the result the same width as 2963 /// the inputs. This can nalso be used for SELECT_CC if either the true or false 2964 /// values is a power of two while the other is zero. 2965 SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare, 2966 SetccInGPROpts ConvOpts) { 2967 assert((Compare.getOpcode() == ISD::SETCC || 2968 Compare.getOpcode() == ISD::SELECT_CC) && 2969 "An ISD::SETCC node required here."); 2970 2971 // Don't convert this comparison to a GPR sequence because there are uses 2972 // of the i1 result (i.e. uses that require the result in the CR). 2973 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) 2974 return SDValue(); 2975 2976 SDValue LHS = Compare.getOperand(0); 2977 SDValue RHS = Compare.getOperand(1); 2978 2979 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. 2980 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2; 2981 ISD::CondCode CC = 2982 cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get(); 2983 EVT InputVT = LHS.getValueType(); 2984 if (InputVT != MVT::i32 && InputVT != MVT::i64) 2985 return SDValue(); 2986 2987 if (ConvOpts == SetccInGPROpts::ZExtInvert || 2988 ConvOpts == SetccInGPROpts::SExtInvert) 2989 CC = ISD::getSetCCInverse(CC, true); 2990 2991 bool Inputs32Bit = InputVT == MVT::i32; 2992 if (ISD::isSignedIntSetCC(CC) && Inputs32Bit) { 2993 LHS = signExtendInputIfNeeded(LHS); 2994 RHS = signExtendInputIfNeeded(RHS); 2995 } else if (ISD::isUnsignedIntSetCC(CC) && Inputs32Bit) { 2996 LHS = zeroExtendInputIfNeeded(LHS); 2997 RHS = zeroExtendInputIfNeeded(RHS); 2998 } 2999 3000 SDLoc dl(Compare); 3001 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3002 int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX; 3003 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || 3004 ConvOpts == SetccInGPROpts::SExtInvert; 3005 3006 if (IsSext && Inputs32Bit) 3007 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); 3008 else if (Inputs32Bit) 3009 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); 3010 else if (IsSext) 3011 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); 3012 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); 3013 } 3014 3015 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 3016 // Transfer memoperands. 3017 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3018 MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); 3019 cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); 3020 } 3021 3022 // Select - Convert the specified operand from a target-independent to a 3023 // target-specific node if it hasn't already been changed. 3024 void PPCDAGToDAGISel::Select(SDNode *N) { 3025 SDLoc dl(N); 3026 if (N->isMachineOpcode()) { 3027 N->setNodeId(-1); 3028 return; // Already selected. 3029 } 3030 3031 // In case any misguided DAG-level optimizations form an ADD with a 3032 // TargetConstant operand, crash here instead of miscompiling (by selecting 3033 // an r+r add instead of some kind of r+i add). 3034 if (N->getOpcode() == ISD::ADD && 3035 N->getOperand(1).getOpcode() == ISD::TargetConstant) 3036 llvm_unreachable("Invalid ADD with TargetConstant operand"); 3037 3038 // Try matching complex bit permutations before doing anything else. 3039 if (tryBitPermutation(N)) 3040 return; 3041 3042 switch (N->getOpcode()) { 3043 default: break; 3044 3045 case ISD::Constant: 3046 if (N->getValueType(0) == MVT::i64) { 3047 ReplaceNode(N, getInt64(CurDAG, N)); 3048 return; 3049 } 3050 break; 3051 3052 case ISD::ZERO_EXTEND: 3053 case ISD::SIGN_EXTEND: 3054 if (tryEXTEND(N)) 3055 return; 3056 break; 3057 3058 case ISD::SETCC: 3059 if (trySETCC(N)) 3060 return; 3061 break; 3062 3063 case PPCISD::GlobalBaseReg: 3064 ReplaceNode(N, getGlobalBaseReg()); 3065 return; 3066 3067 case ISD::FrameIndex: 3068 selectFrameIndex(N, N); 3069 return; 3070 3071 case PPCISD::MFOCRF: { 3072 SDValue InFlag = N->getOperand(1); 3073 ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, 3074 N->getOperand(0), InFlag)); 3075 return; 3076 } 3077 3078 case PPCISD::READ_TIME_BASE: 3079 ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, 3080 MVT::Other, N->getOperand(0))); 3081 return; 3082 3083 case PPCISD::SRA_ADDZE: { 3084 SDValue N0 = N->getOperand(0); 3085 SDValue ShiftAmt = 3086 CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))-> 3087 getConstantIntValue(), dl, 3088 N->getValueType(0)); 3089 if (N->getValueType(0) == MVT::i64) { 3090 SDNode *Op = 3091 CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue, 3092 N0, ShiftAmt); 3093 CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0), 3094 SDValue(Op, 1)); 3095 return; 3096 } else { 3097 assert(N->getValueType(0) == MVT::i32 && 3098 "Expecting i64 or i32 in PPCISD::SRA_ADDZE"); 3099 SDNode *Op = 3100 CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue, 3101 N0, ShiftAmt); 3102 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0), 3103 SDValue(Op, 1)); 3104 return; 3105 } 3106 } 3107 3108 case ISD::LOAD: { 3109 // Handle preincrement loads. 3110 LoadSDNode *LD = cast<LoadSDNode>(N); 3111 EVT LoadedVT = LD->getMemoryVT(); 3112 3113 // Normal loads are handled by code generated from the .td file. 3114 if (LD->getAddressingMode() != ISD::PRE_INC) 3115 break; 3116 3117 SDValue Offset = LD->getOffset(); 3118 if (Offset.getOpcode() == ISD::TargetConstant || 3119 Offset.getOpcode() == ISD::TargetGlobalAddress) { 3120 3121 unsigned Opcode; 3122 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; 3123 if (LD->getValueType(0) != MVT::i64) { 3124 // Handle PPC32 integer and normal FP loads. 3125 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 3126 switch (LoadedVT.getSimpleVT().SimpleTy) { 3127 default: llvm_unreachable("Invalid PPC load type!"); 3128 case MVT::f64: Opcode = PPC::LFDU; break; 3129 case MVT::f32: Opcode = PPC::LFSU; break; 3130 case MVT::i32: Opcode = PPC::LWZU; break; 3131 case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break; 3132 case MVT::i1: 3133 case MVT::i8: Opcode = PPC::LBZU; break; 3134 } 3135 } else { 3136 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); 3137 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 3138 switch (LoadedVT.getSimpleVT().SimpleTy) { 3139 default: llvm_unreachable("Invalid PPC load type!"); 3140 case MVT::i64: Opcode = PPC::LDU; break; 3141 case MVT::i32: Opcode = PPC::LWZU8; break; 3142 case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break; 3143 case MVT::i1: 3144 case MVT::i8: Opcode = PPC::LBZU8; break; 3145 } 3146 } 3147 3148 SDValue Chain = LD->getChain(); 3149 SDValue Base = LD->getBasePtr(); 3150 SDValue Ops[] = { Offset, Base, Chain }; 3151 SDNode *MN = CurDAG->getMachineNode( 3152 Opcode, dl, LD->getValueType(0), 3153 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); 3154 transferMemOperands(N, MN); 3155 ReplaceNode(N, MN); 3156 return; 3157 } else { 3158 unsigned Opcode; 3159 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; 3160 if (LD->getValueType(0) != MVT::i64) { 3161 // Handle PPC32 integer and normal FP loads. 3162 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 3163 switch (LoadedVT.getSimpleVT().SimpleTy) { 3164 default: llvm_unreachable("Invalid PPC load type!"); 3165 case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX 3166 case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX 3167 case MVT::f64: Opcode = PPC::LFDUX; break; 3168 case MVT::f32: Opcode = PPC::LFSUX; break; 3169 case MVT::i32: Opcode = PPC::LWZUX; break; 3170 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break; 3171 case MVT::i1: 3172 case MVT::i8: Opcode = PPC::LBZUX; break; 3173 } 3174 } else { 3175 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); 3176 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && 3177 "Invalid sext update load"); 3178 switch (LoadedVT.getSimpleVT().SimpleTy) { 3179 default: llvm_unreachable("Invalid PPC load type!"); 3180 case MVT::i64: Opcode = PPC::LDUX; break; 3181 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break; 3182 case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break; 3183 case MVT::i1: 3184 case MVT::i8: Opcode = PPC::LBZUX8; break; 3185 } 3186 } 3187 3188 SDValue Chain = LD->getChain(); 3189 SDValue Base = LD->getBasePtr(); 3190 SDValue Ops[] = { Base, Offset, Chain }; 3191 SDNode *MN = CurDAG->getMachineNode( 3192 Opcode, dl, LD->getValueType(0), 3193 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); 3194 transferMemOperands(N, MN); 3195 ReplaceNode(N, MN); 3196 return; 3197 } 3198 } 3199 3200 case ISD::AND: { 3201 if (tryLogicOpOfCompares(N)) 3202 return; 3203 3204 unsigned Imm, Imm2, SH, MB, ME; 3205 uint64_t Imm64; 3206 3207 // If this is an and of a value rotated between 0 and 31 bits and then and'd 3208 // with a mask, emit rlwinm 3209 if (isInt32Immediate(N->getOperand(1), Imm) && 3210 isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) { 3211 SDValue Val = N->getOperand(0).getOperand(0); 3212 SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl), 3213 getI32Imm(ME, dl) }; 3214 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 3215 return; 3216 } 3217 // If this is just a masked value where the input is not handled above, and 3218 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm 3219 if (isInt32Immediate(N->getOperand(1), Imm) && 3220 isRunOfOnes(Imm, MB, ME) && 3221 N->getOperand(0).getOpcode() != ISD::ROTL) { 3222 SDValue Val = N->getOperand(0); 3223 SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl), 3224 getI32Imm(ME, dl) }; 3225 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 3226 return; 3227 } 3228 // If this is a 64-bit zero-extension mask, emit rldicl. 3229 if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && 3230 isMask_64(Imm64)) { 3231 SDValue Val = N->getOperand(0); 3232 MB = 64 - countTrailingOnes(Imm64); 3233 SH = 0; 3234 3235 if (Val.getOpcode() == ISD::ANY_EXTEND) { 3236 auto Op0 = Val.getOperand(0); 3237 if ( Op0.getOpcode() == ISD::SRL && 3238 isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { 3239 3240 auto ResultType = Val.getNode()->getValueType(0); 3241 auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, 3242 ResultType); 3243 SDValue IDVal (ImDef, 0); 3244 3245 Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, 3246 ResultType, IDVal, Op0.getOperand(0), 3247 getI32Imm(1, dl)), 0); 3248 SH = 64 - Imm; 3249 } 3250 } 3251 3252 // If the operand is a logical right shift, we can fold it into this 3253 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) 3254 // for n <= mb. The right shift is really a left rotate followed by a 3255 // mask, and this mask is a more-restrictive sub-mask of the mask implied 3256 // by the shift. 3257 if (Val.getOpcode() == ISD::SRL && 3258 isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { 3259 assert(Imm < 64 && "Illegal shift amount"); 3260 Val = Val.getOperand(0); 3261 SH = 64 - Imm; 3262 } 3263 3264 SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; 3265 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); 3266 return; 3267 } 3268 // If this is a negated 64-bit zero-extension mask, 3269 // i.e. the immediate is a sequence of ones from most significant side 3270 // and all zero for reminder, we should use rldicr. 3271 if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && 3272 isMask_64(~Imm64)) { 3273 SDValue Val = N->getOperand(0); 3274 MB = 63 - countTrailingOnes(~Imm64); 3275 SH = 0; 3276 SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; 3277 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); 3278 return; 3279 } 3280 3281 // AND X, 0 -> 0, not "rlwinm 32". 3282 if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { 3283 ReplaceUses(SDValue(N, 0), N->getOperand(1)); 3284 return; 3285 } 3286 // ISD::OR doesn't get all the bitfield insertion fun. 3287 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a 3288 // bitfield insert. 3289 if (isInt32Immediate(N->getOperand(1), Imm) && 3290 N->getOperand(0).getOpcode() == ISD::OR && 3291 isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { 3292 // The idea here is to check whether this is equivalent to: 3293 // (c1 & m) | (x & ~m) 3294 // where m is a run-of-ones mask. The logic here is that, for each bit in 3295 // c1 and c2: 3296 // - if both are 1, then the output will be 1. 3297 // - if both are 0, then the output will be 0. 3298 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will 3299 // come from x. 3300 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will 3301 // be 0. 3302 // If that last condition is never the case, then we can form m from the 3303 // bits that are the same between c1 and c2. 3304 unsigned MB, ME; 3305 if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) { 3306 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3307 N->getOperand(0).getOperand(1), 3308 getI32Imm(0, dl), getI32Imm(MB, dl), 3309 getI32Imm(ME, dl) }; 3310 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); 3311 return; 3312 } 3313 } 3314 3315 // Other cases are autogenerated. 3316 break; 3317 } 3318 case ISD::OR: { 3319 if (N->getValueType(0) == MVT::i32) 3320 if (tryBitfieldInsert(N)) 3321 return; 3322 3323 if (tryLogicOpOfCompares(N)) 3324 return; 3325 3326 short Imm; 3327 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && 3328 isIntS16Immediate(N->getOperand(1), Imm)) { 3329 KnownBits LHSKnown; 3330 CurDAG->computeKnownBits(N->getOperand(0), LHSKnown); 3331 3332 // If this is equivalent to an add, then we can fold it with the 3333 // FrameIndex calculation. 3334 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { 3335 selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); 3336 return; 3337 } 3338 } 3339 3340 // Other cases are autogenerated. 3341 break; 3342 } 3343 case ISD::XOR: { 3344 if (tryLogicOpOfCompares(N)) 3345 return; 3346 break; 3347 } 3348 case ISD::ADD: { 3349 short Imm; 3350 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && 3351 isIntS16Immediate(N->getOperand(1), Imm)) { 3352 selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); 3353 return; 3354 } 3355 3356 break; 3357 } 3358 case ISD::SHL: { 3359 unsigned Imm, SH, MB, ME; 3360 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && 3361 isRotateAndMask(N, Imm, true, SH, MB, ME)) { 3362 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3363 getI32Imm(SH, dl), getI32Imm(MB, dl), 3364 getI32Imm(ME, dl) }; 3365 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 3366 return; 3367 } 3368 3369 // Other cases are autogenerated. 3370 break; 3371 } 3372 case ISD::SRL: { 3373 unsigned Imm, SH, MB, ME; 3374 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && 3375 isRotateAndMask(N, Imm, true, SH, MB, ME)) { 3376 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3377 getI32Imm(SH, dl), getI32Imm(MB, dl), 3378 getI32Imm(ME, dl) }; 3379 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 3380 return; 3381 } 3382 3383 // Other cases are autogenerated. 3384 break; 3385 } 3386 // FIXME: Remove this once the ANDI glue bug is fixed: 3387 case PPCISD::ANDIo_1_EQ_BIT: 3388 case PPCISD::ANDIo_1_GT_BIT: { 3389 if (!ANDIGlueBug) 3390 break; 3391 3392 EVT InVT = N->getOperand(0).getValueType(); 3393 assert((InVT == MVT::i64 || InVT == MVT::i32) && 3394 "Invalid input type for ANDIo_1_EQ_BIT"); 3395 3396 unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo; 3397 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue, 3398 N->getOperand(0), 3399 CurDAG->getTargetConstant(1, dl, InVT)), 3400 0); 3401 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); 3402 SDValue SRIdxVal = 3403 CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ? 3404 PPC::sub_eq : PPC::sub_gt, dl, MVT::i32); 3405 3406 CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg, 3407 SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */); 3408 return; 3409 } 3410 case ISD::SELECT_CC: { 3411 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 3412 EVT PtrVT = 3413 CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); 3414 bool isPPC64 = (PtrVT == MVT::i64); 3415 3416 // If this is a select of i1 operands, we'll pattern match it. 3417 if (PPCSubTarget->useCRBits() && 3418 N->getOperand(0).getValueType() == MVT::i1) 3419 break; 3420 3421 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc 3422 if (!isPPC64) 3423 if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 3424 if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2))) 3425 if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3))) 3426 if (N1C->isNullValue() && N3C->isNullValue() && 3427 N2C->getZExtValue() == 1ULL && CC == ISD::SETNE && 3428 // FIXME: Implement this optzn for PPC64. 3429 N->getValueType(0) == MVT::i32) { 3430 SDNode *Tmp = 3431 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 3432 N->getOperand(0), getI32Imm(~0U, dl)); 3433 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0), 3434 N->getOperand(0), SDValue(Tmp, 1)); 3435 return; 3436 } 3437 3438 SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); 3439 3440 if (N->getValueType(0) == MVT::i1) { 3441 // An i1 select is: (c & t) | (!c & f). 3442 bool Inv; 3443 unsigned Idx = getCRIdxForSetCC(CC, Inv); 3444 3445 unsigned SRI; 3446 switch (Idx) { 3447 default: llvm_unreachable("Invalid CC index"); 3448 case 0: SRI = PPC::sub_lt; break; 3449 case 1: SRI = PPC::sub_gt; break; 3450 case 2: SRI = PPC::sub_eq; break; 3451 case 3: SRI = PPC::sub_un; break; 3452 } 3453 3454 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg); 3455 3456 SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1, 3457 CCBit, CCBit), 0); 3458 SDValue C = Inv ? NotCCBit : CCBit, 3459 NotC = Inv ? CCBit : NotCCBit; 3460 3461 SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, 3462 C, N->getOperand(2)), 0); 3463 SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, 3464 NotC, N->getOperand(3)), 0); 3465 3466 CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); 3467 return; 3468 } 3469 3470 unsigned BROpc = getPredicateForSetCC(CC); 3471 3472 unsigned SelectCCOp; 3473 if (N->getValueType(0) == MVT::i32) 3474 SelectCCOp = PPC::SELECT_CC_I4; 3475 else if (N->getValueType(0) == MVT::i64) 3476 SelectCCOp = PPC::SELECT_CC_I8; 3477 else if (N->getValueType(0) == MVT::f32) 3478 if (PPCSubTarget->hasP8Vector()) 3479 SelectCCOp = PPC::SELECT_CC_VSSRC; 3480 else 3481 SelectCCOp = PPC::SELECT_CC_F4; 3482 else if (N->getValueType(0) == MVT::f64) 3483 if (PPCSubTarget->hasVSX()) 3484 SelectCCOp = PPC::SELECT_CC_VSFRC; 3485 else 3486 SelectCCOp = PPC::SELECT_CC_F8; 3487 else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64) 3488 SelectCCOp = PPC::SELECT_CC_QFRC; 3489 else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32) 3490 SelectCCOp = PPC::SELECT_CC_QSRC; 3491 else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1) 3492 SelectCCOp = PPC::SELECT_CC_QBRC; 3493 else if (N->getValueType(0) == MVT::v2f64 || 3494 N->getValueType(0) == MVT::v2i64) 3495 SelectCCOp = PPC::SELECT_CC_VSRC; 3496 else 3497 SelectCCOp = PPC::SELECT_CC_VRRC; 3498 3499 SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3), 3500 getI32Imm(BROpc, dl) }; 3501 CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); 3502 return; 3503 } 3504 case ISD::VSELECT: 3505 if (PPCSubTarget->hasVSX()) { 3506 SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) }; 3507 CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops); 3508 return; 3509 } 3510 break; 3511 3512 case ISD::VECTOR_SHUFFLE: 3513 if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || 3514 N->getValueType(0) == MVT::v2i64)) { 3515 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 3516 3517 SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1), 3518 Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1); 3519 unsigned DM[2]; 3520 3521 for (int i = 0; i < 2; ++i) 3522 if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2) 3523 DM[i] = 0; 3524 else 3525 DM[i] = 1; 3526 3527 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 && 3528 Op1.getOpcode() == ISD::SCALAR_TO_VECTOR && 3529 isa<LoadSDNode>(Op1.getOperand(0))) { 3530 LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0)); 3531 SDValue Base, Offset; 3532 3533 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() && 3534 (LD->getMemoryVT() == MVT::f64 || 3535 LD->getMemoryVT() == MVT::i64) && 3536 SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { 3537 SDValue Chain = LD->getChain(); 3538 SDValue Ops[] = { Base, Offset, Chain }; 3539 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3540 MemOp[0] = LD->getMemOperand(); 3541 SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, 3542 N->getValueType(0), Ops); 3543 cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1); 3544 return; 3545 } 3546 } 3547 3548 // For little endian, we must swap the input operands and adjust 3549 // the mask elements (reverse and invert them). 3550 if (PPCSubTarget->isLittleEndian()) { 3551 std::swap(Op1, Op2); 3552 unsigned tmp = DM[0]; 3553 DM[0] = 1 - DM[1]; 3554 DM[1] = 1 - tmp; 3555 } 3556 3557 SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl, 3558 MVT::i32); 3559 SDValue Ops[] = { Op1, Op2, DMV }; 3560 CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); 3561 return; 3562 } 3563 3564 break; 3565 case PPCISD::BDNZ: 3566 case PPCISD::BDZ: { 3567 bool IsPPC64 = PPCSubTarget->isPPC64(); 3568 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; 3569 CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ 3570 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) 3571 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), 3572 MVT::Other, Ops); 3573 return; 3574 } 3575 case PPCISD::COND_BRANCH: { 3576 // Op #0 is the Chain. 3577 // Op #1 is the PPC::PRED_* number. 3578 // Op #2 is the CR# 3579 // Op #3 is the Dest MBB 3580 // Op #4 is the Flag. 3581 // Prevent PPC::PRED_* from being selected into LI. 3582 unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 3583 if (EnableBranchHint) 3584 PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3)); 3585 3586 SDValue Pred = getI32Imm(PCC, dl); 3587 SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), 3588 N->getOperand(0), N->getOperand(4) }; 3589 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); 3590 return; 3591 } 3592 case ISD::BR_CC: { 3593 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 3594 unsigned PCC = getPredicateForSetCC(CC); 3595 3596 if (N->getOperand(2).getValueType() == MVT::i1) { 3597 unsigned Opc; 3598 bool Swap; 3599 switch (PCC) { 3600 default: llvm_unreachable("Unexpected Boolean-operand predicate"); 3601 case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break; 3602 case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break; 3603 case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break; 3604 case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break; 3605 case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break; 3606 case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; 3607 } 3608 3609 SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, 3610 N->getOperand(Swap ? 3 : 2), 3611 N->getOperand(Swap ? 2 : 3)), 0); 3612 CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4), 3613 N->getOperand(0)); 3614 return; 3615 } 3616 3617 if (EnableBranchHint) 3618 PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4)); 3619 3620 SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); 3621 SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, 3622 N->getOperand(4), N->getOperand(0) }; 3623 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); 3624 return; 3625 } 3626 case ISD::BRIND: { 3627 // FIXME: Should custom lower this. 3628 SDValue Chain = N->getOperand(0); 3629 SDValue Target = N->getOperand(1); 3630 unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8; 3631 unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8; 3632 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, 3633 Chain), 0); 3634 CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); 3635 return; 3636 } 3637 case PPCISD::TOC_ENTRY: { 3638 assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) && 3639 "Only supported for 64-bit ABI and 32-bit SVR4"); 3640 if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) { 3641 SDValue GA = N->getOperand(0); 3642 SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, 3643 N->getOperand(1)); 3644 transferMemOperands(N, MN); 3645 ReplaceNode(N, MN); 3646 return; 3647 } 3648 3649 // For medium and large code model, we generate two instructions as 3650 // described below. Otherwise we allow SelectCodeCommon to handle this, 3651 // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. 3652 CodeModel::Model CModel = TM.getCodeModel(); 3653 if (CModel != CodeModel::Medium && CModel != CodeModel::Large) 3654 break; 3655 3656 // The first source operand is a TargetGlobalAddress or a TargetJumpTable. 3657 // If it must be toc-referenced according to PPCSubTarget, we generate: 3658 // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>)) 3659 // Otherwise we generate: 3660 // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>) 3661 SDValue GA = N->getOperand(0); 3662 SDValue TOCbase = N->getOperand(1); 3663 SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, 3664 TOCbase, GA); 3665 3666 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) || 3667 CModel == CodeModel::Large) { 3668 SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, 3669 SDValue(Tmp, 0)); 3670 transferMemOperands(N, MN); 3671 ReplaceNode(N, MN); 3672 return; 3673 } 3674 3675 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { 3676 const GlobalValue *GV = G->getGlobal(); 3677 unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); 3678 if (GVFlags & PPCII::MO_NLP_FLAG) { 3679 SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, 3680 SDValue(Tmp, 0)); 3681 transferMemOperands(N, MN); 3682 ReplaceNode(N, MN); 3683 return; 3684 } 3685 } 3686 3687 ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, 3688 SDValue(Tmp, 0), GA)); 3689 return; 3690 } 3691 case PPCISD::PPC32_PICGOT: 3692 // Generate a PIC-safe GOT reference. 3693 assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && 3694 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); 3695 CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, 3696 PPCLowering->getPointerTy(CurDAG->getDataLayout()), 3697 MVT::i32); 3698 return; 3699 3700 case PPCISD::VADD_SPLAT: { 3701 // This expands into one of three sequences, depending on whether 3702 // the first operand is odd or even, positive or negative. 3703 assert(isa<ConstantSDNode>(N->getOperand(0)) && 3704 isa<ConstantSDNode>(N->getOperand(1)) && 3705 "Invalid operand on VADD_SPLAT!"); 3706 3707 int Elt = N->getConstantOperandVal(0); 3708 int EltSize = N->getConstantOperandVal(1); 3709 unsigned Opc1, Opc2, Opc3; 3710 EVT VT; 3711 3712 if (EltSize == 1) { 3713 Opc1 = PPC::VSPLTISB; 3714 Opc2 = PPC::VADDUBM; 3715 Opc3 = PPC::VSUBUBM; 3716 VT = MVT::v16i8; 3717 } else if (EltSize == 2) { 3718 Opc1 = PPC::VSPLTISH; 3719 Opc2 = PPC::VADDUHM; 3720 Opc3 = PPC::VSUBUHM; 3721 VT = MVT::v8i16; 3722 } else { 3723 assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!"); 3724 Opc1 = PPC::VSPLTISW; 3725 Opc2 = PPC::VADDUWM; 3726 Opc3 = PPC::VSUBUWM; 3727 VT = MVT::v4i32; 3728 } 3729 3730 if ((Elt & 1) == 0) { 3731 // Elt is even, in the range [-32,-18] + [16,30]. 3732 // 3733 // Convert: VADD_SPLAT elt, size 3734 // Into: tmp = VSPLTIS[BHW] elt 3735 // VADDU[BHW]M tmp, tmp 3736 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 3737 SDValue EltVal = getI32Imm(Elt >> 1, dl); 3738 SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 3739 SDValue TmpVal = SDValue(Tmp, 0); 3740 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal)); 3741 return; 3742 } else if (Elt > 0) { 3743 // Elt is odd and positive, in the range [17,31]. 3744 // 3745 // Convert: VADD_SPLAT elt, size 3746 // Into: tmp1 = VSPLTIS[BHW] elt-16 3747 // tmp2 = VSPLTIS[BHW] -16 3748 // VSUBU[BHW]M tmp1, tmp2 3749 SDValue EltVal = getI32Imm(Elt - 16, dl); 3750 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 3751 EltVal = getI32Imm(-16, dl); 3752 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 3753 ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), 3754 SDValue(Tmp2, 0))); 3755 return; 3756 } else { 3757 // Elt is odd and negative, in the range [-31,-17]. 3758 // 3759 // Convert: VADD_SPLAT elt, size 3760 // Into: tmp1 = VSPLTIS[BHW] elt+16 3761 // tmp2 = VSPLTIS[BHW] -16 3762 // VADDU[BHW]M tmp1, tmp2 3763 SDValue EltVal = getI32Imm(Elt + 16, dl); 3764 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 3765 EltVal = getI32Imm(-16, dl); 3766 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 3767 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), 3768 SDValue(Tmp2, 0))); 3769 return; 3770 } 3771 } 3772 } 3773 3774 SelectCode(N); 3775 } 3776 3777 // If the target supports the cmpb instruction, do the idiom recognition here. 3778 // We don't do this as a DAG combine because we don't want to do it as nodes 3779 // are being combined (because we might miss part of the eventual idiom). We 3780 // don't want to do it during instruction selection because we want to reuse 3781 // the logic for lowering the masking operations already part of the 3782 // instruction selector. 3783 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { 3784 SDLoc dl(N); 3785 3786 assert(N->getOpcode() == ISD::OR && 3787 "Only OR nodes are supported for CMPB"); 3788 3789 SDValue Res; 3790 if (!PPCSubTarget->hasCMPB()) 3791 return Res; 3792 3793 if (N->getValueType(0) != MVT::i32 && 3794 N->getValueType(0) != MVT::i64) 3795 return Res; 3796 3797 EVT VT = N->getValueType(0); 3798 3799 SDValue RHS, LHS; 3800 bool BytesFound[8] = {false, false, false, false, false, false, false, false}; 3801 uint64_t Mask = 0, Alt = 0; 3802 3803 auto IsByteSelectCC = [this](SDValue O, unsigned &b, 3804 uint64_t &Mask, uint64_t &Alt, 3805 SDValue &LHS, SDValue &RHS) { 3806 if (O.getOpcode() != ISD::SELECT_CC) 3807 return false; 3808 ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get(); 3809 3810 if (!isa<ConstantSDNode>(O.getOperand(2)) || 3811 !isa<ConstantSDNode>(O.getOperand(3))) 3812 return false; 3813 3814 uint64_t PM = O.getConstantOperandVal(2); 3815 uint64_t PAlt = O.getConstantOperandVal(3); 3816 for (b = 0; b < 8; ++b) { 3817 uint64_t Mask = UINT64_C(0xFF) << (8*b); 3818 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt) 3819 break; 3820 } 3821 3822 if (b == 8) 3823 return false; 3824 Mask |= PM; 3825 Alt |= PAlt; 3826 3827 if (!isa<ConstantSDNode>(O.getOperand(1)) || 3828 O.getConstantOperandVal(1) != 0) { 3829 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1); 3830 if (Op0.getOpcode() == ISD::TRUNCATE) 3831 Op0 = Op0.getOperand(0); 3832 if (Op1.getOpcode() == ISD::TRUNCATE) 3833 Op1 = Op1.getOperand(0); 3834 3835 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL && 3836 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ && 3837 isa<ConstantSDNode>(Op0.getOperand(1))) { 3838 3839 unsigned Bits = Op0.getValueSizeInBits(); 3840 if (b != Bits/8-1) 3841 return false; 3842 if (Op0.getConstantOperandVal(1) != Bits-8) 3843 return false; 3844 3845 LHS = Op0.getOperand(0); 3846 RHS = Op1.getOperand(0); 3847 return true; 3848 } 3849 3850 // When we have small integers (i16 to be specific), the form present 3851 // post-legalization uses SETULT in the SELECT_CC for the 3852 // higher-order byte, depending on the fact that the 3853 // even-higher-order bytes are known to all be zero, for example: 3854 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult 3855 // (so when the second byte is the same, because all higher-order 3856 // bits from bytes 3 and 4 are known to be zero, the result of the 3857 // xor can be at most 255) 3858 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT && 3859 isa<ConstantSDNode>(O.getOperand(1))) { 3860 3861 uint64_t ULim = O.getConstantOperandVal(1); 3862 if (ULim != (UINT64_C(1) << b*8)) 3863 return false; 3864 3865 // Now we need to make sure that the upper bytes are known to be 3866 // zero. 3867 unsigned Bits = Op0.getValueSizeInBits(); 3868 if (!CurDAG->MaskedValueIsZero( 3869 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8))) 3870 return false; 3871 3872 LHS = Op0.getOperand(0); 3873 RHS = Op0.getOperand(1); 3874 return true; 3875 } 3876 3877 return false; 3878 } 3879 3880 if (CC != ISD::SETEQ) 3881 return false; 3882 3883 SDValue Op = O.getOperand(0); 3884 if (Op.getOpcode() == ISD::AND) { 3885 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3886 return false; 3887 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b))) 3888 return false; 3889 3890 SDValue XOR = Op.getOperand(0); 3891 if (XOR.getOpcode() == ISD::TRUNCATE) 3892 XOR = XOR.getOperand(0); 3893 if (XOR.getOpcode() != ISD::XOR) 3894 return false; 3895 3896 LHS = XOR.getOperand(0); 3897 RHS = XOR.getOperand(1); 3898 return true; 3899 } else if (Op.getOpcode() == ISD::SRL) { 3900 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3901 return false; 3902 unsigned Bits = Op.getValueSizeInBits(); 3903 if (b != Bits/8-1) 3904 return false; 3905 if (Op.getConstantOperandVal(1) != Bits-8) 3906 return false; 3907 3908 SDValue XOR = Op.getOperand(0); 3909 if (XOR.getOpcode() == ISD::TRUNCATE) 3910 XOR = XOR.getOperand(0); 3911 if (XOR.getOpcode() != ISD::XOR) 3912 return false; 3913 3914 LHS = XOR.getOperand(0); 3915 RHS = XOR.getOperand(1); 3916 return true; 3917 } 3918 3919 return false; 3920 }; 3921 3922 SmallVector<SDValue, 8> Queue(1, SDValue(N, 0)); 3923 while (!Queue.empty()) { 3924 SDValue V = Queue.pop_back_val(); 3925 3926 for (const SDValue &O : V.getNode()->ops()) { 3927 unsigned b; 3928 uint64_t M = 0, A = 0; 3929 SDValue OLHS, ORHS; 3930 if (O.getOpcode() == ISD::OR) { 3931 Queue.push_back(O); 3932 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) { 3933 if (!LHS) { 3934 LHS = OLHS; 3935 RHS = ORHS; 3936 BytesFound[b] = true; 3937 Mask |= M; 3938 Alt |= A; 3939 } else if ((LHS == ORHS && RHS == OLHS) || 3940 (RHS == ORHS && LHS == OLHS)) { 3941 BytesFound[b] = true; 3942 Mask |= M; 3943 Alt |= A; 3944 } else { 3945 return Res; 3946 } 3947 } else { 3948 return Res; 3949 } 3950 } 3951 } 3952 3953 unsigned LastB = 0, BCnt = 0; 3954 for (unsigned i = 0; i < 8; ++i) 3955 if (BytesFound[LastB]) { 3956 ++BCnt; 3957 LastB = i; 3958 } 3959 3960 if (!LastB || BCnt < 2) 3961 return Res; 3962 3963 // Because we'll be zero-extending the output anyway if don't have a specific 3964 // value for each input byte (via the Mask), we can 'anyext' the inputs. 3965 if (LHS.getValueType() != VT) { 3966 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT); 3967 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT); 3968 } 3969 3970 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS); 3971 3972 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1); 3973 if (NonTrivialMask && !Alt) { 3974 // Res = Mask & CMPB 3975 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 3976 CurDAG->getConstant(Mask, dl, VT)); 3977 } else if (Alt) { 3978 // Res = (CMPB & Mask) | (~CMPB & Alt) 3979 // Which, as suggested here: 3980 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge 3981 // can be written as: 3982 // Res = Alt ^ ((Alt ^ Mask) & CMPB) 3983 // useful because the (Alt ^ Mask) can be pre-computed. 3984 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 3985 CurDAG->getConstant(Mask ^ Alt, dl, VT)); 3986 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, 3987 CurDAG->getConstant(Alt, dl, VT)); 3988 } 3989 3990 return Res; 3991 } 3992 3993 // When CR bit registers are enabled, an extension of an i1 variable to a i32 3994 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus 3995 // involves constant materialization of a 0 or a 1 or both. If the result of 3996 // the extension is then operated upon by some operator that can be constant 3997 // folded with a constant 0 or 1, and that constant can be materialized using 3998 // only one instruction (like a zero or one), then we should fold in those 3999 // operations with the select. 4000 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { 4001 if (!PPCSubTarget->useCRBits()) 4002 return; 4003 4004 if (N->getOpcode() != ISD::ZERO_EXTEND && 4005 N->getOpcode() != ISD::SIGN_EXTEND && 4006 N->getOpcode() != ISD::ANY_EXTEND) 4007 return; 4008 4009 if (N->getOperand(0).getValueType() != MVT::i1) 4010 return; 4011 4012 if (!N->hasOneUse()) 4013 return; 4014 4015 SDLoc dl(N); 4016 EVT VT = N->getValueType(0); 4017 SDValue Cond = N->getOperand(0); 4018 SDValue ConstTrue = 4019 CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT); 4020 SDValue ConstFalse = CurDAG->getConstant(0, dl, VT); 4021 4022 do { 4023 SDNode *User = *N->use_begin(); 4024 if (User->getNumOperands() != 2) 4025 break; 4026 4027 auto TryFold = [this, N, User, dl](SDValue Val) { 4028 SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1); 4029 SDValue O0 = UserO0.getNode() == N ? Val : UserO0; 4030 SDValue O1 = UserO1.getNode() == N ? Val : UserO1; 4031 4032 return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl, 4033 User->getValueType(0), 4034 O0.getNode(), O1.getNode()); 4035 }; 4036 4037 SDValue TrueRes = TryFold(ConstTrue); 4038 if (!TrueRes) 4039 break; 4040 SDValue FalseRes = TryFold(ConstFalse); 4041 if (!FalseRes) 4042 break; 4043 4044 // For us to materialize these using one instruction, we must be able to 4045 // represent them as signed 16-bit integers. 4046 uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(), 4047 False = cast<ConstantSDNode>(FalseRes)->getZExtValue(); 4048 if (!isInt<16>(True) || !isInt<16>(False)) 4049 break; 4050 4051 // We can replace User with a new SELECT node, and try again to see if we 4052 // can fold the select with its user. 4053 Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes); 4054 N = User; 4055 ConstTrue = TrueRes; 4056 ConstFalse = FalseRes; 4057 } while (N->hasOneUse()); 4058 } 4059 4060 void PPCDAGToDAGISel::PreprocessISelDAG() { 4061 SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); 4062 ++Position; 4063 4064 bool MadeChange = false; 4065 while (Position != CurDAG->allnodes_begin()) { 4066 SDNode *N = &*--Position; 4067 if (N->use_empty()) 4068 continue; 4069 4070 SDValue Res; 4071 switch (N->getOpcode()) { 4072 default: break; 4073 case ISD::OR: 4074 Res = combineToCMPB(N); 4075 break; 4076 } 4077 4078 if (!Res) 4079 foldBoolExts(Res, N); 4080 4081 if (Res) { 4082 DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: "); 4083 DEBUG(N->dump(CurDAG)); 4084 DEBUG(dbgs() << "\nNew: "); 4085 DEBUG(Res.getNode()->dump(CurDAG)); 4086 DEBUG(dbgs() << "\n"); 4087 4088 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); 4089 MadeChange = true; 4090 } 4091 } 4092 4093 if (MadeChange) 4094 CurDAG->RemoveDeadNodes(); 4095 } 4096 4097 /// PostprocessISelDAG - Perform some late peephole optimizations 4098 /// on the DAG representation. 4099 void PPCDAGToDAGISel::PostprocessISelDAG() { 4100 // Skip peepholes at -O0. 4101 if (TM.getOptLevel() == CodeGenOpt::None) 4102 return; 4103 4104 PeepholePPC64(); 4105 PeepholeCROps(); 4106 PeepholePPC64ZExt(); 4107 } 4108 4109 // Check if all users of this node will become isel where the second operand 4110 // is the constant zero. If this is so, and if we can negate the condition, 4111 // then we can flip the true and false operands. This will allow the zero to 4112 // be folded with the isel so that we don't need to materialize a register 4113 // containing zero. 4114 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { 4115 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 4116 UI != UE; ++UI) { 4117 SDNode *User = *UI; 4118 if (!User->isMachineOpcode()) 4119 return false; 4120 if (User->getMachineOpcode() != PPC::SELECT_I4 && 4121 User->getMachineOpcode() != PPC::SELECT_I8) 4122 return false; 4123 4124 SDNode *Op2 = User->getOperand(2).getNode(); 4125 if (!Op2->isMachineOpcode()) 4126 return false; 4127 4128 if (Op2->getMachineOpcode() != PPC::LI && 4129 Op2->getMachineOpcode() != PPC::LI8) 4130 return false; 4131 4132 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0)); 4133 if (!C) 4134 return false; 4135 4136 if (!C->isNullValue()) 4137 return false; 4138 } 4139 4140 return true; 4141 } 4142 4143 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { 4144 SmallVector<SDNode *, 4> ToReplace; 4145 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 4146 UI != UE; ++UI) { 4147 SDNode *User = *UI; 4148 assert((User->getMachineOpcode() == PPC::SELECT_I4 || 4149 User->getMachineOpcode() == PPC::SELECT_I8) && 4150 "Must have all select users"); 4151 ToReplace.push_back(User); 4152 } 4153 4154 for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(), 4155 UE = ToReplace.end(); UI != UE; ++UI) { 4156 SDNode *User = *UI; 4157 SDNode *ResNode = 4158 CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User), 4159 User->getValueType(0), User->getOperand(0), 4160 User->getOperand(2), 4161 User->getOperand(1)); 4162 4163 DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); 4164 DEBUG(User->dump(CurDAG)); 4165 DEBUG(dbgs() << "\nNew: "); 4166 DEBUG(ResNode->dump(CurDAG)); 4167 DEBUG(dbgs() << "\n"); 4168 4169 ReplaceUses(User, ResNode); 4170 } 4171 } 4172 4173 void PPCDAGToDAGISel::PeepholeCROps() { 4174 bool IsModified; 4175 do { 4176 IsModified = false; 4177 for (SDNode &Node : CurDAG->allnodes()) { 4178 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); 4179 if (!MachineNode || MachineNode->use_empty()) 4180 continue; 4181 SDNode *ResNode = MachineNode; 4182 4183 bool Op1Set = false, Op1Unset = false, 4184 Op1Not = false, 4185 Op2Set = false, Op2Unset = false, 4186 Op2Not = false; 4187 4188 unsigned Opcode = MachineNode->getMachineOpcode(); 4189 switch (Opcode) { 4190 default: break; 4191 case PPC::CRAND: 4192 case PPC::CRNAND: 4193 case PPC::CROR: 4194 case PPC::CRXOR: 4195 case PPC::CRNOR: 4196 case PPC::CREQV: 4197 case PPC::CRANDC: 4198 case PPC::CRORC: { 4199 SDValue Op = MachineNode->getOperand(1); 4200 if (Op.isMachineOpcode()) { 4201 if (Op.getMachineOpcode() == PPC::CRSET) 4202 Op2Set = true; 4203 else if (Op.getMachineOpcode() == PPC::CRUNSET) 4204 Op2Unset = true; 4205 else if (Op.getMachineOpcode() == PPC::CRNOR && 4206 Op.getOperand(0) == Op.getOperand(1)) 4207 Op2Not = true; 4208 } 4209 LLVM_FALLTHROUGH; 4210 } 4211 case PPC::BC: 4212 case PPC::BCn: 4213 case PPC::SELECT_I4: 4214 case PPC::SELECT_I8: 4215 case PPC::SELECT_F4: 4216 case PPC::SELECT_F8: 4217 case PPC::SELECT_QFRC: 4218 case PPC::SELECT_QSRC: 4219 case PPC::SELECT_QBRC: 4220 case PPC::SELECT_VRRC: 4221 case PPC::SELECT_VSFRC: 4222 case PPC::SELECT_VSSRC: 4223 case PPC::SELECT_VSRC: { 4224 SDValue Op = MachineNode->getOperand(0); 4225 if (Op.isMachineOpcode()) { 4226 if (Op.getMachineOpcode() == PPC::CRSET) 4227 Op1Set = true; 4228 else if (Op.getMachineOpcode() == PPC::CRUNSET) 4229 Op1Unset = true; 4230 else if (Op.getMachineOpcode() == PPC::CRNOR && 4231 Op.getOperand(0) == Op.getOperand(1)) 4232 Op1Not = true; 4233 } 4234 } 4235 break; 4236 } 4237 4238 bool SelectSwap = false; 4239 switch (Opcode) { 4240 default: break; 4241 case PPC::CRAND: 4242 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4243 // x & x = x 4244 ResNode = MachineNode->getOperand(0).getNode(); 4245 else if (Op1Set) 4246 // 1 & y = y 4247 ResNode = MachineNode->getOperand(1).getNode(); 4248 else if (Op2Set) 4249 // x & 1 = x 4250 ResNode = MachineNode->getOperand(0).getNode(); 4251 else if (Op1Unset || Op2Unset) 4252 // x & 0 = 0 & y = 0 4253 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 4254 MVT::i1); 4255 else if (Op1Not) 4256 // ~x & y = andc(y, x) 4257 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 4258 MVT::i1, MachineNode->getOperand(1), 4259 MachineNode->getOperand(0). 4260 getOperand(0)); 4261 else if (Op2Not) 4262 // x & ~y = andc(x, y) 4263 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 4264 MVT::i1, MachineNode->getOperand(0), 4265 MachineNode->getOperand(1). 4266 getOperand(0)); 4267 else if (AllUsersSelectZero(MachineNode)) { 4268 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), 4269 MVT::i1, MachineNode->getOperand(0), 4270 MachineNode->getOperand(1)); 4271 SelectSwap = true; 4272 } 4273 break; 4274 case PPC::CRNAND: 4275 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4276 // nand(x, x) -> nor(x, x) 4277 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4278 MVT::i1, MachineNode->getOperand(0), 4279 MachineNode->getOperand(0)); 4280 else if (Op1Set) 4281 // nand(1, y) -> nor(y, y) 4282 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4283 MVT::i1, MachineNode->getOperand(1), 4284 MachineNode->getOperand(1)); 4285 else if (Op2Set) 4286 // nand(x, 1) -> nor(x, x) 4287 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4288 MVT::i1, MachineNode->getOperand(0), 4289 MachineNode->getOperand(0)); 4290 else if (Op1Unset || Op2Unset) 4291 // nand(x, 0) = nand(0, y) = 1 4292 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 4293 MVT::i1); 4294 else if (Op1Not) 4295 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y) 4296 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 4297 MVT::i1, MachineNode->getOperand(0). 4298 getOperand(0), 4299 MachineNode->getOperand(1)); 4300 else if (Op2Not) 4301 // nand(x, ~y) = ~x | y = orc(y, x) 4302 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 4303 MVT::i1, MachineNode->getOperand(1). 4304 getOperand(0), 4305 MachineNode->getOperand(0)); 4306 else if (AllUsersSelectZero(MachineNode)) { 4307 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), 4308 MVT::i1, MachineNode->getOperand(0), 4309 MachineNode->getOperand(1)); 4310 SelectSwap = true; 4311 } 4312 break; 4313 case PPC::CROR: 4314 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4315 // x | x = x 4316 ResNode = MachineNode->getOperand(0).getNode(); 4317 else if (Op1Set || Op2Set) 4318 // x | 1 = 1 | y = 1 4319 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 4320 MVT::i1); 4321 else if (Op1Unset) 4322 // 0 | y = y 4323 ResNode = MachineNode->getOperand(1).getNode(); 4324 else if (Op2Unset) 4325 // x | 0 = x 4326 ResNode = MachineNode->getOperand(0).getNode(); 4327 else if (Op1Not) 4328 // ~x | y = orc(y, x) 4329 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 4330 MVT::i1, MachineNode->getOperand(1), 4331 MachineNode->getOperand(0). 4332 getOperand(0)); 4333 else if (Op2Not) 4334 // x | ~y = orc(x, y) 4335 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 4336 MVT::i1, MachineNode->getOperand(0), 4337 MachineNode->getOperand(1). 4338 getOperand(0)); 4339 else if (AllUsersSelectZero(MachineNode)) { 4340 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4341 MVT::i1, MachineNode->getOperand(0), 4342 MachineNode->getOperand(1)); 4343 SelectSwap = true; 4344 } 4345 break; 4346 case PPC::CRXOR: 4347 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4348 // xor(x, x) = 0 4349 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 4350 MVT::i1); 4351 else if (Op1Set) 4352 // xor(1, y) -> nor(y, y) 4353 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4354 MVT::i1, MachineNode->getOperand(1), 4355 MachineNode->getOperand(1)); 4356 else if (Op2Set) 4357 // xor(x, 1) -> nor(x, x) 4358 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4359 MVT::i1, MachineNode->getOperand(0), 4360 MachineNode->getOperand(0)); 4361 else if (Op1Unset) 4362 // xor(0, y) = y 4363 ResNode = MachineNode->getOperand(1).getNode(); 4364 else if (Op2Unset) 4365 // xor(x, 0) = x 4366 ResNode = MachineNode->getOperand(0).getNode(); 4367 else if (Op1Not) 4368 // xor(~x, y) = eqv(x, y) 4369 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), 4370 MVT::i1, MachineNode->getOperand(0). 4371 getOperand(0), 4372 MachineNode->getOperand(1)); 4373 else if (Op2Not) 4374 // xor(x, ~y) = eqv(x, y) 4375 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), 4376 MVT::i1, MachineNode->getOperand(0), 4377 MachineNode->getOperand(1). 4378 getOperand(0)); 4379 else if (AllUsersSelectZero(MachineNode)) { 4380 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), 4381 MVT::i1, MachineNode->getOperand(0), 4382 MachineNode->getOperand(1)); 4383 SelectSwap = true; 4384 } 4385 break; 4386 case PPC::CRNOR: 4387 if (Op1Set || Op2Set) 4388 // nor(1, y) -> 0 4389 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 4390 MVT::i1); 4391 else if (Op1Unset) 4392 // nor(0, y) = ~y -> nor(y, y) 4393 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4394 MVT::i1, MachineNode->getOperand(1), 4395 MachineNode->getOperand(1)); 4396 else if (Op2Unset) 4397 // nor(x, 0) = ~x 4398 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4399 MVT::i1, MachineNode->getOperand(0), 4400 MachineNode->getOperand(0)); 4401 else if (Op1Not) 4402 // nor(~x, y) = andc(x, y) 4403 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 4404 MVT::i1, MachineNode->getOperand(0). 4405 getOperand(0), 4406 MachineNode->getOperand(1)); 4407 else if (Op2Not) 4408 // nor(x, ~y) = andc(y, x) 4409 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 4410 MVT::i1, MachineNode->getOperand(1). 4411 getOperand(0), 4412 MachineNode->getOperand(0)); 4413 else if (AllUsersSelectZero(MachineNode)) { 4414 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), 4415 MVT::i1, MachineNode->getOperand(0), 4416 MachineNode->getOperand(1)); 4417 SelectSwap = true; 4418 } 4419 break; 4420 case PPC::CREQV: 4421 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4422 // eqv(x, x) = 1 4423 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 4424 MVT::i1); 4425 else if (Op1Set) 4426 // eqv(1, y) = y 4427 ResNode = MachineNode->getOperand(1).getNode(); 4428 else if (Op2Set) 4429 // eqv(x, 1) = x 4430 ResNode = MachineNode->getOperand(0).getNode(); 4431 else if (Op1Unset) 4432 // eqv(0, y) = ~y -> nor(y, y) 4433 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4434 MVT::i1, MachineNode->getOperand(1), 4435 MachineNode->getOperand(1)); 4436 else if (Op2Unset) 4437 // eqv(x, 0) = ~x 4438 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4439 MVT::i1, MachineNode->getOperand(0), 4440 MachineNode->getOperand(0)); 4441 else if (Op1Not) 4442 // eqv(~x, y) = xor(x, y) 4443 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), 4444 MVT::i1, MachineNode->getOperand(0). 4445 getOperand(0), 4446 MachineNode->getOperand(1)); 4447 else if (Op2Not) 4448 // eqv(x, ~y) = xor(x, y) 4449 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), 4450 MVT::i1, MachineNode->getOperand(0), 4451 MachineNode->getOperand(1). 4452 getOperand(0)); 4453 else if (AllUsersSelectZero(MachineNode)) { 4454 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), 4455 MVT::i1, MachineNode->getOperand(0), 4456 MachineNode->getOperand(1)); 4457 SelectSwap = true; 4458 } 4459 break; 4460 case PPC::CRANDC: 4461 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4462 // andc(x, x) = 0 4463 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 4464 MVT::i1); 4465 else if (Op1Set) 4466 // andc(1, y) = ~y 4467 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4468 MVT::i1, MachineNode->getOperand(1), 4469 MachineNode->getOperand(1)); 4470 else if (Op1Unset || Op2Set) 4471 // andc(0, y) = andc(x, 1) = 0 4472 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 4473 MVT::i1); 4474 else if (Op2Unset) 4475 // andc(x, 0) = x 4476 ResNode = MachineNode->getOperand(0).getNode(); 4477 else if (Op1Not) 4478 // andc(~x, y) = ~(x | y) = nor(x, y) 4479 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4480 MVT::i1, MachineNode->getOperand(0). 4481 getOperand(0), 4482 MachineNode->getOperand(1)); 4483 else if (Op2Not) 4484 // andc(x, ~y) = x & y 4485 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), 4486 MVT::i1, MachineNode->getOperand(0), 4487 MachineNode->getOperand(1). 4488 getOperand(0)); 4489 else if (AllUsersSelectZero(MachineNode)) { 4490 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 4491 MVT::i1, MachineNode->getOperand(1), 4492 MachineNode->getOperand(0)); 4493 SelectSwap = true; 4494 } 4495 break; 4496 case PPC::CRORC: 4497 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4498 // orc(x, x) = 1 4499 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 4500 MVT::i1); 4501 else if (Op1Set || Op2Unset) 4502 // orc(1, y) = orc(x, 0) = 1 4503 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 4504 MVT::i1); 4505 else if (Op2Set) 4506 // orc(x, 1) = x 4507 ResNode = MachineNode->getOperand(0).getNode(); 4508 else if (Op1Unset) 4509 // orc(0, y) = ~y 4510 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4511 MVT::i1, MachineNode->getOperand(1), 4512 MachineNode->getOperand(1)); 4513 else if (Op1Not) 4514 // orc(~x, y) = ~(x & y) = nand(x, y) 4515 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), 4516 MVT::i1, MachineNode->getOperand(0). 4517 getOperand(0), 4518 MachineNode->getOperand(1)); 4519 else if (Op2Not) 4520 // orc(x, ~y) = x | y 4521 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), 4522 MVT::i1, MachineNode->getOperand(0), 4523 MachineNode->getOperand(1). 4524 getOperand(0)); 4525 else if (AllUsersSelectZero(MachineNode)) { 4526 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 4527 MVT::i1, MachineNode->getOperand(1), 4528 MachineNode->getOperand(0)); 4529 SelectSwap = true; 4530 } 4531 break; 4532 case PPC::SELECT_I4: 4533 case PPC::SELECT_I8: 4534 case PPC::SELECT_F4: 4535 case PPC::SELECT_F8: 4536 case PPC::SELECT_QFRC: 4537 case PPC::SELECT_QSRC: 4538 case PPC::SELECT_QBRC: 4539 case PPC::SELECT_VRRC: 4540 case PPC::SELECT_VSFRC: 4541 case PPC::SELECT_VSSRC: 4542 case PPC::SELECT_VSRC: 4543 if (Op1Set) 4544 ResNode = MachineNode->getOperand(1).getNode(); 4545 else if (Op1Unset) 4546 ResNode = MachineNode->getOperand(2).getNode(); 4547 else if (Op1Not) 4548 ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(), 4549 SDLoc(MachineNode), 4550 MachineNode->getValueType(0), 4551 MachineNode->getOperand(0). 4552 getOperand(0), 4553 MachineNode->getOperand(2), 4554 MachineNode->getOperand(1)); 4555 break; 4556 case PPC::BC: 4557 case PPC::BCn: 4558 if (Op1Not) 4559 ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn : 4560 PPC::BC, 4561 SDLoc(MachineNode), 4562 MVT::Other, 4563 MachineNode->getOperand(0). 4564 getOperand(0), 4565 MachineNode->getOperand(1), 4566 MachineNode->getOperand(2)); 4567 // FIXME: Handle Op1Set, Op1Unset here too. 4568 break; 4569 } 4570 4571 // If we're inverting this node because it is used only by selects that 4572 // we'd like to swap, then swap the selects before the node replacement. 4573 if (SelectSwap) 4574 SwapAllSelectUsers(MachineNode); 4575 4576 if (ResNode != MachineNode) { 4577 DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); 4578 DEBUG(MachineNode->dump(CurDAG)); 4579 DEBUG(dbgs() << "\nNew: "); 4580 DEBUG(ResNode->dump(CurDAG)); 4581 DEBUG(dbgs() << "\n"); 4582 4583 ReplaceUses(MachineNode, ResNode); 4584 IsModified = true; 4585 } 4586 } 4587 if (IsModified) 4588 CurDAG->RemoveDeadNodes(); 4589 } while (IsModified); 4590 } 4591 4592 // Gather the set of 32-bit operations that are known to have their 4593 // higher-order 32 bits zero, where ToPromote contains all such operations. 4594 static bool PeepholePPC64ZExtGather(SDValue Op32, 4595 SmallPtrSetImpl<SDNode *> &ToPromote) { 4596 if (!Op32.isMachineOpcode()) 4597 return false; 4598 4599 // First, check for the "frontier" instructions (those that will clear the 4600 // higher-order 32 bits. 4601 4602 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap 4603 // around. If it does not, then these instructions will clear the 4604 // higher-order bits. 4605 if ((Op32.getMachineOpcode() == PPC::RLWINM || 4606 Op32.getMachineOpcode() == PPC::RLWNM) && 4607 Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) { 4608 ToPromote.insert(Op32.getNode()); 4609 return true; 4610 } 4611 4612 // SLW and SRW always clear the higher-order bits. 4613 if (Op32.getMachineOpcode() == PPC::SLW || 4614 Op32.getMachineOpcode() == PPC::SRW) { 4615 ToPromote.insert(Op32.getNode()); 4616 return true; 4617 } 4618 4619 // For LI and LIS, we need the immediate to be positive (so that it is not 4620 // sign extended). 4621 if (Op32.getMachineOpcode() == PPC::LI || 4622 Op32.getMachineOpcode() == PPC::LIS) { 4623 if (!isUInt<15>(Op32.getConstantOperandVal(0))) 4624 return false; 4625 4626 ToPromote.insert(Op32.getNode()); 4627 return true; 4628 } 4629 4630 // LHBRX and LWBRX always clear the higher-order bits. 4631 if (Op32.getMachineOpcode() == PPC::LHBRX || 4632 Op32.getMachineOpcode() == PPC::LWBRX) { 4633 ToPromote.insert(Op32.getNode()); 4634 return true; 4635 } 4636 4637 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended. 4638 if (Op32.getMachineOpcode() == PPC::CNTLZW || 4639 Op32.getMachineOpcode() == PPC::CNTTZW) { 4640 ToPromote.insert(Op32.getNode()); 4641 return true; 4642 } 4643 4644 // Next, check for those instructions we can look through. 4645 4646 // Assuming the mask does not wrap around, then the higher-order bits are 4647 // taken directly from the first operand. 4648 if (Op32.getMachineOpcode() == PPC::RLWIMI && 4649 Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) { 4650 SmallPtrSet<SDNode *, 16> ToPromote1; 4651 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) 4652 return false; 4653 4654 ToPromote.insert(Op32.getNode()); 4655 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 4656 return true; 4657 } 4658 4659 // For OR, the higher-order bits are zero if that is true for both operands. 4660 // For SELECT_I4, the same is true (but the relevant operand numbers are 4661 // shifted by 1). 4662 if (Op32.getMachineOpcode() == PPC::OR || 4663 Op32.getMachineOpcode() == PPC::SELECT_I4) { 4664 unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0; 4665 SmallPtrSet<SDNode *, 16> ToPromote1; 4666 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1)) 4667 return false; 4668 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1)) 4669 return false; 4670 4671 ToPromote.insert(Op32.getNode()); 4672 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 4673 return true; 4674 } 4675 4676 // For ORI and ORIS, we need the higher-order bits of the first operand to be 4677 // zero, and also for the constant to be positive (so that it is not sign 4678 // extended). 4679 if (Op32.getMachineOpcode() == PPC::ORI || 4680 Op32.getMachineOpcode() == PPC::ORIS) { 4681 SmallPtrSet<SDNode *, 16> ToPromote1; 4682 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) 4683 return false; 4684 if (!isUInt<15>(Op32.getConstantOperandVal(1))) 4685 return false; 4686 4687 ToPromote.insert(Op32.getNode()); 4688 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 4689 return true; 4690 } 4691 4692 // The higher-order bits of AND are zero if that is true for at least one of 4693 // the operands. 4694 if (Op32.getMachineOpcode() == PPC::AND) { 4695 SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2; 4696 bool Op0OK = 4697 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); 4698 bool Op1OK = 4699 PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2); 4700 if (!Op0OK && !Op1OK) 4701 return false; 4702 4703 ToPromote.insert(Op32.getNode()); 4704 4705 if (Op0OK) 4706 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 4707 4708 if (Op1OK) 4709 ToPromote.insert(ToPromote2.begin(), ToPromote2.end()); 4710 4711 return true; 4712 } 4713 4714 // For ANDI and ANDIS, the higher-order bits are zero if either that is true 4715 // of the first operand, or if the second operand is positive (so that it is 4716 // not sign extended). 4717 if (Op32.getMachineOpcode() == PPC::ANDIo || 4718 Op32.getMachineOpcode() == PPC::ANDISo) { 4719 SmallPtrSet<SDNode *, 16> ToPromote1; 4720 bool Op0OK = 4721 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); 4722 bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1)); 4723 if (!Op0OK && !Op1OK) 4724 return false; 4725 4726 ToPromote.insert(Op32.getNode()); 4727 4728 if (Op0OK) 4729 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 4730 4731 return true; 4732 } 4733 4734 return false; 4735 } 4736 4737 void PPCDAGToDAGISel::PeepholePPC64ZExt() { 4738 if (!PPCSubTarget->isPPC64()) 4739 return; 4740 4741 // When we zero-extend from i32 to i64, we use a pattern like this: 4742 // def : Pat<(i64 (zext i32:$in)), 4743 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32), 4744 // 0, 32)>; 4745 // There are several 32-bit shift/rotate instructions, however, that will 4746 // clear the higher-order bits of their output, rendering the RLDICL 4747 // unnecessary. When that happens, we remove it here, and redefine the 4748 // relevant 32-bit operation to be a 64-bit operation. 4749 4750 SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); 4751 ++Position; 4752 4753 bool MadeChange = false; 4754 while (Position != CurDAG->allnodes_begin()) { 4755 SDNode *N = &*--Position; 4756 // Skip dead nodes and any non-machine opcodes. 4757 if (N->use_empty() || !N->isMachineOpcode()) 4758 continue; 4759 4760 if (N->getMachineOpcode() != PPC::RLDICL) 4761 continue; 4762 4763 if (N->getConstantOperandVal(1) != 0 || 4764 N->getConstantOperandVal(2) != 32) 4765 continue; 4766 4767 SDValue ISR = N->getOperand(0); 4768 if (!ISR.isMachineOpcode() || 4769 ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG) 4770 continue; 4771 4772 if (!ISR.hasOneUse()) 4773 continue; 4774 4775 if (ISR.getConstantOperandVal(2) != PPC::sub_32) 4776 continue; 4777 4778 SDValue IDef = ISR.getOperand(0); 4779 if (!IDef.isMachineOpcode() || 4780 IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF) 4781 continue; 4782 4783 // We now know that we're looking at a canonical i32 -> i64 zext. See if we 4784 // can get rid of it. 4785 4786 SDValue Op32 = ISR->getOperand(1); 4787 if (!Op32.isMachineOpcode()) 4788 continue; 4789 4790 // There are some 32-bit instructions that always clear the high-order 32 4791 // bits, there are also some instructions (like AND) that we can look 4792 // through. 4793 SmallPtrSet<SDNode *, 16> ToPromote; 4794 if (!PeepholePPC64ZExtGather(Op32, ToPromote)) 4795 continue; 4796 4797 // If the ToPromote set contains nodes that have uses outside of the set 4798 // (except for the original INSERT_SUBREG), then abort the transformation. 4799 bool OutsideUse = false; 4800 for (SDNode *PN : ToPromote) { 4801 for (SDNode *UN : PN->uses()) { 4802 if (!ToPromote.count(UN) && UN != ISR.getNode()) { 4803 OutsideUse = true; 4804 break; 4805 } 4806 } 4807 4808 if (OutsideUse) 4809 break; 4810 } 4811 if (OutsideUse) 4812 continue; 4813 4814 MadeChange = true; 4815 4816 // We now know that this zero extension can be removed by promoting to 4817 // nodes in ToPromote to 64-bit operations, where for operations in the 4818 // frontier of the set, we need to insert INSERT_SUBREGs for their 4819 // operands. 4820 for (SDNode *PN : ToPromote) { 4821 unsigned NewOpcode; 4822 switch (PN->getMachineOpcode()) { 4823 default: 4824 llvm_unreachable("Don't know the 64-bit variant of this instruction"); 4825 case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break; 4826 case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break; 4827 case PPC::SLW: NewOpcode = PPC::SLW8; break; 4828 case PPC::SRW: NewOpcode = PPC::SRW8; break; 4829 case PPC::LI: NewOpcode = PPC::LI8; break; 4830 case PPC::LIS: NewOpcode = PPC::LIS8; break; 4831 case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break; 4832 case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break; 4833 case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break; 4834 case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break; 4835 case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break; 4836 case PPC::OR: NewOpcode = PPC::OR8; break; 4837 case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break; 4838 case PPC::ORI: NewOpcode = PPC::ORI8; break; 4839 case PPC::ORIS: NewOpcode = PPC::ORIS8; break; 4840 case PPC::AND: NewOpcode = PPC::AND8; break; 4841 case PPC::ANDIo: NewOpcode = PPC::ANDIo8; break; 4842 case PPC::ANDISo: NewOpcode = PPC::ANDISo8; break; 4843 } 4844 4845 // Note: During the replacement process, the nodes will be in an 4846 // inconsistent state (some instructions will have operands with values 4847 // of the wrong type). Once done, however, everything should be right 4848 // again. 4849 4850 SmallVector<SDValue, 4> Ops; 4851 for (const SDValue &V : PN->ops()) { 4852 if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 && 4853 !isa<ConstantSDNode>(V)) { 4854 SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) }; 4855 SDNode *ReplOp = 4856 CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V), 4857 ISR.getNode()->getVTList(), ReplOpOps); 4858 Ops.push_back(SDValue(ReplOp, 0)); 4859 } else { 4860 Ops.push_back(V); 4861 } 4862 } 4863 4864 // Because all to-be-promoted nodes only have users that are other 4865 // promoted nodes (or the original INSERT_SUBREG), we can safely replace 4866 // the i32 result value type with i64. 4867 4868 SmallVector<EVT, 2> NewVTs; 4869 SDVTList VTs = PN->getVTList(); 4870 for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i) 4871 if (VTs.VTs[i] == MVT::i32) 4872 NewVTs.push_back(MVT::i64); 4873 else 4874 NewVTs.push_back(VTs.VTs[i]); 4875 4876 DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: "); 4877 DEBUG(PN->dump(CurDAG)); 4878 4879 CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops); 4880 4881 DEBUG(dbgs() << "\nNew: "); 4882 DEBUG(PN->dump(CurDAG)); 4883 DEBUG(dbgs() << "\n"); 4884 } 4885 4886 // Now we replace the original zero extend and its associated INSERT_SUBREG 4887 // with the value feeding the INSERT_SUBREG (which has now been promoted to 4888 // return an i64). 4889 4890 DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: "); 4891 DEBUG(N->dump(CurDAG)); 4892 DEBUG(dbgs() << "\nNew: "); 4893 DEBUG(Op32.getNode()->dump(CurDAG)); 4894 DEBUG(dbgs() << "\n"); 4895 4896 ReplaceUses(N, Op32.getNode()); 4897 } 4898 4899 if (MadeChange) 4900 CurDAG->RemoveDeadNodes(); 4901 } 4902 4903 void PPCDAGToDAGISel::PeepholePPC64() { 4904 // These optimizations are currently supported only for 64-bit SVR4. 4905 if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64()) 4906 return; 4907 4908 SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); 4909 ++Position; 4910 4911 while (Position != CurDAG->allnodes_begin()) { 4912 SDNode *N = &*--Position; 4913 // Skip dead nodes and any non-machine opcodes. 4914 if (N->use_empty() || !N->isMachineOpcode()) 4915 continue; 4916 4917 unsigned FirstOp; 4918 unsigned StorageOpcode = N->getMachineOpcode(); 4919 4920 switch (StorageOpcode) { 4921 default: continue; 4922 4923 case PPC::LBZ: 4924 case PPC::LBZ8: 4925 case PPC::LD: 4926 case PPC::LFD: 4927 case PPC::LFS: 4928 case PPC::LHA: 4929 case PPC::LHA8: 4930 case PPC::LHZ: 4931 case PPC::LHZ8: 4932 case PPC::LWA: 4933 case PPC::LWZ: 4934 case PPC::LWZ8: 4935 FirstOp = 0; 4936 break; 4937 4938 case PPC::STB: 4939 case PPC::STB8: 4940 case PPC::STD: 4941 case PPC::STFD: 4942 case PPC::STFS: 4943 case PPC::STH: 4944 case PPC::STH8: 4945 case PPC::STW: 4946 case PPC::STW8: 4947 FirstOp = 1; 4948 break; 4949 } 4950 4951 // If this is a load or store with a zero offset, or within the alignment, 4952 // we may be able to fold an add-immediate into the memory operation. 4953 // The check against alignment is below, as it can't occur until we check 4954 // the arguments to N 4955 if (!isa<ConstantSDNode>(N->getOperand(FirstOp))) 4956 continue; 4957 4958 SDValue Base = N->getOperand(FirstOp + 1); 4959 if (!Base.isMachineOpcode()) 4960 continue; 4961 4962 unsigned Flags = 0; 4963 bool ReplaceFlags = true; 4964 4965 // When the feeding operation is an add-immediate of some sort, 4966 // determine whether we need to add relocation information to the 4967 // target flags on the immediate operand when we fold it into the 4968 // load instruction. 4969 // 4970 // For something like ADDItocL, the relocation information is 4971 // inferred from the opcode; when we process it in the AsmPrinter, 4972 // we add the necessary relocation there. A load, though, can receive 4973 // relocation from various flavors of ADDIxxx, so we need to carry 4974 // the relocation information in the target flags. 4975 switch (Base.getMachineOpcode()) { 4976 default: continue; 4977 4978 case PPC::ADDI8: 4979 case PPC::ADDI: 4980 // In some cases (such as TLS) the relocation information 4981 // is already in place on the operand, so copying the operand 4982 // is sufficient. 4983 ReplaceFlags = false; 4984 // For these cases, the immediate may not be divisible by 4, in 4985 // which case the fold is illegal for DS-form instructions. (The 4986 // other cases provide aligned addresses and are always safe.) 4987 if ((StorageOpcode == PPC::LWA || 4988 StorageOpcode == PPC::LD || 4989 StorageOpcode == PPC::STD) && 4990 (!isa<ConstantSDNode>(Base.getOperand(1)) || 4991 Base.getConstantOperandVal(1) % 4 != 0)) 4992 continue; 4993 break; 4994 case PPC::ADDIdtprelL: 4995 Flags = PPCII::MO_DTPREL_LO; 4996 break; 4997 case PPC::ADDItlsldL: 4998 Flags = PPCII::MO_TLSLD_LO; 4999 break; 5000 case PPC::ADDItocL: 5001 Flags = PPCII::MO_TOC_LO; 5002 break; 5003 } 5004 5005 SDValue ImmOpnd = Base.getOperand(1); 5006 5007 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have 5008 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise, 5009 // we might have needed different @ha relocation values for the offset 5010 // pointers). 5011 int MaxDisplacement = 7; 5012 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { 5013 const GlobalValue *GV = GA->getGlobal(); 5014 MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement); 5015 } 5016 5017 bool UpdateHBase = false; 5018 SDValue HBase = Base.getOperand(0); 5019 5020 int Offset = N->getConstantOperandVal(FirstOp); 5021 if (ReplaceFlags) { 5022 if (Offset < 0 || Offset > MaxDisplacement) { 5023 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only 5024 // one use, then we can do this for any offset, we just need to also 5025 // update the offset (i.e. the symbol addend) on the addis also. 5026 if (Base.getMachineOpcode() != PPC::ADDItocL) 5027 continue; 5028 5029 if (!HBase.isMachineOpcode() || 5030 HBase.getMachineOpcode() != PPC::ADDIStocHA) 5031 continue; 5032 5033 if (!Base.hasOneUse() || !HBase.hasOneUse()) 5034 continue; 5035 5036 SDValue HImmOpnd = HBase.getOperand(1); 5037 if (HImmOpnd != ImmOpnd) 5038 continue; 5039 5040 UpdateHBase = true; 5041 } 5042 } else { 5043 // If we're directly folding the addend from an addi instruction, then: 5044 // 1. In general, the offset on the memory access must be zero. 5045 // 2. If the addend is a constant, then it can be combined with a 5046 // non-zero offset, but only if the result meets the encoding 5047 // requirements. 5048 if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) { 5049 Offset += C->getSExtValue(); 5050 5051 if ((StorageOpcode == PPC::LWA || StorageOpcode == PPC::LD || 5052 StorageOpcode == PPC::STD) && (Offset % 4) != 0) 5053 continue; 5054 5055 if (!isInt<16>(Offset)) 5056 continue; 5057 5058 ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd), 5059 ImmOpnd.getValueType()); 5060 } else if (Offset != 0) { 5061 continue; 5062 } 5063 } 5064 5065 // We found an opportunity. Reverse the operands from the add 5066 // immediate and substitute them into the load or store. If 5067 // needed, update the target flags for the immediate operand to 5068 // reflect the necessary relocation information. 5069 DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); 5070 DEBUG(Base->dump(CurDAG)); 5071 DEBUG(dbgs() << "\nN: "); 5072 DEBUG(N->dump(CurDAG)); 5073 DEBUG(dbgs() << "\n"); 5074 5075 // If the relocation information isn't already present on the 5076 // immediate operand, add it now. 5077 if (ReplaceFlags) { 5078 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { 5079 SDLoc dl(GA); 5080 const GlobalValue *GV = GA->getGlobal(); 5081 // We can't perform this optimization for data whose alignment 5082 // is insufficient for the instruction encoding. 5083 if (GV->getAlignment() < 4 && 5084 (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD || 5085 StorageOpcode == PPC::LWA || (Offset % 4) != 0)) { 5086 DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); 5087 continue; 5088 } 5089 ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); 5090 } else if (ConstantPoolSDNode *CP = 5091 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) { 5092 const Constant *C = CP->getConstVal(); 5093 ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, 5094 CP->getAlignment(), 5095 Offset, Flags); 5096 } 5097 } 5098 5099 if (FirstOp == 1) // Store 5100 (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, 5101 Base.getOperand(0), N->getOperand(3)); 5102 else // Load 5103 (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), 5104 N->getOperand(2)); 5105 5106 if (UpdateHBase) 5107 (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0), 5108 ImmOpnd); 5109 5110 // The add-immediate may now be dead, in which case remove it. 5111 if (Base.getNode()->use_empty()) 5112 CurDAG->RemoveDeadNode(Base.getNode()); 5113 } 5114 } 5115 5116 /// createPPCISelDag - This pass converts a legalized DAG into a 5117 /// PowerPC-specific DAG, ready for instruction scheduling. 5118 /// 5119 FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) { 5120 return new PPCDAGToDAGISel(TM); 5121 } 5122