1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines a pattern matching instruction selector for PowerPC, 11 // converting from a legalized dag to a PPC dag. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "MCTargetDesc/PPCMCTargetDesc.h" 16 #include "MCTargetDesc/PPCPredicates.h" 17 #include "PPC.h" 18 #include "PPCISelLowering.h" 19 #include "PPCMachineFunctionInfo.h" 20 #include "PPCSubtarget.h" 21 #include "PPCTargetMachine.h" 22 #include "llvm/ADT/APInt.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/ADT/Statistic.h" 28 #include "llvm/Analysis/BranchProbabilityInfo.h" 29 #include "llvm/CodeGen/FunctionLoweringInfo.h" 30 #include "llvm/CodeGen/ISDOpcodes.h" 31 #include "llvm/CodeGen/MachineBasicBlock.h" 32 #include "llvm/CodeGen/MachineFunction.h" 33 #include "llvm/CodeGen/MachineInstrBuilder.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/MachineValueType.h" 36 #include "llvm/CodeGen/SelectionDAG.h" 37 #include "llvm/CodeGen/SelectionDAGISel.h" 38 #include "llvm/CodeGen/SelectionDAGNodes.h" 39 #include "llvm/CodeGen/ValueTypes.h" 40 #include "llvm/IR/BasicBlock.h" 41 #include "llvm/IR/DebugLoc.h" 42 #include "llvm/IR/Function.h" 43 #include "llvm/IR/GlobalValue.h" 44 #include "llvm/IR/InlineAsm.h" 45 #include "llvm/IR/InstrTypes.h" 46 #include "llvm/IR/Module.h" 47 #include "llvm/Support/Casting.h" 48 #include "llvm/Support/CodeGen.h" 49 #include "llvm/Support/CommandLine.h" 50 #include "llvm/Support/Compiler.h" 51 #include "llvm/Support/Debug.h" 52 #include "llvm/Support/ErrorHandling.h" 53 #include "llvm/Support/KnownBits.h" 54 #include "llvm/Support/MathExtras.h" 55 #include "llvm/Support/raw_ostream.h" 56 #include "llvm/Target/TargetInstrInfo.h" 57 #include "llvm/Target/TargetRegisterInfo.h" 58 #include <algorithm> 59 #include <cassert> 60 #include <cstdint> 61 #include <iterator> 62 #include <limits> 63 #include <memory> 64 #include <new> 65 #include <tuple> 66 #include <utility> 67 68 using namespace llvm; 69 70 #define DEBUG_TYPE "ppc-codegen" 71 72 STATISTIC(NumSextSetcc, 73 "Number of (sext(setcc)) nodes expanded into GPR sequence."); 74 STATISTIC(NumZextSetcc, 75 "Number of (zext(setcc)) nodes expanded into GPR sequence."); 76 STATISTIC(SignExtensionsAdded, 77 "Number of sign extensions for compare inputs added."); 78 STATISTIC(ZeroExtensionsAdded, 79 "Number of zero extensions for compare inputs added."); 80 STATISTIC(NumLogicOpsOnComparison, 81 "Number of logical ops on i1 values calculated in GPR."); 82 STATISTIC(OmittedForNonExtendUses, 83 "Number of compares not eliminated as they have non-extending uses."); 84 85 // FIXME: Remove this once the bug has been fixed! 86 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", 87 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); 88 89 static cl::opt<bool> 90 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), 91 cl::desc("use aggressive ppc isel for bit permutations"), 92 cl::Hidden); 93 static cl::opt<bool> BPermRewriterNoMasking( 94 "ppc-bit-perm-rewriter-stress-rotates", 95 cl::desc("stress rotate selection in aggressive ppc isel for " 96 "bit permutations"), 97 cl::Hidden); 98 99 static cl::opt<bool> EnableBranchHint( 100 "ppc-use-branch-hint", cl::init(true), 101 cl::desc("Enable static hinting of branches on ppc"), 102 cl::Hidden); 103 104 namespace { 105 106 //===--------------------------------------------------------------------===// 107 /// PPCDAGToDAGISel - PPC specific code to select PPC machine 108 /// instructions for SelectionDAG operations. 109 /// 110 class PPCDAGToDAGISel : public SelectionDAGISel { 111 const PPCTargetMachine &TM; 112 const PPCSubtarget *PPCSubTarget; 113 const PPCTargetLowering *PPCLowering; 114 unsigned GlobalBaseReg; 115 116 public: 117 explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel) 118 : SelectionDAGISel(tm, OptLevel), TM(tm) {} 119 120 bool runOnMachineFunction(MachineFunction &MF) override { 121 // Make sure we re-emit a set of the global base reg if necessary 122 GlobalBaseReg = 0; 123 PPCSubTarget = &MF.getSubtarget<PPCSubtarget>(); 124 PPCLowering = PPCSubTarget->getTargetLowering(); 125 SelectionDAGISel::runOnMachineFunction(MF); 126 127 if (!PPCSubTarget->isSVR4ABI()) 128 InsertVRSaveCode(MF); 129 130 return true; 131 } 132 133 void PreprocessISelDAG() override; 134 void PostprocessISelDAG() override; 135 136 /// getI32Imm - Return a target constant with the specified value, of type 137 /// i32. 138 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 139 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 140 } 141 142 /// getI64Imm - Return a target constant with the specified value, of type 143 /// i64. 144 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) { 145 return CurDAG->getTargetConstant(Imm, dl, MVT::i64); 146 } 147 148 /// getSmallIPtrImm - Return a target constant of pointer type. 149 inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) { 150 return CurDAG->getTargetConstant( 151 Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); 152 } 153 154 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a 155 /// rotate and mask opcode and mask operation. 156 static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, 157 unsigned &SH, unsigned &MB, unsigned &ME); 158 159 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC 160 /// base register. Return the virtual register that holds this value. 161 SDNode *getGlobalBaseReg(); 162 163 void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0); 164 165 // Select - Convert the specified operand from a target-independent to a 166 // target-specific node if it hasn't already been changed. 167 void Select(SDNode *N) override; 168 169 bool tryBitfieldInsert(SDNode *N); 170 bool tryBitPermutation(SDNode *N); 171 172 /// SelectCC - Select a comparison of the specified values with the 173 /// specified condition code, returning the CR# of the expression. 174 SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, 175 const SDLoc &dl); 176 177 /// SelectAddrImm - Returns true if the address N can be represented by 178 /// a base register plus a signed 16-bit displacement [r+imm]. 179 bool SelectAddrImm(SDValue N, SDValue &Disp, 180 SDValue &Base) { 181 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false); 182 } 183 184 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc 185 /// immediate field. Note that the operand at this point is already the 186 /// result of a prior SelectAddressRegImm call. 187 bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { 188 if (N.getOpcode() == ISD::TargetConstant || 189 N.getOpcode() == ISD::TargetGlobalAddress) { 190 Out = N; 191 return true; 192 } 193 194 return false; 195 } 196 197 /// SelectAddrIdx - Given the specified addressed, check to see if it can be 198 /// represented as an indexed [r+r] operation. Returns false if it can 199 /// be represented by [r+imm], which are preferred. 200 bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { 201 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG); 202 } 203 204 /// SelectAddrIdxOnly - Given the specified addressed, force it to be 205 /// represented as an indexed [r+r] operation. 206 bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { 207 return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG); 208 } 209 210 /// SelectAddrImmX4 - Returns true if the address N can be represented by 211 /// a base register plus a signed 16-bit displacement that is a multiple of 4. 212 /// Suitable for use by STD and friends. 213 bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { 214 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true); 215 } 216 217 // Select an address into a single register. 218 bool SelectAddr(SDValue N, SDValue &Base) { 219 Base = N; 220 return true; 221 } 222 223 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 224 /// inline asm expressions. It is always correct to compute the value into 225 /// a register. The case of adding a (possibly relocatable) constant to a 226 /// register can be improved, but it is wrong to substitute Reg+Reg for 227 /// Reg in an asm, because the load or store opcode would have to change. 228 bool SelectInlineAsmMemoryOperand(const SDValue &Op, 229 unsigned ConstraintID, 230 std::vector<SDValue> &OutOps) override { 231 switch(ConstraintID) { 232 default: 233 errs() << "ConstraintID: " << ConstraintID << "\n"; 234 llvm_unreachable("Unexpected asm memory constraint"); 235 case InlineAsm::Constraint_es: 236 case InlineAsm::Constraint_i: 237 case InlineAsm::Constraint_m: 238 case InlineAsm::Constraint_o: 239 case InlineAsm::Constraint_Q: 240 case InlineAsm::Constraint_Z: 241 case InlineAsm::Constraint_Zy: 242 // We need to make sure that this one operand does not end up in r0 243 // (because we might end up lowering this as 0(%op)). 244 const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo(); 245 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1); 246 SDLoc dl(Op); 247 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32); 248 SDValue NewOp = 249 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 250 dl, Op.getValueType(), 251 Op, RC), 0); 252 253 OutOps.push_back(NewOp); 254 return false; 255 } 256 return true; 257 } 258 259 void InsertVRSaveCode(MachineFunction &MF); 260 261 StringRef getPassName() const override { 262 return "PowerPC DAG->DAG Pattern Instruction Selection"; 263 } 264 265 // Include the pieces autogenerated from the target description. 266 #include "PPCGenDAGISel.inc" 267 268 private: 269 // Conversion type for interpreting results of a 32-bit instruction as 270 // a 64-bit value or vice versa. 271 enum ExtOrTruncConversion { Ext, Trunc }; 272 273 // Modifiers to guide how an ISD::SETCC node's result is to be computed 274 // in a GPR. 275 // ZExtOrig - use the original condition code, zero-extend value 276 // ZExtInvert - invert the condition code, zero-extend value 277 // SExtOrig - use the original condition code, sign-extend value 278 // SExtInvert - invert the condition code, sign-extend value 279 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert }; 280 281 bool trySETCC(SDNode *N); 282 bool tryEXTEND(SDNode *N); 283 bool tryLogicOpOfCompares(SDNode *N); 284 SDValue computeLogicOpInGPR(SDValue LogicOp); 285 SDValue signExtendInputIfNeeded(SDValue Input); 286 SDValue zeroExtendInputIfNeeded(SDValue Input); 287 SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); 288 SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 289 int64_t RHSValue, SDLoc dl); 290 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 291 int64_t RHSValue, SDLoc dl); 292 SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 293 int64_t RHSValue, SDLoc dl); 294 SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 295 int64_t RHSValue, SDLoc dl); 296 SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); 297 298 void PeepholePPC64(); 299 void PeepholePPC64ZExt(); 300 void PeepholeCROps(); 301 302 SDValue combineToCMPB(SDNode *N); 303 void foldBoolExts(SDValue &Res, SDNode *&N); 304 305 bool AllUsersSelectZero(SDNode *N); 306 void SwapAllSelectUsers(SDNode *N); 307 308 void transferMemOperands(SDNode *N, SDNode *Result); 309 }; 310 311 } // end anonymous namespace 312 313 /// InsertVRSaveCode - Once the entire function has been instruction selected, 314 /// all virtual registers are created and all machine instructions are built, 315 /// check to see if we need to save/restore VRSAVE. If so, do it. 316 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { 317 // Check to see if this function uses vector registers, which means we have to 318 // save and restore the VRSAVE register and update it with the regs we use. 319 // 320 // In this case, there will be virtual registers of vector type created 321 // by the scheduler. Detect them now. 322 bool HasVectorVReg = false; 323 for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) { 324 unsigned Reg = TargetRegisterInfo::index2VirtReg(i); 325 if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) { 326 HasVectorVReg = true; 327 break; 328 } 329 } 330 if (!HasVectorVReg) return; // nothing to do. 331 332 // If we have a vector register, we want to emit code into the entry and exit 333 // blocks to save and restore the VRSAVE register. We do this here (instead 334 // of marking all vector instructions as clobbering VRSAVE) for two reasons: 335 // 336 // 1. This (trivially) reduces the load on the register allocator, by not 337 // having to represent the live range of the VRSAVE register. 338 // 2. This (more significantly) allows us to create a temporary virtual 339 // register to hold the saved VRSAVE value, allowing this temporary to be 340 // register allocated, instead of forcing it to be spilled to the stack. 341 342 // Create two vregs - one to hold the VRSAVE register that is live-in to the 343 // function and one for the value after having bits or'd into it. 344 unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); 345 unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); 346 347 const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); 348 MachineBasicBlock &EntryBB = *Fn.begin(); 349 DebugLoc dl; 350 // Emit the following code into the entry block: 351 // InVRSAVE = MFVRSAVE 352 // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE 353 // MTVRSAVE UpdatedVRSAVE 354 MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point 355 BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE); 356 BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE), 357 UpdatedVRSAVE).addReg(InVRSAVE); 358 BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE); 359 360 // Find all return blocks, outputting a restore in each epilog. 361 for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { 362 if (BB->isReturnBlock()) { 363 IP = BB->end(); --IP; 364 365 // Skip over all terminator instructions, which are part of the return 366 // sequence. 367 MachineBasicBlock::iterator I2 = IP; 368 while (I2 != BB->begin() && (--I2)->isTerminator()) 369 IP = I2; 370 371 // Emit: MTVRSAVE InVRSave 372 BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE); 373 } 374 } 375 } 376 377 /// getGlobalBaseReg - Output the instructions required to put the 378 /// base address to use for accessing globals into a register. 379 /// 380 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { 381 if (!GlobalBaseReg) { 382 const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo(); 383 // Insert the set of GlobalBaseReg into the first MBB of the function 384 MachineBasicBlock &FirstMBB = MF->front(); 385 MachineBasicBlock::iterator MBBI = FirstMBB.begin(); 386 const Module *M = MF->getFunction()->getParent(); 387 DebugLoc dl; 388 389 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { 390 if (PPCSubTarget->isTargetELF()) { 391 GlobalBaseReg = PPC::R30; 392 if (M->getPICLevel() == PICLevel::SmallPIC) { 393 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR)); 394 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); 395 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); 396 } else { 397 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); 398 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); 399 unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); 400 BuildMI(FirstMBB, MBBI, dl, 401 TII.get(PPC::UpdateGBR), GlobalBaseReg) 402 .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg); 403 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true); 404 } 405 } else { 406 GlobalBaseReg = 407 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass); 408 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); 409 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); 410 } 411 } else { 412 GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); 413 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8)); 414 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg); 415 } 416 } 417 return CurDAG->getRegister(GlobalBaseReg, 418 PPCLowering->getPointerTy(CurDAG->getDataLayout())) 419 .getNode(); 420 } 421 422 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant 423 /// operand. If so Imm will receive the 32-bit value. 424 static bool isInt32Immediate(SDNode *N, unsigned &Imm) { 425 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { 426 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 427 return true; 428 } 429 return false; 430 } 431 432 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant 433 /// operand. If so Imm will receive the 64-bit value. 434 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) { 435 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) { 436 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 437 return true; 438 } 439 return false; 440 } 441 442 // isInt32Immediate - This method tests to see if a constant operand. 443 // If so Imm will receive the 32 bit value. 444 static bool isInt32Immediate(SDValue N, unsigned &Imm) { 445 return isInt32Immediate(N.getNode(), Imm); 446 } 447 448 static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo, 449 const SDValue &DestMBB) { 450 assert(isa<BasicBlockSDNode>(DestMBB)); 451 452 if (!FuncInfo->BPI) return PPC::BR_NO_HINT; 453 454 const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); 455 const TerminatorInst *BBTerm = BB->getTerminator(); 456 457 if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT; 458 459 const BasicBlock *TBB = BBTerm->getSuccessor(0); 460 const BasicBlock *FBB = BBTerm->getSuccessor(1); 461 462 auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB); 463 auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB); 464 465 // We only want to handle cases which are easy to predict at static time, e.g. 466 // C++ throw statement, that is very likely not taken, or calling never 467 // returned function, e.g. stdlib exit(). So we set Threshold to filter 468 // unwanted cases. 469 // 470 // Below is LLVM branch weight table, we only want to handle case 1, 2 471 // 472 // Case Taken:Nontaken Example 473 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(), 474 // 2. Invoke-terminating 1:1048575 475 // 3. Coldblock 4:64 __builtin_expect 476 // 4. Loop Branch 124:4 For loop 477 // 5. PH/ZH/FPH 20:12 478 const uint32_t Threshold = 10000; 479 480 if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb)) 481 return PPC::BR_NO_HINT; 482 483 DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::" 484 << BB->getName() << "'\n" 485 << " -> " << TBB->getName() << ": " << TProb << "\n" 486 << " -> " << FBB->getName() << ": " << FProb << "\n"); 487 488 const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB); 489 490 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities, 491 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock 492 if (BBDN->getBasicBlock()->getBasicBlock() != TBB) 493 std::swap(TProb, FProb); 494 495 return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT; 496 } 497 498 // isOpcWithIntImmediate - This method tests to see if the node is a specific 499 // opcode and that it has a immediate integer right operand. 500 // If so Imm will receive the 32 bit value. 501 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 502 return N->getOpcode() == Opc 503 && isInt32Immediate(N->getOperand(1).getNode(), Imm); 504 } 505 506 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) { 507 SDLoc dl(SN); 508 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 509 SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); 510 unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8; 511 if (SN->hasOneUse()) 512 CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI, 513 getSmallIPtrImm(Offset, dl)); 514 else 515 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI, 516 getSmallIPtrImm(Offset, dl))); 517 } 518 519 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, 520 bool isShiftMask, unsigned &SH, 521 unsigned &MB, unsigned &ME) { 522 // Don't even go down this path for i64, since different logic will be 523 // necessary for rldicl/rldicr/rldimi. 524 if (N->getValueType(0) != MVT::i32) 525 return false; 526 527 unsigned Shift = 32; 528 unsigned Indeterminant = ~0; // bit mask marking indeterminant results 529 unsigned Opcode = N->getOpcode(); 530 if (N->getNumOperands() != 2 || 531 !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31)) 532 return false; 533 534 if (Opcode == ISD::SHL) { 535 // apply shift left to mask if it comes first 536 if (isShiftMask) Mask = Mask << Shift; 537 // determine which bits are made indeterminant by shift 538 Indeterminant = ~(0xFFFFFFFFu << Shift); 539 } else if (Opcode == ISD::SRL) { 540 // apply shift right to mask if it comes first 541 if (isShiftMask) Mask = Mask >> Shift; 542 // determine which bits are made indeterminant by shift 543 Indeterminant = ~(0xFFFFFFFFu >> Shift); 544 // adjust for the left rotate 545 Shift = 32 - Shift; 546 } else if (Opcode == ISD::ROTL) { 547 Indeterminant = 0; 548 } else { 549 return false; 550 } 551 552 // if the mask doesn't intersect any Indeterminant bits 553 if (Mask && !(Mask & Indeterminant)) { 554 SH = Shift & 31; 555 // make sure the mask is still a mask (wrap arounds may not be) 556 return isRunOfOnes(Mask, MB, ME); 557 } 558 return false; 559 } 560 561 /// Turn an or of two masked values into the rotate left word immediate then 562 /// mask insert (rlwimi) instruction. 563 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { 564 SDValue Op0 = N->getOperand(0); 565 SDValue Op1 = N->getOperand(1); 566 SDLoc dl(N); 567 568 KnownBits LKnown, RKnown; 569 CurDAG->computeKnownBits(Op0, LKnown); 570 CurDAG->computeKnownBits(Op1, RKnown); 571 572 unsigned TargetMask = LKnown.Zero.getZExtValue(); 573 unsigned InsertMask = RKnown.Zero.getZExtValue(); 574 575 if ((TargetMask | InsertMask) == 0xFFFFFFFF) { 576 unsigned Op0Opc = Op0.getOpcode(); 577 unsigned Op1Opc = Op1.getOpcode(); 578 unsigned Value, SH = 0; 579 TargetMask = ~TargetMask; 580 InsertMask = ~InsertMask; 581 582 // If the LHS has a foldable shift and the RHS does not, then swap it to the 583 // RHS so that we can fold the shift into the insert. 584 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) { 585 if (Op0.getOperand(0).getOpcode() == ISD::SHL || 586 Op0.getOperand(0).getOpcode() == ISD::SRL) { 587 if (Op1.getOperand(0).getOpcode() != ISD::SHL && 588 Op1.getOperand(0).getOpcode() != ISD::SRL) { 589 std::swap(Op0, Op1); 590 std::swap(Op0Opc, Op1Opc); 591 std::swap(TargetMask, InsertMask); 592 } 593 } 594 } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) { 595 if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL && 596 Op1.getOperand(0).getOpcode() != ISD::SRL) { 597 std::swap(Op0, Op1); 598 std::swap(Op0Opc, Op1Opc); 599 std::swap(TargetMask, InsertMask); 600 } 601 } 602 603 unsigned MB, ME; 604 if (isRunOfOnes(InsertMask, MB, ME)) { 605 SDValue Tmp1, Tmp2; 606 607 if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && 608 isInt32Immediate(Op1.getOperand(1), Value)) { 609 Op1 = Op1.getOperand(0); 610 SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value; 611 } 612 if (Op1Opc == ISD::AND) { 613 // The AND mask might not be a constant, and we need to make sure that 614 // if we're going to fold the masking with the insert, all bits not 615 // know to be zero in the mask are known to be one. 616 KnownBits MKnown; 617 CurDAG->computeKnownBits(Op1.getOperand(1), MKnown); 618 bool CanFoldMask = InsertMask == MKnown.One.getZExtValue(); 619 620 unsigned SHOpc = Op1.getOperand(0).getOpcode(); 621 if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask && 622 isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) { 623 // Note that Value must be in range here (less than 32) because 624 // otherwise there would not be any bits set in InsertMask. 625 Op1 = Op1.getOperand(0).getOperand(0); 626 SH = (SHOpc == ISD::SHL) ? Value : 32 - Value; 627 } 628 } 629 630 SH &= 31; 631 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl), 632 getI32Imm(ME, dl) }; 633 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); 634 return true; 635 } 636 } 637 return false; 638 } 639 640 // Predict the number of instructions that would be generated by calling 641 // getInt64(N). 642 static unsigned getInt64CountDirect(int64_t Imm) { 643 // Assume no remaining bits. 644 unsigned Remainder = 0; 645 // Assume no shift required. 646 unsigned Shift = 0; 647 648 // If it can't be represented as a 32 bit value. 649 if (!isInt<32>(Imm)) { 650 Shift = countTrailingZeros<uint64_t>(Imm); 651 int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; 652 653 // If the shifted value fits 32 bits. 654 if (isInt<32>(ImmSh)) { 655 // Go with the shifted value. 656 Imm = ImmSh; 657 } else { 658 // Still stuck with a 64 bit value. 659 Remainder = Imm; 660 Shift = 32; 661 Imm >>= 32; 662 } 663 } 664 665 // Intermediate operand. 666 unsigned Result = 0; 667 668 // Handle first 32 bits. 669 unsigned Lo = Imm & 0xFFFF; 670 671 // Simple value. 672 if (isInt<16>(Imm)) { 673 // Just the Lo bits. 674 ++Result; 675 } else if (Lo) { 676 // Handle the Hi bits and Lo bits. 677 Result += 2; 678 } else { 679 // Just the Hi bits. 680 ++Result; 681 } 682 683 // If no shift, we're done. 684 if (!Shift) return Result; 685 686 // If Hi word == Lo word, 687 // we can use rldimi to insert the Lo word into Hi word. 688 if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { 689 ++Result; 690 return Result; 691 } 692 693 // Shift for next step if the upper 32-bits were not zero. 694 if (Imm) 695 ++Result; 696 697 // Add in the last bits as required. 698 if ((Remainder >> 16) & 0xFFFF) 699 ++Result; 700 if (Remainder & 0xFFFF) 701 ++Result; 702 703 return Result; 704 } 705 706 static uint64_t Rot64(uint64_t Imm, unsigned R) { 707 return (Imm << R) | (Imm >> (64 - R)); 708 } 709 710 static unsigned getInt64Count(int64_t Imm) { 711 unsigned Count = getInt64CountDirect(Imm); 712 if (Count == 1) 713 return Count; 714 715 for (unsigned r = 1; r < 63; ++r) { 716 uint64_t RImm = Rot64(Imm, r); 717 unsigned RCount = getInt64CountDirect(RImm) + 1; 718 Count = std::min(Count, RCount); 719 720 // See comments in getInt64 for an explanation of the logic below. 721 unsigned LS = findLastSet(RImm); 722 if (LS != r-1) 723 continue; 724 725 uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); 726 uint64_t RImmWithOnes = RImm | OnesMask; 727 728 RCount = getInt64CountDirect(RImmWithOnes) + 1; 729 Count = std::min(Count, RCount); 730 } 731 732 return Count; 733 } 734 735 // Select a 64-bit constant. For cost-modeling purposes, getInt64Count 736 // (above) needs to be kept in sync with this function. 737 static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl, 738 int64_t Imm) { 739 // Assume no remaining bits. 740 unsigned Remainder = 0; 741 // Assume no shift required. 742 unsigned Shift = 0; 743 744 // If it can't be represented as a 32 bit value. 745 if (!isInt<32>(Imm)) { 746 Shift = countTrailingZeros<uint64_t>(Imm); 747 int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift; 748 749 // If the shifted value fits 32 bits. 750 if (isInt<32>(ImmSh)) { 751 // Go with the shifted value. 752 Imm = ImmSh; 753 } else { 754 // Still stuck with a 64 bit value. 755 Remainder = Imm; 756 Shift = 32; 757 Imm >>= 32; 758 } 759 } 760 761 // Intermediate operand. 762 SDNode *Result; 763 764 // Handle first 32 bits. 765 unsigned Lo = Imm & 0xFFFF; 766 unsigned Hi = (Imm >> 16) & 0xFFFF; 767 768 auto getI32Imm = [CurDAG, dl](unsigned Imm) { 769 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 770 }; 771 772 // Simple value. 773 if (isInt<16>(Imm)) { 774 // Just the Lo bits. 775 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo)); 776 } else if (Lo) { 777 // Handle the Hi bits. 778 unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8; 779 Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi)); 780 // And Lo bits. 781 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, 782 SDValue(Result, 0), getI32Imm(Lo)); 783 } else { 784 // Just the Hi bits. 785 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); 786 } 787 788 // If no shift, we're done. 789 if (!Shift) return Result; 790 791 // If Hi word == Lo word, 792 // we can use rldimi to insert the Lo word into Hi word. 793 if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { 794 SDValue Ops[] = 795 { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)}; 796 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); 797 } 798 799 // Shift for next step if the upper 32-bits were not zero. 800 if (Imm) { 801 Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, 802 SDValue(Result, 0), 803 getI32Imm(Shift), 804 getI32Imm(63 - Shift)); 805 } 806 807 // Add in the last bits as required. 808 if ((Hi = (Remainder >> 16) & 0xFFFF)) { 809 Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, 810 SDValue(Result, 0), getI32Imm(Hi)); 811 } 812 if ((Lo = Remainder & 0xFFFF)) { 813 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, 814 SDValue(Result, 0), getI32Imm(Lo)); 815 } 816 817 return Result; 818 } 819 820 static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) { 821 unsigned Count = getInt64CountDirect(Imm); 822 if (Count == 1) 823 return getInt64Direct(CurDAG, dl, Imm); 824 825 unsigned RMin = 0; 826 827 int64_t MatImm; 828 unsigned MaskEnd; 829 830 for (unsigned r = 1; r < 63; ++r) { 831 uint64_t RImm = Rot64(Imm, r); 832 unsigned RCount = getInt64CountDirect(RImm) + 1; 833 if (RCount < Count) { 834 Count = RCount; 835 RMin = r; 836 MatImm = RImm; 837 MaskEnd = 63; 838 } 839 840 // If the immediate to generate has many trailing zeros, it might be 841 // worthwhile to generate a rotated value with too many leading ones 842 // (because that's free with li/lis's sign-extension semantics), and then 843 // mask them off after rotation. 844 845 unsigned LS = findLastSet(RImm); 846 // We're adding (63-LS) higher-order ones, and we expect to mask them off 847 // after performing the inverse rotation by (64-r). So we need that: 848 // 63-LS == 64-r => LS == r-1 849 if (LS != r-1) 850 continue; 851 852 uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); 853 uint64_t RImmWithOnes = RImm | OnesMask; 854 855 RCount = getInt64CountDirect(RImmWithOnes) + 1; 856 if (RCount < Count) { 857 Count = RCount; 858 RMin = r; 859 MatImm = RImmWithOnes; 860 MaskEnd = LS; 861 } 862 } 863 864 if (!RMin) 865 return getInt64Direct(CurDAG, dl, Imm); 866 867 auto getI32Imm = [CurDAG, dl](unsigned Imm) { 868 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 869 }; 870 871 SDValue Val = SDValue(getInt64Direct(CurDAG, dl, MatImm), 0); 872 return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val, 873 getI32Imm(64 - RMin), getI32Imm(MaskEnd)); 874 } 875 876 // Select a 64-bit constant. 877 static SDNode *getInt64(SelectionDAG *CurDAG, SDNode *N) { 878 SDLoc dl(N); 879 880 // Get 64 bit value. 881 int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue(); 882 return getInt64(CurDAG, dl, Imm); 883 } 884 885 namespace { 886 887 class BitPermutationSelector { 888 struct ValueBit { 889 SDValue V; 890 891 // The bit number in the value, using a convention where bit 0 is the 892 // lowest-order bit. 893 unsigned Idx; 894 895 enum Kind { 896 ConstZero, 897 Variable 898 } K; 899 900 ValueBit(SDValue V, unsigned I, Kind K = Variable) 901 : V(V), Idx(I), K(K) {} 902 ValueBit(Kind K = Variable) 903 : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {} 904 905 bool isZero() const { 906 return K == ConstZero; 907 } 908 909 bool hasValue() const { 910 return K == Variable; 911 } 912 913 SDValue getValue() const { 914 assert(hasValue() && "Cannot get the value of a constant bit"); 915 return V; 916 } 917 918 unsigned getValueBitIndex() const { 919 assert(hasValue() && "Cannot get the value bit index of a constant bit"); 920 return Idx; 921 } 922 }; 923 924 // A bit group has the same underlying value and the same rotate factor. 925 struct BitGroup { 926 SDValue V; 927 unsigned RLAmt; 928 unsigned StartIdx, EndIdx; 929 930 // This rotation amount assumes that the lower 32 bits of the quantity are 931 // replicated in the high 32 bits by the rotation operator (which is done 932 // by rlwinm and friends in 64-bit mode). 933 bool Repl32; 934 // Did converting to Repl32 == true change the rotation factor? If it did, 935 // it decreased it by 32. 936 bool Repl32CR; 937 // Was this group coalesced after setting Repl32 to true? 938 bool Repl32Coalesced; 939 940 BitGroup(SDValue V, unsigned R, unsigned S, unsigned E) 941 : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false), 942 Repl32Coalesced(false) { 943 DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R << 944 " [" << S << ", " << E << "]\n"); 945 } 946 }; 947 948 // Information on each (Value, RLAmt) pair (like the number of groups 949 // associated with each) used to choose the lowering method. 950 struct ValueRotInfo { 951 SDValue V; 952 unsigned RLAmt = std::numeric_limits<unsigned>::max(); 953 unsigned NumGroups = 0; 954 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max(); 955 bool Repl32 = false; 956 957 ValueRotInfo() = default; 958 959 // For sorting (in reverse order) by NumGroups, and then by 960 // FirstGroupStartIdx. 961 bool operator < (const ValueRotInfo &Other) const { 962 // We need to sort so that the non-Repl32 come first because, when we're 963 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit 964 // masking operation. 965 if (Repl32 < Other.Repl32) 966 return true; 967 else if (Repl32 > Other.Repl32) 968 return false; 969 else if (NumGroups > Other.NumGroups) 970 return true; 971 else if (NumGroups < Other.NumGroups) 972 return false; 973 else if (FirstGroupStartIdx < Other.FirstGroupStartIdx) 974 return true; 975 return false; 976 } 977 }; 978 979 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>; 980 using ValueBitsMemoizer = 981 DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>; 982 ValueBitsMemoizer Memoizer; 983 984 // Return a pair of bool and a SmallVector pointer to a memoization entry. 985 // The bool is true if something interesting was deduced, otherwise if we're 986 // providing only a generic representation of V (or something else likewise 987 // uninteresting for instruction selection) through the SmallVector. 988 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V, 989 unsigned NumBits) { 990 auto &ValueEntry = Memoizer[V]; 991 if (ValueEntry) 992 return std::make_pair(ValueEntry->first, &ValueEntry->second); 993 ValueEntry.reset(new ValueBitsMemoizedValue()); 994 bool &Interesting = ValueEntry->first; 995 SmallVector<ValueBit, 64> &Bits = ValueEntry->second; 996 Bits.resize(NumBits); 997 998 switch (V.getOpcode()) { 999 default: break; 1000 case ISD::ROTL: 1001 if (isa<ConstantSDNode>(V.getOperand(1))) { 1002 unsigned RotAmt = V.getConstantOperandVal(1); 1003 1004 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1005 1006 for (unsigned i = 0; i < NumBits; ++i) 1007 Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt]; 1008 1009 return std::make_pair(Interesting = true, &Bits); 1010 } 1011 break; 1012 case ISD::SHL: 1013 if (isa<ConstantSDNode>(V.getOperand(1))) { 1014 unsigned ShiftAmt = V.getConstantOperandVal(1); 1015 1016 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1017 1018 for (unsigned i = ShiftAmt; i < NumBits; ++i) 1019 Bits[i] = LHSBits[i - ShiftAmt]; 1020 1021 for (unsigned i = 0; i < ShiftAmt; ++i) 1022 Bits[i] = ValueBit(ValueBit::ConstZero); 1023 1024 return std::make_pair(Interesting = true, &Bits); 1025 } 1026 break; 1027 case ISD::SRL: 1028 if (isa<ConstantSDNode>(V.getOperand(1))) { 1029 unsigned ShiftAmt = V.getConstantOperandVal(1); 1030 1031 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1032 1033 for (unsigned i = 0; i < NumBits - ShiftAmt; ++i) 1034 Bits[i] = LHSBits[i + ShiftAmt]; 1035 1036 for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i) 1037 Bits[i] = ValueBit(ValueBit::ConstZero); 1038 1039 return std::make_pair(Interesting = true, &Bits); 1040 } 1041 break; 1042 case ISD::AND: 1043 if (isa<ConstantSDNode>(V.getOperand(1))) { 1044 uint64_t Mask = V.getConstantOperandVal(1); 1045 1046 const SmallVector<ValueBit, 64> *LHSBits; 1047 // Mark this as interesting, only if the LHS was also interesting. This 1048 // prevents the overall procedure from matching a single immediate 'and' 1049 // (which is non-optimal because such an and might be folded with other 1050 // things if we don't select it here). 1051 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits); 1052 1053 for (unsigned i = 0; i < NumBits; ++i) 1054 if (((Mask >> i) & 1) == 1) 1055 Bits[i] = (*LHSBits)[i]; 1056 else 1057 Bits[i] = ValueBit(ValueBit::ConstZero); 1058 1059 return std::make_pair(Interesting, &Bits); 1060 } 1061 break; 1062 case ISD::OR: { 1063 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second; 1064 const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second; 1065 1066 bool AllDisjoint = true; 1067 for (unsigned i = 0; i < NumBits; ++i) 1068 if (LHSBits[i].isZero()) 1069 Bits[i] = RHSBits[i]; 1070 else if (RHSBits[i].isZero()) 1071 Bits[i] = LHSBits[i]; 1072 else { 1073 AllDisjoint = false; 1074 break; 1075 } 1076 1077 if (!AllDisjoint) 1078 break; 1079 1080 return std::make_pair(Interesting = true, &Bits); 1081 } 1082 } 1083 1084 for (unsigned i = 0; i < NumBits; ++i) 1085 Bits[i] = ValueBit(V, i); 1086 1087 return std::make_pair(Interesting = false, &Bits); 1088 } 1089 1090 // For each value (except the constant ones), compute the left-rotate amount 1091 // to get it from its original to final position. 1092 void computeRotationAmounts() { 1093 HasZeros = false; 1094 RLAmt.resize(Bits.size()); 1095 for (unsigned i = 0; i < Bits.size(); ++i) 1096 if (Bits[i].hasValue()) { 1097 unsigned VBI = Bits[i].getValueBitIndex(); 1098 if (i >= VBI) 1099 RLAmt[i] = i - VBI; 1100 else 1101 RLAmt[i] = Bits.size() - (VBI - i); 1102 } else if (Bits[i].isZero()) { 1103 HasZeros = true; 1104 RLAmt[i] = UINT32_MAX; 1105 } else { 1106 llvm_unreachable("Unknown value bit type"); 1107 } 1108 } 1109 1110 // Collect groups of consecutive bits with the same underlying value and 1111 // rotation factor. If we're doing late masking, we ignore zeros, otherwise 1112 // they break up groups. 1113 void collectBitGroups(bool LateMask) { 1114 BitGroups.clear(); 1115 1116 unsigned LastRLAmt = RLAmt[0]; 1117 SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue(); 1118 unsigned LastGroupStartIdx = 0; 1119 for (unsigned i = 1; i < Bits.size(); ++i) { 1120 unsigned ThisRLAmt = RLAmt[i]; 1121 SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue(); 1122 if (LateMask && !ThisValue) { 1123 ThisValue = LastValue; 1124 ThisRLAmt = LastRLAmt; 1125 // If we're doing late masking, then the first bit group always starts 1126 // at zero (even if the first bits were zero). 1127 if (BitGroups.empty()) 1128 LastGroupStartIdx = 0; 1129 } 1130 1131 // If this bit has the same underlying value and the same rotate factor as 1132 // the last one, then they're part of the same group. 1133 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue) 1134 continue; 1135 1136 if (LastValue.getNode()) 1137 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, 1138 i-1)); 1139 LastRLAmt = ThisRLAmt; 1140 LastValue = ThisValue; 1141 LastGroupStartIdx = i; 1142 } 1143 if (LastValue.getNode()) 1144 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx, 1145 Bits.size()-1)); 1146 1147 if (BitGroups.empty()) 1148 return; 1149 1150 // We might be able to combine the first and last groups. 1151 if (BitGroups.size() > 1) { 1152 // If the first and last groups are the same, then remove the first group 1153 // in favor of the last group, making the ending index of the last group 1154 // equal to the ending index of the to-be-removed first group. 1155 if (BitGroups[0].StartIdx == 0 && 1156 BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 && 1157 BitGroups[0].V == BitGroups[BitGroups.size()-1].V && 1158 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) { 1159 DEBUG(dbgs() << "\tcombining final bit group with initial one\n"); 1160 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx; 1161 BitGroups.erase(BitGroups.begin()); 1162 } 1163 } 1164 } 1165 1166 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups 1167 // associated with each. If there is a degeneracy, pick the one that occurs 1168 // first (in the final value). 1169 void collectValueRotInfo() { 1170 ValueRots.clear(); 1171 1172 for (auto &BG : BitGroups) { 1173 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0); 1174 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)]; 1175 VRI.V = BG.V; 1176 VRI.RLAmt = BG.RLAmt; 1177 VRI.Repl32 = BG.Repl32; 1178 VRI.NumGroups += 1; 1179 VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx); 1180 } 1181 1182 // Now that we've collected the various ValueRotInfo instances, we need to 1183 // sort them. 1184 ValueRotsVec.clear(); 1185 for (auto &I : ValueRots) { 1186 ValueRotsVec.push_back(I.second); 1187 } 1188 std::sort(ValueRotsVec.begin(), ValueRotsVec.end()); 1189 } 1190 1191 // In 64-bit mode, rlwinm and friends have a rotation operator that 1192 // replicates the low-order 32 bits into the high-order 32-bits. The mask 1193 // indices of these instructions can only be in the lower 32 bits, so they 1194 // can only represent some 64-bit bit groups. However, when they can be used, 1195 // the 32-bit replication can be used to represent, as a single bit group, 1196 // otherwise separate bit groups. We'll convert to replicated-32-bit bit 1197 // groups when possible. Returns true if any of the bit groups were 1198 // converted. 1199 void assignRepl32BitGroups() { 1200 // If we have bits like this: 1201 // 1202 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 1203 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24 1204 // Groups: | RLAmt = 8 | RLAmt = 40 | 1205 // 1206 // But, making use of a 32-bit operation that replicates the low-order 32 1207 // bits into the high-order 32 bits, this can be one bit group with a RLAmt 1208 // of 8. 1209 1210 auto IsAllLow32 = [this](BitGroup & BG) { 1211 if (BG.StartIdx <= BG.EndIdx) { 1212 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) { 1213 if (!Bits[i].hasValue()) 1214 continue; 1215 if (Bits[i].getValueBitIndex() >= 32) 1216 return false; 1217 } 1218 } else { 1219 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) { 1220 if (!Bits[i].hasValue()) 1221 continue; 1222 if (Bits[i].getValueBitIndex() >= 32) 1223 return false; 1224 } 1225 for (unsigned i = 0; i <= BG.EndIdx; ++i) { 1226 if (!Bits[i].hasValue()) 1227 continue; 1228 if (Bits[i].getValueBitIndex() >= 32) 1229 return false; 1230 } 1231 } 1232 1233 return true; 1234 }; 1235 1236 for (auto &BG : BitGroups) { 1237 if (BG.StartIdx < 32 && BG.EndIdx < 32) { 1238 if (IsAllLow32(BG)) { 1239 if (BG.RLAmt >= 32) { 1240 BG.RLAmt -= 32; 1241 BG.Repl32CR = true; 1242 } 1243 1244 BG.Repl32 = true; 1245 1246 DEBUG(dbgs() << "\t32-bit replicated bit group for " << 1247 BG.V.getNode() << " RLAmt = " << BG.RLAmt << 1248 " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n"); 1249 } 1250 } 1251 } 1252 1253 // Now walk through the bit groups, consolidating where possible. 1254 for (auto I = BitGroups.begin(); I != BitGroups.end();) { 1255 // We might want to remove this bit group by merging it with the previous 1256 // group (which might be the ending group). 1257 auto IP = (I == BitGroups.begin()) ? 1258 std::prev(BitGroups.end()) : std::prev(I); 1259 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt && 1260 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) { 1261 1262 DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " << 1263 I->V.getNode() << " RLAmt = " << I->RLAmt << 1264 " [" << I->StartIdx << ", " << I->EndIdx << 1265 "] with group with range [" << 1266 IP->StartIdx << ", " << IP->EndIdx << "]\n"); 1267 1268 IP->EndIdx = I->EndIdx; 1269 IP->Repl32CR = IP->Repl32CR || I->Repl32CR; 1270 IP->Repl32Coalesced = true; 1271 I = BitGroups.erase(I); 1272 continue; 1273 } else { 1274 // There is a special case worth handling: If there is a single group 1275 // covering the entire upper 32 bits, and it can be merged with both 1276 // the next and previous groups (which might be the same group), then 1277 // do so. If it is the same group (so there will be only one group in 1278 // total), then we need to reverse the order of the range so that it 1279 // covers the entire 64 bits. 1280 if (I->StartIdx == 32 && I->EndIdx == 63) { 1281 assert(std::next(I) == BitGroups.end() && 1282 "bit group ends at index 63 but there is another?"); 1283 auto IN = BitGroups.begin(); 1284 1285 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V && 1286 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt && 1287 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP && 1288 IsAllLow32(*I)) { 1289 1290 DEBUG(dbgs() << "\tcombining bit group for " << 1291 I->V.getNode() << " RLAmt = " << I->RLAmt << 1292 " [" << I->StartIdx << ", " << I->EndIdx << 1293 "] with 32-bit replicated groups with ranges [" << 1294 IP->StartIdx << ", " << IP->EndIdx << "] and [" << 1295 IN->StartIdx << ", " << IN->EndIdx << "]\n"); 1296 1297 if (IP == IN) { 1298 // There is only one other group; change it to cover the whole 1299 // range (backward, so that it can still be Repl32 but cover the 1300 // whole 64-bit range). 1301 IP->StartIdx = 31; 1302 IP->EndIdx = 30; 1303 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32; 1304 IP->Repl32Coalesced = true; 1305 I = BitGroups.erase(I); 1306 } else { 1307 // There are two separate groups, one before this group and one 1308 // after us (at the beginning). We're going to remove this group, 1309 // but also the group at the very beginning. 1310 IP->EndIdx = IN->EndIdx; 1311 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32; 1312 IP->Repl32Coalesced = true; 1313 I = BitGroups.erase(I); 1314 BitGroups.erase(BitGroups.begin()); 1315 } 1316 1317 // This must be the last group in the vector (and we might have 1318 // just invalidated the iterator above), so break here. 1319 break; 1320 } 1321 } 1322 } 1323 1324 ++I; 1325 } 1326 } 1327 1328 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 1329 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 1330 } 1331 1332 uint64_t getZerosMask() { 1333 uint64_t Mask = 0; 1334 for (unsigned i = 0; i < Bits.size(); ++i) { 1335 if (Bits[i].hasValue()) 1336 continue; 1337 Mask |= (UINT64_C(1) << i); 1338 } 1339 1340 return ~Mask; 1341 } 1342 1343 // Depending on the number of groups for a particular value, it might be 1344 // better to rotate, mask explicitly (using andi/andis), and then or the 1345 // result. Select this part of the result first. 1346 void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { 1347 if (BPermRewriterNoMasking) 1348 return; 1349 1350 for (ValueRotInfo &VRI : ValueRotsVec) { 1351 unsigned Mask = 0; 1352 for (unsigned i = 0; i < Bits.size(); ++i) { 1353 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V) 1354 continue; 1355 if (RLAmt[i] != VRI.RLAmt) 1356 continue; 1357 Mask |= (1u << i); 1358 } 1359 1360 // Compute the masks for andi/andis that would be necessary. 1361 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16; 1362 assert((ANDIMask != 0 || ANDISMask != 0) && 1363 "No set bits in mask for value bit groups"); 1364 bool NeedsRotate = VRI.RLAmt != 0; 1365 1366 // We're trying to minimize the number of instructions. If we have one 1367 // group, using one of andi/andis can break even. If we have three 1368 // groups, we can use both andi and andis and break even (to use both 1369 // andi and andis we also need to or the results together). We need four 1370 // groups if we also need to rotate. To use andi/andis we need to do more 1371 // than break even because rotate-and-mask instructions tend to be easier 1372 // to schedule. 1373 1374 // FIXME: We've biased here against using andi/andis, which is right for 1375 // POWER cores, but not optimal everywhere. For example, on the A2, 1376 // andi/andis have single-cycle latency whereas the rotate-and-mask 1377 // instructions take two cycles, and it would be better to bias toward 1378 // andi/andis in break-even cases. 1379 1380 unsigned NumAndInsts = (unsigned) NeedsRotate + 1381 (unsigned) (ANDIMask != 0) + 1382 (unsigned) (ANDISMask != 0) + 1383 (unsigned) (ANDIMask != 0 && ANDISMask != 0) + 1384 (unsigned) (bool) Res; 1385 1386 DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() << 1387 " RL: " << VRI.RLAmt << ":" << 1388 "\n\t\t\tisel using masking: " << NumAndInsts << 1389 " using rotates: " << VRI.NumGroups << "\n"); 1390 1391 if (NumAndInsts >= VRI.NumGroups) 1392 continue; 1393 1394 DEBUG(dbgs() << "\t\t\t\tusing masking\n"); 1395 1396 if (InstCnt) *InstCnt += NumAndInsts; 1397 1398 SDValue VRot; 1399 if (VRI.RLAmt) { 1400 SDValue Ops[] = 1401 { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), 1402 getI32Imm(31, dl) }; 1403 VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, 1404 Ops), 0); 1405 } else { 1406 VRot = VRI.V; 1407 } 1408 1409 SDValue ANDIVal, ANDISVal; 1410 if (ANDIMask != 0) 1411 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32, 1412 VRot, getI32Imm(ANDIMask, dl)), 0); 1413 if (ANDISMask != 0) 1414 ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32, 1415 VRot, getI32Imm(ANDISMask, dl)), 0); 1416 1417 SDValue TotalVal; 1418 if (!ANDIVal) 1419 TotalVal = ANDISVal; 1420 else if (!ANDISVal) 1421 TotalVal = ANDIVal; 1422 else 1423 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, 1424 ANDIVal, ANDISVal), 0); 1425 1426 if (!Res) 1427 Res = TotalVal; 1428 else 1429 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, 1430 Res, TotalVal), 0); 1431 1432 // Now, remove all groups with this underlying value and rotation 1433 // factor. 1434 eraseMatchingBitGroups([VRI](const BitGroup &BG) { 1435 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; 1436 }); 1437 } 1438 } 1439 1440 // Instruction selection for the 32-bit case. 1441 SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) { 1442 SDLoc dl(N); 1443 SDValue Res; 1444 1445 if (InstCnt) *InstCnt = 0; 1446 1447 // Take care of cases that should use andi/andis first. 1448 SelectAndParts32(dl, Res, InstCnt); 1449 1450 // If we've not yet selected a 'starting' instruction, and we have no zeros 1451 // to fill in, select the (Value, RLAmt) with the highest priority (largest 1452 // number of groups), and start with this rotated value. 1453 if ((!HasZeros || LateMask) && !Res) { 1454 ValueRotInfo &VRI = ValueRotsVec[0]; 1455 if (VRI.RLAmt) { 1456 if (InstCnt) *InstCnt += 1; 1457 SDValue Ops[] = 1458 { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl), 1459 getI32Imm(31, dl) }; 1460 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 1461 0); 1462 } else { 1463 Res = VRI.V; 1464 } 1465 1466 // Now, remove all groups with this underlying value and rotation factor. 1467 eraseMatchingBitGroups([VRI](const BitGroup &BG) { 1468 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; 1469 }); 1470 } 1471 1472 if (InstCnt) *InstCnt += BitGroups.size(); 1473 1474 // Insert the other groups (one at a time). 1475 for (auto &BG : BitGroups) { 1476 if (!Res) { 1477 SDValue Ops[] = 1478 { BG.V, getI32Imm(BG.RLAmt, dl), 1479 getI32Imm(Bits.size() - BG.EndIdx - 1, dl), 1480 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; 1481 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 1482 } else { 1483 SDValue Ops[] = 1484 { Res, BG.V, getI32Imm(BG.RLAmt, dl), 1485 getI32Imm(Bits.size() - BG.EndIdx - 1, dl), 1486 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; 1487 Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0); 1488 } 1489 } 1490 1491 if (LateMask) { 1492 unsigned Mask = (unsigned) getZerosMask(); 1493 1494 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16; 1495 assert((ANDIMask != 0 || ANDISMask != 0) && 1496 "No set bits in zeros mask?"); 1497 1498 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + 1499 (unsigned) (ANDISMask != 0) + 1500 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 1501 1502 SDValue ANDIVal, ANDISVal; 1503 if (ANDIMask != 0) 1504 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32, 1505 Res, getI32Imm(ANDIMask, dl)), 0); 1506 if (ANDISMask != 0) 1507 ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32, 1508 Res, getI32Imm(ANDISMask, dl)), 0); 1509 1510 if (!ANDIVal) 1511 Res = ANDISVal; 1512 else if (!ANDISVal) 1513 Res = ANDIVal; 1514 else 1515 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, 1516 ANDIVal, ANDISVal), 0); 1517 } 1518 1519 return Res.getNode(); 1520 } 1521 1522 unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32, 1523 unsigned MaskStart, unsigned MaskEnd, 1524 bool IsIns) { 1525 // In the notation used by the instructions, 'start' and 'end' are reversed 1526 // because bits are counted from high to low order. 1527 unsigned InstMaskStart = 64 - MaskEnd - 1, 1528 InstMaskEnd = 64 - MaskStart - 1; 1529 1530 if (Repl32) 1531 return 1; 1532 1533 if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) || 1534 InstMaskEnd == 63 - RLAmt) 1535 return 1; 1536 1537 return 2; 1538 } 1539 1540 // For 64-bit values, not all combinations of rotates and masks are 1541 // available. Produce one if it is available. 1542 SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt, 1543 bool Repl32, unsigned MaskStart, unsigned MaskEnd, 1544 unsigned *InstCnt = nullptr) { 1545 // In the notation used by the instructions, 'start' and 'end' are reversed 1546 // because bits are counted from high to low order. 1547 unsigned InstMaskStart = 64 - MaskEnd - 1, 1548 InstMaskEnd = 64 - MaskStart - 1; 1549 1550 if (InstCnt) *InstCnt += 1; 1551 1552 if (Repl32) { 1553 // This rotation amount assumes that the lower 32 bits of the quantity 1554 // are replicated in the high 32 bits by the rotation operator (which is 1555 // done by rlwinm and friends). 1556 assert(InstMaskStart >= 32 && "Mask cannot start out of range"); 1557 assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); 1558 SDValue Ops[] = 1559 { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), 1560 getI32Imm(InstMaskEnd - 32, dl) }; 1561 return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64, 1562 Ops), 0); 1563 } 1564 1565 if (InstMaskEnd == 63) { 1566 SDValue Ops[] = 1567 { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; 1568 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0); 1569 } 1570 1571 if (InstMaskStart == 0) { 1572 SDValue Ops[] = 1573 { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) }; 1574 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0); 1575 } 1576 1577 if (InstMaskEnd == 63 - RLAmt) { 1578 SDValue Ops[] = 1579 { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; 1580 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0); 1581 } 1582 1583 // We cannot do this with a single instruction, so we'll use two. The 1584 // problem is that we're not free to choose both a rotation amount and mask 1585 // start and end independently. We can choose an arbitrary mask start and 1586 // end, but then the rotation amount is fixed. Rotation, however, can be 1587 // inverted, and so by applying an "inverse" rotation first, we can get the 1588 // desired result. 1589 if (InstCnt) *InstCnt += 1; 1590 1591 // The rotation mask for the second instruction must be MaskStart. 1592 unsigned RLAmt2 = MaskStart; 1593 // The first instruction must rotate V so that the overall rotation amount 1594 // is RLAmt. 1595 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; 1596 if (RLAmt1) 1597 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); 1598 return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd); 1599 } 1600 1601 // For 64-bit values, not all combinations of rotates and masks are 1602 // available. Produce a rotate-mask-and-insert if one is available. 1603 SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl, 1604 unsigned RLAmt, bool Repl32, unsigned MaskStart, 1605 unsigned MaskEnd, unsigned *InstCnt = nullptr) { 1606 // In the notation used by the instructions, 'start' and 'end' are reversed 1607 // because bits are counted from high to low order. 1608 unsigned InstMaskStart = 64 - MaskEnd - 1, 1609 InstMaskEnd = 64 - MaskStart - 1; 1610 1611 if (InstCnt) *InstCnt += 1; 1612 1613 if (Repl32) { 1614 // This rotation amount assumes that the lower 32 bits of the quantity 1615 // are replicated in the high 32 bits by the rotation operator (which is 1616 // done by rlwinm and friends). 1617 assert(InstMaskStart >= 32 && "Mask cannot start out of range"); 1618 assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); 1619 SDValue Ops[] = 1620 { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), 1621 getI32Imm(InstMaskEnd - 32, dl) }; 1622 return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, 1623 Ops), 0); 1624 } 1625 1626 if (InstMaskEnd == 63 - RLAmt) { 1627 SDValue Ops[] = 1628 { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; 1629 return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0); 1630 } 1631 1632 // We cannot do this with a single instruction, so we'll use two. The 1633 // problem is that we're not free to choose both a rotation amount and mask 1634 // start and end independently. We can choose an arbitrary mask start and 1635 // end, but then the rotation amount is fixed. Rotation, however, can be 1636 // inverted, and so by applying an "inverse" rotation first, we can get the 1637 // desired result. 1638 if (InstCnt) *InstCnt += 1; 1639 1640 // The rotation mask for the second instruction must be MaskStart. 1641 unsigned RLAmt2 = MaskStart; 1642 // The first instruction must rotate V so that the overall rotation amount 1643 // is RLAmt. 1644 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; 1645 if (RLAmt1) 1646 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); 1647 return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd); 1648 } 1649 1650 void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { 1651 if (BPermRewriterNoMasking) 1652 return; 1653 1654 // The idea here is the same as in the 32-bit version, but with additional 1655 // complications from the fact that Repl32 might be true. Because we 1656 // aggressively convert bit groups to Repl32 form (which, for small 1657 // rotation factors, involves no other change), and then coalesce, it might 1658 // be the case that a single 64-bit masking operation could handle both 1659 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32 1660 // form allowed coalescing, then we must use a 32-bit rotaton in order to 1661 // completely capture the new combined bit group. 1662 1663 for (ValueRotInfo &VRI : ValueRotsVec) { 1664 uint64_t Mask = 0; 1665 1666 // We need to add to the mask all bits from the associated bit groups. 1667 // If Repl32 is false, we need to add bits from bit groups that have 1668 // Repl32 true, but are trivially convertable to Repl32 false. Such a 1669 // group is trivially convertable if it overlaps only with the lower 32 1670 // bits, and the group has not been coalesced. 1671 auto MatchingBG = [VRI](const BitGroup &BG) { 1672 if (VRI.V != BG.V) 1673 return false; 1674 1675 unsigned EffRLAmt = BG.RLAmt; 1676 if (!VRI.Repl32 && BG.Repl32) { 1677 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx && 1678 !BG.Repl32Coalesced) { 1679 if (BG.Repl32CR) 1680 EffRLAmt += 32; 1681 } else { 1682 return false; 1683 } 1684 } else if (VRI.Repl32 != BG.Repl32) { 1685 return false; 1686 } 1687 1688 return VRI.RLAmt == EffRLAmt; 1689 }; 1690 1691 for (auto &BG : BitGroups) { 1692 if (!MatchingBG(BG)) 1693 continue; 1694 1695 if (BG.StartIdx <= BG.EndIdx) { 1696 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) 1697 Mask |= (UINT64_C(1) << i); 1698 } else { 1699 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) 1700 Mask |= (UINT64_C(1) << i); 1701 for (unsigned i = 0; i <= BG.EndIdx; ++i) 1702 Mask |= (UINT64_C(1) << i); 1703 } 1704 } 1705 1706 // We can use the 32-bit andi/andis technique if the mask does not 1707 // require any higher-order bits. This can save an instruction compared 1708 // to always using the general 64-bit technique. 1709 bool Use32BitInsts = isUInt<32>(Mask); 1710 // Compute the masks for andi/andis that would be necessary. 1711 unsigned ANDIMask = (Mask & UINT16_MAX), 1712 ANDISMask = (Mask >> 16) & UINT16_MAX; 1713 1714 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)); 1715 1716 unsigned NumAndInsts = (unsigned) NeedsRotate + 1717 (unsigned) (bool) Res; 1718 if (Use32BitInsts) 1719 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + 1720 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 1721 else 1722 NumAndInsts += getInt64Count(Mask) + /* and */ 1; 1723 1724 unsigned NumRLInsts = 0; 1725 bool FirstBG = true; 1726 bool MoreBG = false; 1727 for (auto &BG : BitGroups) { 1728 if (!MatchingBG(BG)) { 1729 MoreBG = true; 1730 continue; 1731 } 1732 NumRLInsts += 1733 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, 1734 !FirstBG); 1735 FirstBG = false; 1736 } 1737 1738 DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() << 1739 " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << 1740 "\n\t\t\tisel using masking: " << NumAndInsts << 1741 " using rotates: " << NumRLInsts << "\n"); 1742 1743 // When we'd use andi/andis, we bias toward using the rotates (andi only 1744 // has a record form, and is cracked on POWER cores). However, when using 1745 // general 64-bit constant formation, bias toward the constant form, 1746 // because that exposes more opportunities for CSE. 1747 if (NumAndInsts > NumRLInsts) 1748 continue; 1749 // When merging multiple bit groups, instruction or is used. 1750 // But when rotate is used, rldimi can inert the rotated value into any 1751 // register, so instruction or can be avoided. 1752 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts) 1753 continue; 1754 1755 DEBUG(dbgs() << "\t\t\t\tusing masking\n"); 1756 1757 if (InstCnt) *InstCnt += NumAndInsts; 1758 1759 SDValue VRot; 1760 // We actually need to generate a rotation if we have a non-zero rotation 1761 // factor or, in the Repl32 case, if we care about any of the 1762 // higher-order replicated bits. In the latter case, we generate a mask 1763 // backward so that it actually includes the entire 64 bits. 1764 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask))) 1765 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, 1766 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63); 1767 else 1768 VRot = VRI.V; 1769 1770 SDValue TotalVal; 1771 if (Use32BitInsts) { 1772 assert((ANDIMask != 0 || ANDISMask != 0) && 1773 "No set bits in mask when using 32-bit ands for 64-bit value"); 1774 1775 SDValue ANDIVal, ANDISVal; 1776 if (ANDIMask != 0) 1777 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, 1778 VRot, getI32Imm(ANDIMask, dl)), 0); 1779 if (ANDISMask != 0) 1780 ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, 1781 VRot, getI32Imm(ANDISMask, dl)), 0); 1782 1783 if (!ANDIVal) 1784 TotalVal = ANDISVal; 1785 else if (!ANDISVal) 1786 TotalVal = ANDIVal; 1787 else 1788 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 1789 ANDIVal, ANDISVal), 0); 1790 } else { 1791 TotalVal = SDValue(getInt64(CurDAG, dl, Mask), 0); 1792 TotalVal = 1793 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, 1794 VRot, TotalVal), 0); 1795 } 1796 1797 if (!Res) 1798 Res = TotalVal; 1799 else 1800 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 1801 Res, TotalVal), 0); 1802 1803 // Now, remove all groups with this underlying value and rotation 1804 // factor. 1805 eraseMatchingBitGroups(MatchingBG); 1806 } 1807 } 1808 1809 // Instruction selection for the 64-bit case. 1810 SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) { 1811 SDLoc dl(N); 1812 SDValue Res; 1813 1814 if (InstCnt) *InstCnt = 0; 1815 1816 // Take care of cases that should use andi/andis first. 1817 SelectAndParts64(dl, Res, InstCnt); 1818 1819 // If we've not yet selected a 'starting' instruction, and we have no zeros 1820 // to fill in, select the (Value, RLAmt) with the highest priority (largest 1821 // number of groups), and start with this rotated value. 1822 if ((!HasZeros || LateMask) && !Res) { 1823 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32 1824 // groups will come first, and so the VRI representing the largest number 1825 // of groups might not be first (it might be the first Repl32 groups). 1826 unsigned MaxGroupsIdx = 0; 1827 if (!ValueRotsVec[0].Repl32) { 1828 for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i) 1829 if (ValueRotsVec[i].Repl32) { 1830 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups) 1831 MaxGroupsIdx = i; 1832 break; 1833 } 1834 } 1835 1836 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx]; 1837 bool NeedsRotate = false; 1838 if (VRI.RLAmt) { 1839 NeedsRotate = true; 1840 } else if (VRI.Repl32) { 1841 for (auto &BG : BitGroups) { 1842 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt || 1843 BG.Repl32 != VRI.Repl32) 1844 continue; 1845 1846 // We don't need a rotate if the bit group is confined to the lower 1847 // 32 bits. 1848 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx) 1849 continue; 1850 1851 NeedsRotate = true; 1852 break; 1853 } 1854 } 1855 1856 if (NeedsRotate) 1857 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, 1858 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63, 1859 InstCnt); 1860 else 1861 Res = VRI.V; 1862 1863 // Now, remove all groups with this underlying value and rotation factor. 1864 if (Res) 1865 eraseMatchingBitGroups([VRI](const BitGroup &BG) { 1866 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt && 1867 BG.Repl32 == VRI.Repl32; 1868 }); 1869 } 1870 1871 // Because 64-bit rotates are more flexible than inserts, we might have a 1872 // preference regarding which one we do first (to save one instruction). 1873 if (!Res) 1874 for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) { 1875 if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, 1876 false) < 1877 SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, 1878 true)) { 1879 if (I != BitGroups.begin()) { 1880 BitGroup BG = *I; 1881 BitGroups.erase(I); 1882 BitGroups.insert(BitGroups.begin(), BG); 1883 } 1884 1885 break; 1886 } 1887 } 1888 1889 // Insert the other groups (one at a time). 1890 for (auto &BG : BitGroups) { 1891 if (!Res) 1892 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx, 1893 BG.EndIdx, InstCnt); 1894 else 1895 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32, 1896 BG.StartIdx, BG.EndIdx, InstCnt); 1897 } 1898 1899 if (LateMask) { 1900 uint64_t Mask = getZerosMask(); 1901 1902 // We can use the 32-bit andi/andis technique if the mask does not 1903 // require any higher-order bits. This can save an instruction compared 1904 // to always using the general 64-bit technique. 1905 bool Use32BitInsts = isUInt<32>(Mask); 1906 // Compute the masks for andi/andis that would be necessary. 1907 unsigned ANDIMask = (Mask & UINT16_MAX), 1908 ANDISMask = (Mask >> 16) & UINT16_MAX; 1909 1910 if (Use32BitInsts) { 1911 assert((ANDIMask != 0 || ANDISMask != 0) && 1912 "No set bits in mask when using 32-bit ands for 64-bit value"); 1913 1914 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + 1915 (unsigned) (ANDISMask != 0) + 1916 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 1917 1918 SDValue ANDIVal, ANDISVal; 1919 if (ANDIMask != 0) 1920 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, 1921 Res, getI32Imm(ANDIMask, dl)), 0); 1922 if (ANDISMask != 0) 1923 ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, 1924 Res, getI32Imm(ANDISMask, dl)), 0); 1925 1926 if (!ANDIVal) 1927 Res = ANDISVal; 1928 else if (!ANDISVal) 1929 Res = ANDIVal; 1930 else 1931 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 1932 ANDIVal, ANDISVal), 0); 1933 } else { 1934 if (InstCnt) *InstCnt += getInt64Count(Mask) + /* and */ 1; 1935 1936 SDValue MaskVal = SDValue(getInt64(CurDAG, dl, Mask), 0); 1937 Res = 1938 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, 1939 Res, MaskVal), 0); 1940 } 1941 } 1942 1943 return Res.getNode(); 1944 } 1945 1946 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) { 1947 // Fill in BitGroups. 1948 collectBitGroups(LateMask); 1949 if (BitGroups.empty()) 1950 return nullptr; 1951 1952 // For 64-bit values, figure out when we can use 32-bit instructions. 1953 if (Bits.size() == 64) 1954 assignRepl32BitGroups(); 1955 1956 // Fill in ValueRotsVec. 1957 collectValueRotInfo(); 1958 1959 if (Bits.size() == 32) { 1960 return Select32(N, LateMask, InstCnt); 1961 } else { 1962 assert(Bits.size() == 64 && "Not 64 bits here?"); 1963 return Select64(N, LateMask, InstCnt); 1964 } 1965 1966 return nullptr; 1967 } 1968 1969 void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) { 1970 BitGroups.erase(remove_if(BitGroups, F), BitGroups.end()); 1971 } 1972 1973 SmallVector<ValueBit, 64> Bits; 1974 1975 bool HasZeros; 1976 SmallVector<unsigned, 64> RLAmt; 1977 1978 SmallVector<BitGroup, 16> BitGroups; 1979 1980 DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots; 1981 SmallVector<ValueRotInfo, 16> ValueRotsVec; 1982 1983 SelectionDAG *CurDAG; 1984 1985 public: 1986 BitPermutationSelector(SelectionDAG *DAG) 1987 : CurDAG(DAG) {} 1988 1989 // Here we try to match complex bit permutations into a set of 1990 // rotate-and-shift/shift/and/or instructions, using a set of heuristics 1991 // known to produce optimial code for common cases (like i32 byte swapping). 1992 SDNode *Select(SDNode *N) { 1993 Memoizer.clear(); 1994 auto Result = 1995 getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits()); 1996 if (!Result.first) 1997 return nullptr; 1998 Bits = std::move(*Result.second); 1999 2000 DEBUG(dbgs() << "Considering bit-permutation-based instruction" 2001 " selection for: "); 2002 DEBUG(N->dump(CurDAG)); 2003 2004 // Fill it RLAmt and set HasZeros. 2005 computeRotationAmounts(); 2006 2007 if (!HasZeros) 2008 return Select(N, false); 2009 2010 // We currently have two techniques for handling results with zeros: early 2011 // masking (the default) and late masking. Late masking is sometimes more 2012 // efficient, but because the structure of the bit groups is different, it 2013 // is hard to tell without generating both and comparing the results. With 2014 // late masking, we ignore zeros in the resulting value when inserting each 2015 // set of bit groups, and then mask in the zeros at the end. With early 2016 // masking, we only insert the non-zero parts of the result at every step. 2017 2018 unsigned InstCnt, InstCntLateMask; 2019 DEBUG(dbgs() << "\tEarly masking:\n"); 2020 SDNode *RN = Select(N, false, &InstCnt); 2021 DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n"); 2022 2023 DEBUG(dbgs() << "\tLate masking:\n"); 2024 SDNode *RNLM = Select(N, true, &InstCntLateMask); 2025 DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask << 2026 " instructions\n"); 2027 2028 if (InstCnt <= InstCntLateMask) { 2029 DEBUG(dbgs() << "\tUsing early-masking for isel\n"); 2030 return RN; 2031 } 2032 2033 DEBUG(dbgs() << "\tUsing late-masking for isel\n"); 2034 return RNLM; 2035 } 2036 }; 2037 2038 } // end anonymous namespace 2039 2040 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { 2041 if (N->getValueType(0) != MVT::i32 && 2042 N->getValueType(0) != MVT::i64) 2043 return false; 2044 2045 if (!UseBitPermRewriter) 2046 return false; 2047 2048 switch (N->getOpcode()) { 2049 default: break; 2050 case ISD::ROTL: 2051 case ISD::SHL: 2052 case ISD::SRL: 2053 case ISD::AND: 2054 case ISD::OR: { 2055 BitPermutationSelector BPS(CurDAG); 2056 if (SDNode *New = BPS.Select(N)) { 2057 ReplaceNode(N, New); 2058 return true; 2059 } 2060 return false; 2061 } 2062 } 2063 2064 return false; 2065 } 2066 2067 /// SelectCC - Select a comparison of the specified values with the specified 2068 /// condition code, returning the CR# of the expression. 2069 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2070 const SDLoc &dl) { 2071 // Always select the LHS. 2072 unsigned Opc; 2073 2074 if (LHS.getValueType() == MVT::i32) { 2075 unsigned Imm; 2076 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 2077 if (isInt32Immediate(RHS, Imm)) { 2078 // SETEQ/SETNE comparison with 16-bit immediate, fold it. 2079 if (isUInt<16>(Imm)) 2080 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, 2081 getI32Imm(Imm & 0xFFFF, dl)), 2082 0); 2083 // If this is a 16-bit signed immediate, fold it. 2084 if (isInt<16>((int)Imm)) 2085 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, 2086 getI32Imm(Imm & 0xFFFF, dl)), 2087 0); 2088 2089 // For non-equality comparisons, the default code would materialize the 2090 // constant, then compare against it, like this: 2091 // lis r2, 4660 2092 // ori r2, r2, 22136 2093 // cmpw cr0, r3, r2 2094 // Since we are just comparing for equality, we can emit this instead: 2095 // xoris r0,r3,0x1234 2096 // cmplwi cr0,r0,0x5678 2097 // beq cr0,L6 2098 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS, 2099 getI32Imm(Imm >> 16, dl)), 0); 2100 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor, 2101 getI32Imm(Imm & 0xFFFF, dl)), 0); 2102 } 2103 Opc = PPC::CMPLW; 2104 } else if (ISD::isUnsignedIntSetCC(CC)) { 2105 if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm)) 2106 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, 2107 getI32Imm(Imm & 0xFFFF, dl)), 0); 2108 Opc = PPC::CMPLW; 2109 } else { 2110 int16_t SImm; 2111 if (isIntS16Immediate(RHS, SImm)) 2112 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, 2113 getI32Imm((int)SImm & 0xFFFF, 2114 dl)), 2115 0); 2116 Opc = PPC::CMPW; 2117 } 2118 } else if (LHS.getValueType() == MVT::i64) { 2119 uint64_t Imm; 2120 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 2121 if (isInt64Immediate(RHS.getNode(), Imm)) { 2122 // SETEQ/SETNE comparison with 16-bit immediate, fold it. 2123 if (isUInt<16>(Imm)) 2124 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, 2125 getI32Imm(Imm & 0xFFFF, dl)), 2126 0); 2127 // If this is a 16-bit signed immediate, fold it. 2128 if (isInt<16>(Imm)) 2129 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, 2130 getI32Imm(Imm & 0xFFFF, dl)), 2131 0); 2132 2133 // For non-equality comparisons, the default code would materialize the 2134 // constant, then compare against it, like this: 2135 // lis r2, 4660 2136 // ori r2, r2, 22136 2137 // cmpd cr0, r3, r2 2138 // Since we are just comparing for equality, we can emit this instead: 2139 // xoris r0,r3,0x1234 2140 // cmpldi cr0,r0,0x5678 2141 // beq cr0,L6 2142 if (isUInt<32>(Imm)) { 2143 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS, 2144 getI64Imm(Imm >> 16, dl)), 0); 2145 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor, 2146 getI64Imm(Imm & 0xFFFF, dl)), 2147 0); 2148 } 2149 } 2150 Opc = PPC::CMPLD; 2151 } else if (ISD::isUnsignedIntSetCC(CC)) { 2152 if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm)) 2153 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, 2154 getI64Imm(Imm & 0xFFFF, dl)), 0); 2155 Opc = PPC::CMPLD; 2156 } else { 2157 int16_t SImm; 2158 if (isIntS16Immediate(RHS, SImm)) 2159 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, 2160 getI64Imm(SImm & 0xFFFF, dl)), 2161 0); 2162 Opc = PPC::CMPD; 2163 } 2164 } else if (LHS.getValueType() == MVT::f32) { 2165 Opc = PPC::FCMPUS; 2166 } else { 2167 assert(LHS.getValueType() == MVT::f64 && "Unknown vt!"); 2168 Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; 2169 } 2170 return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); 2171 } 2172 2173 static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) { 2174 switch (CC) { 2175 case ISD::SETUEQ: 2176 case ISD::SETONE: 2177 case ISD::SETOLE: 2178 case ISD::SETOGE: 2179 llvm_unreachable("Should be lowered by legalize!"); 2180 default: llvm_unreachable("Unknown condition!"); 2181 case ISD::SETOEQ: 2182 case ISD::SETEQ: return PPC::PRED_EQ; 2183 case ISD::SETUNE: 2184 case ISD::SETNE: return PPC::PRED_NE; 2185 case ISD::SETOLT: 2186 case ISD::SETLT: return PPC::PRED_LT; 2187 case ISD::SETULE: 2188 case ISD::SETLE: return PPC::PRED_LE; 2189 case ISD::SETOGT: 2190 case ISD::SETGT: return PPC::PRED_GT; 2191 case ISD::SETUGE: 2192 case ISD::SETGE: return PPC::PRED_GE; 2193 case ISD::SETO: return PPC::PRED_NU; 2194 case ISD::SETUO: return PPC::PRED_UN; 2195 // These two are invalid for floating point. Assume we have int. 2196 case ISD::SETULT: return PPC::PRED_LT; 2197 case ISD::SETUGT: return PPC::PRED_GT; 2198 } 2199 } 2200 2201 /// getCRIdxForSetCC - Return the index of the condition register field 2202 /// associated with the SetCC condition, and whether or not the field is 2203 /// treated as inverted. That is, lt = 0; ge = 0 inverted. 2204 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { 2205 Invert = false; 2206 switch (CC) { 2207 default: llvm_unreachable("Unknown condition!"); 2208 case ISD::SETOLT: 2209 case ISD::SETLT: return 0; // Bit #0 = SETOLT 2210 case ISD::SETOGT: 2211 case ISD::SETGT: return 1; // Bit #1 = SETOGT 2212 case ISD::SETOEQ: 2213 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ 2214 case ISD::SETUO: return 3; // Bit #3 = SETUO 2215 case ISD::SETUGE: 2216 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE 2217 case ISD::SETULE: 2218 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE 2219 case ISD::SETUNE: 2220 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE 2221 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO 2222 case ISD::SETUEQ: 2223 case ISD::SETOGE: 2224 case ISD::SETOLE: 2225 case ISD::SETONE: 2226 llvm_unreachable("Invalid branch code: should be expanded by legalize"); 2227 // These are invalid for floating point. Assume integer. 2228 case ISD::SETULT: return 0; 2229 case ISD::SETUGT: return 1; 2230 } 2231 } 2232 2233 // getVCmpInst: return the vector compare instruction for the specified 2234 // vector type and condition code. Since this is for altivec specific code, 2235 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32). 2236 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, 2237 bool HasVSX, bool &Swap, bool &Negate) { 2238 Swap = false; 2239 Negate = false; 2240 2241 if (VecVT.isFloatingPoint()) { 2242 /* Handle some cases by swapping input operands. */ 2243 switch (CC) { 2244 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break; 2245 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; 2246 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break; 2247 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break; 2248 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; 2249 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break; 2250 default: break; 2251 } 2252 /* Handle some cases by negating the result. */ 2253 switch (CC) { 2254 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; 2255 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break; 2256 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break; 2257 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break; 2258 default: break; 2259 } 2260 /* We have instructions implementing the remaining cases. */ 2261 switch (CC) { 2262 case ISD::SETEQ: 2263 case ISD::SETOEQ: 2264 if (VecVT == MVT::v4f32) 2265 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; 2266 else if (VecVT == MVT::v2f64) 2267 return PPC::XVCMPEQDP; 2268 break; 2269 case ISD::SETGT: 2270 case ISD::SETOGT: 2271 if (VecVT == MVT::v4f32) 2272 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP; 2273 else if (VecVT == MVT::v2f64) 2274 return PPC::XVCMPGTDP; 2275 break; 2276 case ISD::SETGE: 2277 case ISD::SETOGE: 2278 if (VecVT == MVT::v4f32) 2279 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP; 2280 else if (VecVT == MVT::v2f64) 2281 return PPC::XVCMPGEDP; 2282 break; 2283 default: 2284 break; 2285 } 2286 llvm_unreachable("Invalid floating-point vector compare condition"); 2287 } else { 2288 /* Handle some cases by swapping input operands. */ 2289 switch (CC) { 2290 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break; 2291 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; 2292 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; 2293 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break; 2294 default: break; 2295 } 2296 /* Handle some cases by negating the result. */ 2297 switch (CC) { 2298 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; 2299 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break; 2300 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break; 2301 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break; 2302 default: break; 2303 } 2304 /* We have instructions implementing the remaining cases. */ 2305 switch (CC) { 2306 case ISD::SETEQ: 2307 case ISD::SETUEQ: 2308 if (VecVT == MVT::v16i8) 2309 return PPC::VCMPEQUB; 2310 else if (VecVT == MVT::v8i16) 2311 return PPC::VCMPEQUH; 2312 else if (VecVT == MVT::v4i32) 2313 return PPC::VCMPEQUW; 2314 else if (VecVT == MVT::v2i64) 2315 return PPC::VCMPEQUD; 2316 break; 2317 case ISD::SETGT: 2318 if (VecVT == MVT::v16i8) 2319 return PPC::VCMPGTSB; 2320 else if (VecVT == MVT::v8i16) 2321 return PPC::VCMPGTSH; 2322 else if (VecVT == MVT::v4i32) 2323 return PPC::VCMPGTSW; 2324 else if (VecVT == MVT::v2i64) 2325 return PPC::VCMPGTSD; 2326 break; 2327 case ISD::SETUGT: 2328 if (VecVT == MVT::v16i8) 2329 return PPC::VCMPGTUB; 2330 else if (VecVT == MVT::v8i16) 2331 return PPC::VCMPGTUH; 2332 else if (VecVT == MVT::v4i32) 2333 return PPC::VCMPGTUW; 2334 else if (VecVT == MVT::v2i64) 2335 return PPC::VCMPGTUD; 2336 break; 2337 default: 2338 break; 2339 } 2340 llvm_unreachable("Invalid integer vector compare condition"); 2341 } 2342 } 2343 2344 bool PPCDAGToDAGISel::trySETCC(SDNode *N) { 2345 SDLoc dl(N); 2346 unsigned Imm; 2347 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2348 EVT PtrVT = 2349 CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); 2350 bool isPPC64 = (PtrVT == MVT::i64); 2351 2352 if (!PPCSubTarget->useCRBits() && 2353 isInt32Immediate(N->getOperand(1), Imm)) { 2354 // We can codegen setcc op, imm very efficiently compared to a brcond. 2355 // Check for those cases here. 2356 // setcc op, 0 2357 if (Imm == 0) { 2358 SDValue Op = N->getOperand(0); 2359 switch (CC) { 2360 default: break; 2361 case ISD::SETEQ: { 2362 Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); 2363 SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl), 2364 getI32Imm(31, dl) }; 2365 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 2366 return true; 2367 } 2368 case ISD::SETNE: { 2369 if (isPPC64) break; 2370 SDValue AD = 2371 SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 2372 Op, getI32Imm(~0U, dl)), 0); 2373 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); 2374 return true; 2375 } 2376 case ISD::SETLT: { 2377 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), 2378 getI32Imm(31, dl) }; 2379 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 2380 return true; 2381 } 2382 case ISD::SETGT: { 2383 SDValue T = 2384 SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0); 2385 T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); 2386 SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl), 2387 getI32Imm(31, dl) }; 2388 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 2389 return true; 2390 } 2391 } 2392 } else if (Imm == ~0U) { // setcc op, -1 2393 SDValue Op = N->getOperand(0); 2394 switch (CC) { 2395 default: break; 2396 case ISD::SETEQ: 2397 if (isPPC64) break; 2398 Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 2399 Op, getI32Imm(1, dl)), 0); 2400 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, 2401 SDValue(CurDAG->getMachineNode(PPC::LI, dl, 2402 MVT::i32, 2403 getI32Imm(0, dl)), 2404 0), Op.getValue(1)); 2405 return true; 2406 case ISD::SETNE: { 2407 if (isPPC64) break; 2408 Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); 2409 SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 2410 Op, getI32Imm(~0U, dl)); 2411 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op, 2412 SDValue(AD, 1)); 2413 return true; 2414 } 2415 case ISD::SETLT: { 2416 SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op, 2417 getI32Imm(1, dl)), 0); 2418 SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD, 2419 Op), 0); 2420 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl), 2421 getI32Imm(31, dl) }; 2422 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 2423 return true; 2424 } 2425 case ISD::SETGT: { 2426 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), 2427 getI32Imm(31, dl) }; 2428 Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 2429 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl)); 2430 return true; 2431 } 2432 } 2433 } 2434 } 2435 2436 SDValue LHS = N->getOperand(0); 2437 SDValue RHS = N->getOperand(1); 2438 2439 // Altivec Vector compare instructions do not set any CR register by default and 2440 // vector compare operations return the same type as the operands. 2441 if (LHS.getValueType().isVector()) { 2442 if (PPCSubTarget->hasQPX()) 2443 return false; 2444 2445 EVT VecVT = LHS.getValueType(); 2446 bool Swap, Negate; 2447 unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, 2448 PPCSubTarget->hasVSX(), Swap, Negate); 2449 if (Swap) 2450 std::swap(LHS, RHS); 2451 2452 EVT ResVT = VecVT.changeVectorElementTypeToInteger(); 2453 if (Negate) { 2454 SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0); 2455 CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR, 2456 ResVT, VCmp, VCmp); 2457 return true; 2458 } 2459 2460 CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS); 2461 return true; 2462 } 2463 2464 if (PPCSubTarget->useCRBits()) 2465 return false; 2466 2467 bool Inv; 2468 unsigned Idx = getCRIdxForSetCC(CC, Inv); 2469 SDValue CCReg = SelectCC(LHS, RHS, CC, dl); 2470 SDValue IntCR; 2471 2472 // Force the ccreg into CR7. 2473 SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); 2474 2475 SDValue InFlag(nullptr, 0); // Null incoming flag value. 2476 CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, 2477 InFlag).getValue(1); 2478 2479 IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, 2480 CCReg), 0); 2481 2482 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl), 2483 getI32Imm(31, dl), getI32Imm(31, dl) }; 2484 if (!Inv) { 2485 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 2486 return true; 2487 } 2488 2489 // Get the specified bit. 2490 SDValue Tmp = 2491 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 2492 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); 2493 return true; 2494 } 2495 2496 // Is this opcode a bitwise logical operation? 2497 static bool isLogicOp(unsigned Opc) { 2498 return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; 2499 } 2500 2501 /// If this node is a sign/zero extension of an integer comparison, 2502 /// it can usually be computed in GPR's rather than using comparison 2503 /// instructions and ISEL. We only do this on 64-bit targets for now 2504 /// as the code is specialized for 64-bit (it uses 64-bit instructions 2505 /// and assumes 64-bit registers). 2506 bool PPCDAGToDAGISel::tryEXTEND(SDNode *N) { 2507 if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) 2508 return false; 2509 assert((N->getOpcode() == ISD::ZERO_EXTEND || 2510 N->getOpcode() == ISD::SIGN_EXTEND) && 2511 "Expecting a zero/sign extend node!"); 2512 2513 SDValue WideRes; 2514 // If we are zero-extending the result of a logical operation on i1 2515 // values, we can keep the values in GPRs. 2516 if (isLogicOp(N->getOperand(0).getOpcode()) && 2517 N->getOperand(0).getValueType() == MVT::i1 && 2518 N->getOpcode() == ISD::ZERO_EXTEND) 2519 WideRes = computeLogicOpInGPR(N->getOperand(0)); 2520 else if (N->getOperand(0).getOpcode() != ISD::SETCC) 2521 return false; 2522 else 2523 WideRes = 2524 getSETCCInGPR(N->getOperand(0), 2525 N->getOpcode() == ISD::SIGN_EXTEND ? 2526 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); 2527 2528 if (!WideRes) 2529 return false; 2530 2531 SDLoc dl(N); 2532 bool Inputs32Bit = N->getOperand(0).getOperand(0).getValueType() == MVT::i32; 2533 bool Output32Bit = N->getValueType(0) == MVT::i32; 2534 2535 NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; 2536 NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; 2537 2538 SDValue ConvOp = WideRes; 2539 if (Inputs32Bit != Output32Bit) 2540 ConvOp = addExtOrTrunc(WideRes, Inputs32Bit ? ExtOrTruncConversion::Ext : 2541 ExtOrTruncConversion::Trunc); 2542 ReplaceNode(N, ConvOp.getNode()); 2543 2544 return true; 2545 } 2546 2547 // Lower a logical operation on i1 values into a GPR sequence if possible. 2548 // The result can be kept in a GPR if requested. 2549 // Three types of inputs can be handled: 2550 // - SETCC 2551 // - TRUNCATE 2552 // - Logical operation (AND/OR/XOR) 2553 // There is also a special case that is handled (namely a complement operation 2554 // achieved with xor %a, -1). 2555 SDValue PPCDAGToDAGISel::computeLogicOpInGPR(SDValue LogicOp) { 2556 assert(isLogicOp(LogicOp.getOpcode()) && 2557 "Can only handle logic operations here."); 2558 assert(LogicOp.getValueType() == MVT::i1 && 2559 "Can only handle logic operations on i1 values here."); 2560 SDLoc dl(LogicOp); 2561 SDValue LHS, RHS; 2562 2563 // Special case: xor %a, -1 2564 bool IsBitwiseNegation = isBitwiseNot(LogicOp); 2565 2566 // Produces a GPR sequence for each operand of the binary logic operation. 2567 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates 2568 // the value in a GPR and for logic operations, it will recursively produce 2569 // a GPR sequence for the operation. 2570 auto getLogicOperand = [&] (SDValue Operand) -> SDValue { 2571 unsigned OperandOpcode = Operand.getOpcode(); 2572 if (OperandOpcode == ISD::SETCC) 2573 return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); 2574 else if (OperandOpcode == ISD::TRUNCATE) { 2575 SDValue InputOp = Operand.getOperand(0); 2576 EVT InVT = InputOp.getValueType(); 2577 return 2578 SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : 2579 PPC::RLDICL, dl, InVT, InputOp, 2580 getI64Imm(0, dl), getI64Imm(63, dl)), 0); 2581 } else if (isLogicOp(OperandOpcode)) 2582 return computeLogicOpInGPR(Operand); 2583 return SDValue(); 2584 }; 2585 LHS = getLogicOperand(LogicOp.getOperand(0)); 2586 RHS = getLogicOperand(LogicOp.getOperand(1)); 2587 2588 // If a GPR sequence can't be produced for the LHS we can't proceed. 2589 // Not producing a GPR sequence for the RHS is only a problem if this isn't 2590 // a bitwise negation operation. 2591 if (!LHS || (!RHS && !IsBitwiseNegation)) 2592 return SDValue(); 2593 2594 NumLogicOpsOnComparison++; 2595 2596 // We will use the inputs as 64-bit values. 2597 if (LHS.getValueType() == MVT::i32) 2598 LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); 2599 if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) 2600 RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); 2601 2602 unsigned NewOpc; 2603 switch (LogicOp.getOpcode()) { 2604 default: llvm_unreachable("Unknown logic operation."); 2605 case ISD::AND: NewOpc = PPC::AND8; break; 2606 case ISD::OR: NewOpc = PPC::OR8; break; 2607 case ISD::XOR: NewOpc = PPC::XOR8; break; 2608 } 2609 2610 if (IsBitwiseNegation) { 2611 RHS = getI64Imm(1, dl); 2612 NewOpc = PPC::XORI8; 2613 } 2614 2615 return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); 2616 2617 } 2618 2619 /// Try performing logical operations on results of comparisons in GPRs. 2620 /// It is typically preferred from a performance perspective over performing 2621 /// the operations on individual bits in the CR. We only do this on 64-bit 2622 /// targets for now as the code is specialized for 64-bit (it uses 64-bit 2623 /// instructions and assumes 64-bit registers). 2624 bool PPCDAGToDAGISel::tryLogicOpOfCompares(SDNode *N) { 2625 if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) 2626 return false; 2627 if (N->getValueType(0) != MVT::i1) 2628 return false; 2629 assert(isLogicOp(N->getOpcode()) && 2630 "Expected a logic operation on setcc results."); 2631 SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); 2632 if (!LoweredLogical) 2633 return false; 2634 2635 SDLoc dl(N); 2636 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; 2637 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; 2638 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); 2639 SDValue LHS = LoweredLogical.getOperand(0); 2640 SDValue RHS = LoweredLogical.getOperand(1); 2641 SDValue WideOp; 2642 SDValue OpToConvToRecForm; 2643 2644 // Look through any 32-bit to 64-bit implicit extend nodes to find the opcode 2645 // that is input to the XORI. 2646 if (IsBitwiseNegate && 2647 LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) 2648 OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); 2649 else if (IsBitwiseNegate) 2650 // If the input to the XORI isn't an extension, that's what we're after. 2651 OpToConvToRecForm = LoweredLogical.getOperand(0); 2652 else 2653 // If this is not an XORI, it is a reg-reg logical op and we can convert it 2654 // to record-form. 2655 OpToConvToRecForm = LoweredLogical; 2656 2657 // Get the record-form version of the node we're looking to use to get the 2658 // CR result from. 2659 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); 2660 int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); 2661 2662 // Convert the right node to record-form. This is either the logical we're 2663 // looking at or it is the input node to the negation (if we're looking at 2664 // a bitwise negation). 2665 if (NewOpc != -1 && IsBitwiseNegate) { 2666 // The input to the XORI has a record-form. Use it. 2667 assert(LoweredLogical.getConstantOperandVal(1) == 1 && 2668 "Expected a PPC::XORI8 only for bitwise negation."); 2669 // Emit the record-form instruction. 2670 std::vector<SDValue> Ops; 2671 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) 2672 Ops.push_back(OpToConvToRecForm.getOperand(i)); 2673 2674 WideOp = 2675 SDValue(CurDAG->getMachineNode(NewOpc, dl, 2676 OpToConvToRecForm.getValueType(), 2677 MVT::Glue, Ops), 0); 2678 } else { 2679 assert((NewOpc != -1 || !IsBitwiseNegate) && 2680 "No record form available for AND8/OR8/XOR8?"); 2681 WideOp = 2682 SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl, 2683 MVT::i64, MVT::Glue, LHS, RHS), 0); 2684 } 2685 2686 // Select this node to a single bit from CR0 set by the record-form node 2687 // just created. For bitwise negation, use the EQ bit which is the equivalent 2688 // of negating the result (i.e. it is a bit set when the result of the 2689 // operation is zero). 2690 SDValue SRIdxVal = 2691 CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); 2692 SDValue CRBit = 2693 SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, 2694 MVT::i1, CR0Reg, SRIdxVal, 2695 WideOp.getValue(1)), 0); 2696 ReplaceNode(N, CRBit.getNode()); 2697 return true; 2698 } 2699 2700 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. 2701 /// Useful when emitting comparison code for 32-bit values without using 2702 /// the compare instruction (which only considers the lower 32-bits). 2703 SDValue PPCDAGToDAGISel::signExtendInputIfNeeded(SDValue Input) { 2704 assert(Input.getValueType() == MVT::i32 && 2705 "Can only sign-extend 32-bit values here."); 2706 unsigned Opc = Input.getOpcode(); 2707 2708 // The value was sign extended and then truncated to 32-bits. No need to 2709 // sign extend it again. 2710 if (Opc == ISD::TRUNCATE && 2711 (Input.getOperand(0).getOpcode() == ISD::AssertSext || 2712 Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) 2713 return Input; 2714 2715 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); 2716 // The input is a sign-extending load. No reason to sign-extend. 2717 if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) 2718 return Input; 2719 2720 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); 2721 // We don't sign-extend constants and already sign-extended values. 2722 if (InputConst || Opc == ISD::AssertSext || Opc == ISD::SIGN_EXTEND_INREG || 2723 Opc == ISD::SIGN_EXTEND) 2724 return Input; 2725 2726 SDLoc dl(Input); 2727 SignExtensionsAdded++; 2728 return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32, dl, MVT::i32, Input), 0); 2729 } 2730 2731 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. 2732 /// Useful when emitting comparison code for 32-bit values without using 2733 /// the compare instruction (which only considers the lower 32-bits). 2734 SDValue PPCDAGToDAGISel::zeroExtendInputIfNeeded(SDValue Input) { 2735 assert(Input.getValueType() == MVT::i32 && 2736 "Can only zero-extend 32-bit values here."); 2737 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); 2738 unsigned Opc = Input.getOpcode(); 2739 2740 // No need to zero-extend loaded values (unless they're loaded with 2741 // a sign-extending load). 2742 if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) 2743 return Input; 2744 2745 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); 2746 bool InputZExtConst = InputConst && InputConst->getSExtValue() >= 0; 2747 // An ISD::TRUNCATE will be lowered to an EXTRACT_SUBREG so we have 2748 // to conservatively actually clear the high bits. We also don't need to 2749 // zero-extend constants or values that are already zero-extended. 2750 if (InputZExtConst || Opc == ISD::AssertZext || Opc == ISD::ZERO_EXTEND) 2751 return Input; 2752 2753 SDLoc dl(Input); 2754 ZeroExtensionsAdded++; 2755 return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32, dl, MVT::i32, Input, 2756 getI64Imm(0, dl), getI64Imm(32, dl)), 2757 0); 2758 } 2759 2760 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of 2761 // course not actual zero/sign extensions that will generate machine code, 2762 // they're just a way to reinterpret a 32 bit value in a register as a 2763 // 64 bit value and vice-versa. 2764 SDValue PPCDAGToDAGISel::addExtOrTrunc(SDValue NatWidthRes, 2765 ExtOrTruncConversion Conv) { 2766 SDLoc dl(NatWidthRes); 2767 2768 // For reinterpreting 32-bit values as 64 bit values, we generate 2769 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1> 2770 if (Conv == ExtOrTruncConversion::Ext) { 2771 SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); 2772 SDValue SubRegIdx = 2773 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); 2774 return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, 2775 ImDef, NatWidthRes, SubRegIdx), 0); 2776 } 2777 2778 assert(Conv == ExtOrTruncConversion::Trunc && 2779 "Unknown convertion between 32 and 64 bit values."); 2780 // For reinterpreting 64-bit values as 32-bit values, we just need to 2781 // EXTRACT_SUBREG (i.e. extract the low word). 2782 SDValue SubRegIdx = 2783 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); 2784 return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, 2785 NatWidthRes, SubRegIdx), 0); 2786 } 2787 2788 /// Produces a zero-extended result of comparing two 32-bit values according to 2789 /// the passed condition code. 2790 SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS, 2791 ISD::CondCode CC, 2792 int64_t RHSValue, SDLoc dl) { 2793 bool IsRHSZero = RHSValue == 0; 2794 switch (CC) { 2795 default: return SDValue(); 2796 case ISD::SETEQ: { 2797 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5) 2798 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5) 2799 SDValue Xor = IsRHSZero ? LHS : 2800 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 2801 SDValue Clz = 2802 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); 2803 SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl), 2804 getI32Imm(31, dl) }; 2805 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, 2806 ShiftOps), 0); 2807 } 2808 case ISD::SETNE: { 2809 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) 2810 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) 2811 SDValue Xor = IsRHSZero ? LHS : 2812 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 2813 SDValue Clz = 2814 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); 2815 SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl), 2816 getI32Imm(31, dl) }; 2817 SDValue Shift = 2818 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); 2819 return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, 2820 getI32Imm(1, dl)), 0); 2821 } 2822 } 2823 } 2824 2825 /// Produces a sign-extended result of comparing two 32-bit values according to 2826 /// the passed condition code. 2827 SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS, 2828 ISD::CondCode CC, 2829 int64_t RHSValue, SDLoc dl) { 2830 bool IsRHSZero = RHSValue == 0; 2831 switch (CC) { 2832 default: return SDValue(); 2833 case ISD::SETEQ: { 2834 // (sext (setcc %a, %b, seteq)) -> 2835 // (ashr (shl (ctlz (xor %a, %b)), 58), 63) 2836 // (sext (setcc %a, 0, seteq)) -> 2837 // (ashr (shl (ctlz %a), 58), 63) 2838 SDValue CountInput = IsRHSZero ? LHS : 2839 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 2840 SDValue Cntlzw = 2841 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0); 2842 SDValue SHLOps[] = { Cntlzw, getI32Imm(58, dl), getI32Imm(0, dl) }; 2843 SDValue Sldi = 2844 SDValue(CurDAG->getMachineNode(PPC::RLDICR_32, dl, MVT::i32, SHLOps), 0); 2845 return SDValue(CurDAG->getMachineNode(PPC::SRADI_32, dl, MVT::i32, Sldi, 2846 getI32Imm(63, dl)), 0); 2847 } 2848 case ISD::SETNE: { 2849 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and 2850 // flip the bit, finally take 2's complement. 2851 // (sext (setcc %a, %b, setne)) -> 2852 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1)) 2853 // Same as above, but the first xor is not needed. 2854 // (sext (setcc %a, 0, setne)) -> 2855 // (neg (xor (lshr (ctlz %a), 5), 1)) 2856 SDValue Xor = IsRHSZero ? LHS : 2857 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 2858 SDValue Clz = 2859 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); 2860 SDValue ShiftOps[] = 2861 { Clz, getI32Imm(27, dl), getI32Imm(5, dl), getI32Imm(31, dl) }; 2862 SDValue Shift = 2863 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); 2864 SDValue Xori = 2865 SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, 2866 getI32Imm(1, dl)), 0); 2867 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0); 2868 } 2869 } 2870 } 2871 2872 /// Produces a zero-extended result of comparing two 64-bit values according to 2873 /// the passed condition code. 2874 SDValue PPCDAGToDAGISel::get64BitZExtCompare(SDValue LHS, SDValue RHS, 2875 ISD::CondCode CC, 2876 int64_t RHSValue, SDLoc dl) { 2877 bool IsRHSZero = RHSValue == 0; 2878 switch (CC) { 2879 default: return SDValue(); 2880 case ISD::SETEQ: { 2881 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6) 2882 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6) 2883 SDValue Xor = IsRHSZero ? LHS : 2884 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); 2885 SDValue Clz = 2886 SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0); 2887 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz, 2888 getI64Imm(58, dl), getI64Imm(63, dl)), 2889 0); 2890 } 2891 } 2892 } 2893 2894 /// Produces a sign-extended result of comparing two 64-bit values according to 2895 /// the passed condition code. 2896 SDValue PPCDAGToDAGISel::get64BitSExtCompare(SDValue LHS, SDValue RHS, 2897 ISD::CondCode CC, 2898 int64_t RHSValue, SDLoc dl) { 2899 bool IsRHSZero = RHSValue == 0; 2900 switch (CC) { 2901 default: return SDValue(); 2902 case ISD::SETEQ: { 2903 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1) 2904 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA) 2905 // {addcz.reg, addcz.CA} = (addcarry %a, -1) 2906 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA) 2907 SDValue AddInput = IsRHSZero ? LHS : 2908 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0); 2909 SDValue Addic = 2910 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue, 2911 AddInput, getI32Imm(~0U, dl)), 0); 2912 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic, 2913 Addic, Addic.getValue(1)), 0); 2914 } 2915 } 2916 } 2917 2918 /// Does this SDValue have any uses for which keeping the value in a GPR is 2919 /// appropriate. This is meant to be used on values that have type i1 since 2920 /// it is somewhat meaningless to ask if values of other types can be kept in 2921 /// GPR's. 2922 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { 2923 assert(Compare.getOpcode() == ISD::SETCC && 2924 "An ISD::SETCC node required here."); 2925 2926 // For values that have a single use, the caller should obviously already have 2927 // checked if that use is an extending use. We check the other uses here. 2928 if (Compare.hasOneUse()) 2929 return true; 2930 // We want the value in a GPR if it is being extended, used for a select, or 2931 // used in logical operations. 2932 for (auto CompareUse : Compare.getNode()->uses()) 2933 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && 2934 CompareUse->getOpcode() != ISD::ZERO_EXTEND && 2935 CompareUse->getOpcode() != ISD::SELECT && 2936 !isLogicOp(CompareUse->getOpcode())) { 2937 OmittedForNonExtendUses++; 2938 return false; 2939 } 2940 return true; 2941 } 2942 2943 /// Returns an equivalent of a SETCC node but with the result the same width as 2944 /// the inputs. This can nalso be used for SELECT_CC if either the true or false 2945 /// values is a power of two while the other is zero. 2946 SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare, 2947 SetccInGPROpts ConvOpts) { 2948 assert((Compare.getOpcode() == ISD::SETCC || 2949 Compare.getOpcode() == ISD::SELECT_CC) && 2950 "An ISD::SETCC node required here."); 2951 2952 // Don't convert this comparison to a GPR sequence because there are uses 2953 // of the i1 result (i.e. uses that require the result in the CR). 2954 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) 2955 return SDValue(); 2956 2957 SDValue LHS = Compare.getOperand(0); 2958 SDValue RHS = Compare.getOperand(1); 2959 2960 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. 2961 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2; 2962 ISD::CondCode CC = 2963 cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get(); 2964 EVT InputVT = LHS.getValueType(); 2965 if (InputVT != MVT::i32 && InputVT != MVT::i64) 2966 return SDValue(); 2967 2968 if (ConvOpts == SetccInGPROpts::ZExtInvert || 2969 ConvOpts == SetccInGPROpts::SExtInvert) 2970 CC = ISD::getSetCCInverse(CC, true); 2971 2972 bool Inputs32Bit = InputVT == MVT::i32; 2973 if (ISD::isSignedIntSetCC(CC) && Inputs32Bit) { 2974 LHS = signExtendInputIfNeeded(LHS); 2975 RHS = signExtendInputIfNeeded(RHS); 2976 } else if (ISD::isUnsignedIntSetCC(CC) && Inputs32Bit) { 2977 LHS = zeroExtendInputIfNeeded(LHS); 2978 RHS = zeroExtendInputIfNeeded(RHS); 2979 } 2980 2981 SDLoc dl(Compare); 2982 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 2983 int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX; 2984 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || 2985 ConvOpts == SetccInGPROpts::SExtInvert; 2986 2987 if (IsSext && Inputs32Bit) 2988 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); 2989 else if (Inputs32Bit) 2990 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); 2991 else if (IsSext) 2992 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); 2993 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); 2994 } 2995 2996 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 2997 // Transfer memoperands. 2998 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 2999 MemOp[0] = cast<MemSDNode>(N)->getMemOperand(); 3000 cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); 3001 } 3002 3003 // Select - Convert the specified operand from a target-independent to a 3004 // target-specific node if it hasn't already been changed. 3005 void PPCDAGToDAGISel::Select(SDNode *N) { 3006 SDLoc dl(N); 3007 if (N->isMachineOpcode()) { 3008 N->setNodeId(-1); 3009 return; // Already selected. 3010 } 3011 3012 // In case any misguided DAG-level optimizations form an ADD with a 3013 // TargetConstant operand, crash here instead of miscompiling (by selecting 3014 // an r+r add instead of some kind of r+i add). 3015 if (N->getOpcode() == ISD::ADD && 3016 N->getOperand(1).getOpcode() == ISD::TargetConstant) 3017 llvm_unreachable("Invalid ADD with TargetConstant operand"); 3018 3019 // Try matching complex bit permutations before doing anything else. 3020 if (tryBitPermutation(N)) 3021 return; 3022 3023 switch (N->getOpcode()) { 3024 default: break; 3025 3026 case ISD::Constant: 3027 if (N->getValueType(0) == MVT::i64) { 3028 ReplaceNode(N, getInt64(CurDAG, N)); 3029 return; 3030 } 3031 break; 3032 3033 case ISD::ZERO_EXTEND: 3034 case ISD::SIGN_EXTEND: 3035 if (tryEXTEND(N)) 3036 return; 3037 break; 3038 3039 case ISD::SETCC: 3040 if (trySETCC(N)) 3041 return; 3042 break; 3043 3044 case PPCISD::GlobalBaseReg: 3045 ReplaceNode(N, getGlobalBaseReg()); 3046 return; 3047 3048 case ISD::FrameIndex: 3049 selectFrameIndex(N, N); 3050 return; 3051 3052 case PPCISD::MFOCRF: { 3053 SDValue InFlag = N->getOperand(1); 3054 ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, 3055 N->getOperand(0), InFlag)); 3056 return; 3057 } 3058 3059 case PPCISD::READ_TIME_BASE: 3060 ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, 3061 MVT::Other, N->getOperand(0))); 3062 return; 3063 3064 case PPCISD::SRA_ADDZE: { 3065 SDValue N0 = N->getOperand(0); 3066 SDValue ShiftAmt = 3067 CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))-> 3068 getConstantIntValue(), dl, 3069 N->getValueType(0)); 3070 if (N->getValueType(0) == MVT::i64) { 3071 SDNode *Op = 3072 CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue, 3073 N0, ShiftAmt); 3074 CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0), 3075 SDValue(Op, 1)); 3076 return; 3077 } else { 3078 assert(N->getValueType(0) == MVT::i32 && 3079 "Expecting i64 or i32 in PPCISD::SRA_ADDZE"); 3080 SDNode *Op = 3081 CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue, 3082 N0, ShiftAmt); 3083 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0), 3084 SDValue(Op, 1)); 3085 return; 3086 } 3087 } 3088 3089 case ISD::LOAD: { 3090 // Handle preincrement loads. 3091 LoadSDNode *LD = cast<LoadSDNode>(N); 3092 EVT LoadedVT = LD->getMemoryVT(); 3093 3094 // Normal loads are handled by code generated from the .td file. 3095 if (LD->getAddressingMode() != ISD::PRE_INC) 3096 break; 3097 3098 SDValue Offset = LD->getOffset(); 3099 if (Offset.getOpcode() == ISD::TargetConstant || 3100 Offset.getOpcode() == ISD::TargetGlobalAddress) { 3101 3102 unsigned Opcode; 3103 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; 3104 if (LD->getValueType(0) != MVT::i64) { 3105 // Handle PPC32 integer and normal FP loads. 3106 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 3107 switch (LoadedVT.getSimpleVT().SimpleTy) { 3108 default: llvm_unreachable("Invalid PPC load type!"); 3109 case MVT::f64: Opcode = PPC::LFDU; break; 3110 case MVT::f32: Opcode = PPC::LFSU; break; 3111 case MVT::i32: Opcode = PPC::LWZU; break; 3112 case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break; 3113 case MVT::i1: 3114 case MVT::i8: Opcode = PPC::LBZU; break; 3115 } 3116 } else { 3117 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); 3118 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 3119 switch (LoadedVT.getSimpleVT().SimpleTy) { 3120 default: llvm_unreachable("Invalid PPC load type!"); 3121 case MVT::i64: Opcode = PPC::LDU; break; 3122 case MVT::i32: Opcode = PPC::LWZU8; break; 3123 case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break; 3124 case MVT::i1: 3125 case MVT::i8: Opcode = PPC::LBZU8; break; 3126 } 3127 } 3128 3129 SDValue Chain = LD->getChain(); 3130 SDValue Base = LD->getBasePtr(); 3131 SDValue Ops[] = { Offset, Base, Chain }; 3132 SDNode *MN = CurDAG->getMachineNode( 3133 Opcode, dl, LD->getValueType(0), 3134 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); 3135 transferMemOperands(N, MN); 3136 ReplaceNode(N, MN); 3137 return; 3138 } else { 3139 unsigned Opcode; 3140 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; 3141 if (LD->getValueType(0) != MVT::i64) { 3142 // Handle PPC32 integer and normal FP loads. 3143 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 3144 switch (LoadedVT.getSimpleVT().SimpleTy) { 3145 default: llvm_unreachable("Invalid PPC load type!"); 3146 case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX 3147 case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX 3148 case MVT::f64: Opcode = PPC::LFDUX; break; 3149 case MVT::f32: Opcode = PPC::LFSUX; break; 3150 case MVT::i32: Opcode = PPC::LWZUX; break; 3151 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break; 3152 case MVT::i1: 3153 case MVT::i8: Opcode = PPC::LBZUX; break; 3154 } 3155 } else { 3156 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); 3157 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && 3158 "Invalid sext update load"); 3159 switch (LoadedVT.getSimpleVT().SimpleTy) { 3160 default: llvm_unreachable("Invalid PPC load type!"); 3161 case MVT::i64: Opcode = PPC::LDUX; break; 3162 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break; 3163 case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break; 3164 case MVT::i1: 3165 case MVT::i8: Opcode = PPC::LBZUX8; break; 3166 } 3167 } 3168 3169 SDValue Chain = LD->getChain(); 3170 SDValue Base = LD->getBasePtr(); 3171 SDValue Ops[] = { Base, Offset, Chain }; 3172 SDNode *MN = CurDAG->getMachineNode( 3173 Opcode, dl, LD->getValueType(0), 3174 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); 3175 transferMemOperands(N, MN); 3176 ReplaceNode(N, MN); 3177 return; 3178 } 3179 } 3180 3181 case ISD::AND: { 3182 if (tryLogicOpOfCompares(N)) 3183 return; 3184 3185 unsigned Imm, Imm2, SH, MB, ME; 3186 uint64_t Imm64; 3187 3188 // If this is an and of a value rotated between 0 and 31 bits and then and'd 3189 // with a mask, emit rlwinm 3190 if (isInt32Immediate(N->getOperand(1), Imm) && 3191 isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) { 3192 SDValue Val = N->getOperand(0).getOperand(0); 3193 SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl), 3194 getI32Imm(ME, dl) }; 3195 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 3196 return; 3197 } 3198 // If this is just a masked value where the input is not handled above, and 3199 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm 3200 if (isInt32Immediate(N->getOperand(1), Imm) && 3201 isRunOfOnes(Imm, MB, ME) && 3202 N->getOperand(0).getOpcode() != ISD::ROTL) { 3203 SDValue Val = N->getOperand(0); 3204 SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl), 3205 getI32Imm(ME, dl) }; 3206 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 3207 return; 3208 } 3209 // If this is a 64-bit zero-extension mask, emit rldicl. 3210 if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && 3211 isMask_64(Imm64)) { 3212 SDValue Val = N->getOperand(0); 3213 MB = 64 - countTrailingOnes(Imm64); 3214 SH = 0; 3215 3216 if (Val.getOpcode() == ISD::ANY_EXTEND) { 3217 auto Op0 = Val.getOperand(0); 3218 if ( Op0.getOpcode() == ISD::SRL && 3219 isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { 3220 3221 auto ResultType = Val.getNode()->getValueType(0); 3222 auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, 3223 ResultType); 3224 SDValue IDVal (ImDef, 0); 3225 3226 Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, 3227 ResultType, IDVal, Op0.getOperand(0), 3228 getI32Imm(1, dl)), 0); 3229 SH = 64 - Imm; 3230 } 3231 } 3232 3233 // If the operand is a logical right shift, we can fold it into this 3234 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) 3235 // for n <= mb. The right shift is really a left rotate followed by a 3236 // mask, and this mask is a more-restrictive sub-mask of the mask implied 3237 // by the shift. 3238 if (Val.getOpcode() == ISD::SRL && 3239 isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { 3240 assert(Imm < 64 && "Illegal shift amount"); 3241 Val = Val.getOperand(0); 3242 SH = 64 - Imm; 3243 } 3244 3245 SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; 3246 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); 3247 return; 3248 } 3249 // If this is a negated 64-bit zero-extension mask, 3250 // i.e. the immediate is a sequence of ones from most significant side 3251 // and all zero for reminder, we should use rldicr. 3252 if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && 3253 isMask_64(~Imm64)) { 3254 SDValue Val = N->getOperand(0); 3255 MB = 63 - countTrailingOnes(~Imm64); 3256 SH = 0; 3257 SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; 3258 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); 3259 return; 3260 } 3261 3262 // AND X, 0 -> 0, not "rlwinm 32". 3263 if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { 3264 ReplaceUses(SDValue(N, 0), N->getOperand(1)); 3265 return; 3266 } 3267 // ISD::OR doesn't get all the bitfield insertion fun. 3268 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a 3269 // bitfield insert. 3270 if (isInt32Immediate(N->getOperand(1), Imm) && 3271 N->getOperand(0).getOpcode() == ISD::OR && 3272 isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { 3273 // The idea here is to check whether this is equivalent to: 3274 // (c1 & m) | (x & ~m) 3275 // where m is a run-of-ones mask. The logic here is that, for each bit in 3276 // c1 and c2: 3277 // - if both are 1, then the output will be 1. 3278 // - if both are 0, then the output will be 0. 3279 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will 3280 // come from x. 3281 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will 3282 // be 0. 3283 // If that last condition is never the case, then we can form m from the 3284 // bits that are the same between c1 and c2. 3285 unsigned MB, ME; 3286 if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) { 3287 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3288 N->getOperand(0).getOperand(1), 3289 getI32Imm(0, dl), getI32Imm(MB, dl), 3290 getI32Imm(ME, dl) }; 3291 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); 3292 return; 3293 } 3294 } 3295 3296 // Other cases are autogenerated. 3297 break; 3298 } 3299 case ISD::OR: { 3300 if (N->getValueType(0) == MVT::i32) 3301 if (tryBitfieldInsert(N)) 3302 return; 3303 3304 if (tryLogicOpOfCompares(N)) 3305 return; 3306 3307 int16_t Imm; 3308 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && 3309 isIntS16Immediate(N->getOperand(1), Imm)) { 3310 KnownBits LHSKnown; 3311 CurDAG->computeKnownBits(N->getOperand(0), LHSKnown); 3312 3313 // If this is equivalent to an add, then we can fold it with the 3314 // FrameIndex calculation. 3315 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { 3316 selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); 3317 return; 3318 } 3319 } 3320 3321 // Other cases are autogenerated. 3322 break; 3323 } 3324 case ISD::XOR: { 3325 if (tryLogicOpOfCompares(N)) 3326 return; 3327 break; 3328 } 3329 case ISD::ADD: { 3330 int16_t Imm; 3331 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && 3332 isIntS16Immediate(N->getOperand(1), Imm)) { 3333 selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); 3334 return; 3335 } 3336 3337 break; 3338 } 3339 case ISD::SHL: { 3340 unsigned Imm, SH, MB, ME; 3341 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && 3342 isRotateAndMask(N, Imm, true, SH, MB, ME)) { 3343 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3344 getI32Imm(SH, dl), getI32Imm(MB, dl), 3345 getI32Imm(ME, dl) }; 3346 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 3347 return; 3348 } 3349 3350 // Other cases are autogenerated. 3351 break; 3352 } 3353 case ISD::SRL: { 3354 unsigned Imm, SH, MB, ME; 3355 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && 3356 isRotateAndMask(N, Imm, true, SH, MB, ME)) { 3357 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3358 getI32Imm(SH, dl), getI32Imm(MB, dl), 3359 getI32Imm(ME, dl) }; 3360 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 3361 return; 3362 } 3363 3364 // Other cases are autogenerated. 3365 break; 3366 } 3367 // FIXME: Remove this once the ANDI glue bug is fixed: 3368 case PPCISD::ANDIo_1_EQ_BIT: 3369 case PPCISD::ANDIo_1_GT_BIT: { 3370 if (!ANDIGlueBug) 3371 break; 3372 3373 EVT InVT = N->getOperand(0).getValueType(); 3374 assert((InVT == MVT::i64 || InVT == MVT::i32) && 3375 "Invalid input type for ANDIo_1_EQ_BIT"); 3376 3377 unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo; 3378 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue, 3379 N->getOperand(0), 3380 CurDAG->getTargetConstant(1, dl, InVT)), 3381 0); 3382 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); 3383 SDValue SRIdxVal = 3384 CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ? 3385 PPC::sub_eq : PPC::sub_gt, dl, MVT::i32); 3386 3387 CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg, 3388 SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */); 3389 return; 3390 } 3391 case ISD::SELECT_CC: { 3392 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 3393 EVT PtrVT = 3394 CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); 3395 bool isPPC64 = (PtrVT == MVT::i64); 3396 3397 // If this is a select of i1 operands, we'll pattern match it. 3398 if (PPCSubTarget->useCRBits() && 3399 N->getOperand(0).getValueType() == MVT::i1) 3400 break; 3401 3402 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc 3403 if (!isPPC64) 3404 if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 3405 if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2))) 3406 if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3))) 3407 if (N1C->isNullValue() && N3C->isNullValue() && 3408 N2C->getZExtValue() == 1ULL && CC == ISD::SETNE && 3409 // FIXME: Implement this optzn for PPC64. 3410 N->getValueType(0) == MVT::i32) { 3411 SDNode *Tmp = 3412 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 3413 N->getOperand(0), getI32Imm(~0U, dl)); 3414 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0), 3415 N->getOperand(0), SDValue(Tmp, 1)); 3416 return; 3417 } 3418 3419 SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); 3420 3421 if (N->getValueType(0) == MVT::i1) { 3422 // An i1 select is: (c & t) | (!c & f). 3423 bool Inv; 3424 unsigned Idx = getCRIdxForSetCC(CC, Inv); 3425 3426 unsigned SRI; 3427 switch (Idx) { 3428 default: llvm_unreachable("Invalid CC index"); 3429 case 0: SRI = PPC::sub_lt; break; 3430 case 1: SRI = PPC::sub_gt; break; 3431 case 2: SRI = PPC::sub_eq; break; 3432 case 3: SRI = PPC::sub_un; break; 3433 } 3434 3435 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg); 3436 3437 SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1, 3438 CCBit, CCBit), 0); 3439 SDValue C = Inv ? NotCCBit : CCBit, 3440 NotC = Inv ? CCBit : NotCCBit; 3441 3442 SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, 3443 C, N->getOperand(2)), 0); 3444 SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, 3445 NotC, N->getOperand(3)), 0); 3446 3447 CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); 3448 return; 3449 } 3450 3451 unsigned BROpc = getPredicateForSetCC(CC); 3452 3453 unsigned SelectCCOp; 3454 if (N->getValueType(0) == MVT::i32) 3455 SelectCCOp = PPC::SELECT_CC_I4; 3456 else if (N->getValueType(0) == MVT::i64) 3457 SelectCCOp = PPC::SELECT_CC_I8; 3458 else if (N->getValueType(0) == MVT::f32) 3459 if (PPCSubTarget->hasP8Vector()) 3460 SelectCCOp = PPC::SELECT_CC_VSSRC; 3461 else 3462 SelectCCOp = PPC::SELECT_CC_F4; 3463 else if (N->getValueType(0) == MVT::f64) 3464 if (PPCSubTarget->hasVSX()) 3465 SelectCCOp = PPC::SELECT_CC_VSFRC; 3466 else 3467 SelectCCOp = PPC::SELECT_CC_F8; 3468 else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64) 3469 SelectCCOp = PPC::SELECT_CC_QFRC; 3470 else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32) 3471 SelectCCOp = PPC::SELECT_CC_QSRC; 3472 else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1) 3473 SelectCCOp = PPC::SELECT_CC_QBRC; 3474 else if (N->getValueType(0) == MVT::v2f64 || 3475 N->getValueType(0) == MVT::v2i64) 3476 SelectCCOp = PPC::SELECT_CC_VSRC; 3477 else 3478 SelectCCOp = PPC::SELECT_CC_VRRC; 3479 3480 SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3), 3481 getI32Imm(BROpc, dl) }; 3482 CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); 3483 return; 3484 } 3485 case ISD::VSELECT: 3486 if (PPCSubTarget->hasVSX()) { 3487 SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) }; 3488 CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops); 3489 return; 3490 } 3491 break; 3492 3493 case ISD::VECTOR_SHUFFLE: 3494 if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || 3495 N->getValueType(0) == MVT::v2i64)) { 3496 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 3497 3498 SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1), 3499 Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1); 3500 unsigned DM[2]; 3501 3502 for (int i = 0; i < 2; ++i) 3503 if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2) 3504 DM[i] = 0; 3505 else 3506 DM[i] = 1; 3507 3508 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 && 3509 Op1.getOpcode() == ISD::SCALAR_TO_VECTOR && 3510 isa<LoadSDNode>(Op1.getOperand(0))) { 3511 LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0)); 3512 SDValue Base, Offset; 3513 3514 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() && 3515 (LD->getMemoryVT() == MVT::f64 || 3516 LD->getMemoryVT() == MVT::i64) && 3517 SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { 3518 SDValue Chain = LD->getChain(); 3519 SDValue Ops[] = { Base, Offset, Chain }; 3520 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 3521 MemOp[0] = LD->getMemOperand(); 3522 SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, 3523 N->getValueType(0), Ops); 3524 cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1); 3525 return; 3526 } 3527 } 3528 3529 // For little endian, we must swap the input operands and adjust 3530 // the mask elements (reverse and invert them). 3531 if (PPCSubTarget->isLittleEndian()) { 3532 std::swap(Op1, Op2); 3533 unsigned tmp = DM[0]; 3534 DM[0] = 1 - DM[1]; 3535 DM[1] = 1 - tmp; 3536 } 3537 3538 SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl, 3539 MVT::i32); 3540 SDValue Ops[] = { Op1, Op2, DMV }; 3541 CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); 3542 return; 3543 } 3544 3545 break; 3546 case PPCISD::BDNZ: 3547 case PPCISD::BDZ: { 3548 bool IsPPC64 = PPCSubTarget->isPPC64(); 3549 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; 3550 CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ 3551 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) 3552 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), 3553 MVT::Other, Ops); 3554 return; 3555 } 3556 case PPCISD::COND_BRANCH: { 3557 // Op #0 is the Chain. 3558 // Op #1 is the PPC::PRED_* number. 3559 // Op #2 is the CR# 3560 // Op #3 is the Dest MBB 3561 // Op #4 is the Flag. 3562 // Prevent PPC::PRED_* from being selected into LI. 3563 unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 3564 if (EnableBranchHint) 3565 PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3)); 3566 3567 SDValue Pred = getI32Imm(PCC, dl); 3568 SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), 3569 N->getOperand(0), N->getOperand(4) }; 3570 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); 3571 return; 3572 } 3573 case ISD::BR_CC: { 3574 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 3575 unsigned PCC = getPredicateForSetCC(CC); 3576 3577 if (N->getOperand(2).getValueType() == MVT::i1) { 3578 unsigned Opc; 3579 bool Swap; 3580 switch (PCC) { 3581 default: llvm_unreachable("Unexpected Boolean-operand predicate"); 3582 case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break; 3583 case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break; 3584 case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break; 3585 case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break; 3586 case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break; 3587 case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; 3588 } 3589 3590 SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, 3591 N->getOperand(Swap ? 3 : 2), 3592 N->getOperand(Swap ? 2 : 3)), 0); 3593 CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4), 3594 N->getOperand(0)); 3595 return; 3596 } 3597 3598 if (EnableBranchHint) 3599 PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4)); 3600 3601 SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); 3602 SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, 3603 N->getOperand(4), N->getOperand(0) }; 3604 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); 3605 return; 3606 } 3607 case ISD::BRIND: { 3608 // FIXME: Should custom lower this. 3609 SDValue Chain = N->getOperand(0); 3610 SDValue Target = N->getOperand(1); 3611 unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8; 3612 unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8; 3613 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, 3614 Chain), 0); 3615 CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); 3616 return; 3617 } 3618 case PPCISD::TOC_ENTRY: { 3619 assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) && 3620 "Only supported for 64-bit ABI and 32-bit SVR4"); 3621 if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) { 3622 SDValue GA = N->getOperand(0); 3623 SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, 3624 N->getOperand(1)); 3625 transferMemOperands(N, MN); 3626 ReplaceNode(N, MN); 3627 return; 3628 } 3629 3630 // For medium and large code model, we generate two instructions as 3631 // described below. Otherwise we allow SelectCodeCommon to handle this, 3632 // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. 3633 CodeModel::Model CModel = TM.getCodeModel(); 3634 if (CModel != CodeModel::Medium && CModel != CodeModel::Large) 3635 break; 3636 3637 // The first source operand is a TargetGlobalAddress or a TargetJumpTable. 3638 // If it must be toc-referenced according to PPCSubTarget, we generate: 3639 // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>)) 3640 // Otherwise we generate: 3641 // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>) 3642 SDValue GA = N->getOperand(0); 3643 SDValue TOCbase = N->getOperand(1); 3644 SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, 3645 TOCbase, GA); 3646 3647 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) || 3648 CModel == CodeModel::Large) { 3649 SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, 3650 SDValue(Tmp, 0)); 3651 transferMemOperands(N, MN); 3652 ReplaceNode(N, MN); 3653 return; 3654 } 3655 3656 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { 3657 const GlobalValue *GV = G->getGlobal(); 3658 unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); 3659 if (GVFlags & PPCII::MO_NLP_FLAG) { 3660 SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, 3661 SDValue(Tmp, 0)); 3662 transferMemOperands(N, MN); 3663 ReplaceNode(N, MN); 3664 return; 3665 } 3666 } 3667 3668 ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, 3669 SDValue(Tmp, 0), GA)); 3670 return; 3671 } 3672 case PPCISD::PPC32_PICGOT: 3673 // Generate a PIC-safe GOT reference. 3674 assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && 3675 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); 3676 CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, 3677 PPCLowering->getPointerTy(CurDAG->getDataLayout()), 3678 MVT::i32); 3679 return; 3680 3681 case PPCISD::VADD_SPLAT: { 3682 // This expands into one of three sequences, depending on whether 3683 // the first operand is odd or even, positive or negative. 3684 assert(isa<ConstantSDNode>(N->getOperand(0)) && 3685 isa<ConstantSDNode>(N->getOperand(1)) && 3686 "Invalid operand on VADD_SPLAT!"); 3687 3688 int Elt = N->getConstantOperandVal(0); 3689 int EltSize = N->getConstantOperandVal(1); 3690 unsigned Opc1, Opc2, Opc3; 3691 EVT VT; 3692 3693 if (EltSize == 1) { 3694 Opc1 = PPC::VSPLTISB; 3695 Opc2 = PPC::VADDUBM; 3696 Opc3 = PPC::VSUBUBM; 3697 VT = MVT::v16i8; 3698 } else if (EltSize == 2) { 3699 Opc1 = PPC::VSPLTISH; 3700 Opc2 = PPC::VADDUHM; 3701 Opc3 = PPC::VSUBUHM; 3702 VT = MVT::v8i16; 3703 } else { 3704 assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!"); 3705 Opc1 = PPC::VSPLTISW; 3706 Opc2 = PPC::VADDUWM; 3707 Opc3 = PPC::VSUBUWM; 3708 VT = MVT::v4i32; 3709 } 3710 3711 if ((Elt & 1) == 0) { 3712 // Elt is even, in the range [-32,-18] + [16,30]. 3713 // 3714 // Convert: VADD_SPLAT elt, size 3715 // Into: tmp = VSPLTIS[BHW] elt 3716 // VADDU[BHW]M tmp, tmp 3717 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 3718 SDValue EltVal = getI32Imm(Elt >> 1, dl); 3719 SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 3720 SDValue TmpVal = SDValue(Tmp, 0); 3721 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal)); 3722 return; 3723 } else if (Elt > 0) { 3724 // Elt is odd and positive, in the range [17,31]. 3725 // 3726 // Convert: VADD_SPLAT elt, size 3727 // Into: tmp1 = VSPLTIS[BHW] elt-16 3728 // tmp2 = VSPLTIS[BHW] -16 3729 // VSUBU[BHW]M tmp1, tmp2 3730 SDValue EltVal = getI32Imm(Elt - 16, dl); 3731 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 3732 EltVal = getI32Imm(-16, dl); 3733 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 3734 ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), 3735 SDValue(Tmp2, 0))); 3736 return; 3737 } else { 3738 // Elt is odd and negative, in the range [-31,-17]. 3739 // 3740 // Convert: VADD_SPLAT elt, size 3741 // Into: tmp1 = VSPLTIS[BHW] elt+16 3742 // tmp2 = VSPLTIS[BHW] -16 3743 // VADDU[BHW]M tmp1, tmp2 3744 SDValue EltVal = getI32Imm(Elt + 16, dl); 3745 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 3746 EltVal = getI32Imm(-16, dl); 3747 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 3748 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), 3749 SDValue(Tmp2, 0))); 3750 return; 3751 } 3752 } 3753 } 3754 3755 SelectCode(N); 3756 } 3757 3758 // If the target supports the cmpb instruction, do the idiom recognition here. 3759 // We don't do this as a DAG combine because we don't want to do it as nodes 3760 // are being combined (because we might miss part of the eventual idiom). We 3761 // don't want to do it during instruction selection because we want to reuse 3762 // the logic for lowering the masking operations already part of the 3763 // instruction selector. 3764 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { 3765 SDLoc dl(N); 3766 3767 assert(N->getOpcode() == ISD::OR && 3768 "Only OR nodes are supported for CMPB"); 3769 3770 SDValue Res; 3771 if (!PPCSubTarget->hasCMPB()) 3772 return Res; 3773 3774 if (N->getValueType(0) != MVT::i32 && 3775 N->getValueType(0) != MVT::i64) 3776 return Res; 3777 3778 EVT VT = N->getValueType(0); 3779 3780 SDValue RHS, LHS; 3781 bool BytesFound[8] = {false, false, false, false, false, false, false, false}; 3782 uint64_t Mask = 0, Alt = 0; 3783 3784 auto IsByteSelectCC = [this](SDValue O, unsigned &b, 3785 uint64_t &Mask, uint64_t &Alt, 3786 SDValue &LHS, SDValue &RHS) { 3787 if (O.getOpcode() != ISD::SELECT_CC) 3788 return false; 3789 ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get(); 3790 3791 if (!isa<ConstantSDNode>(O.getOperand(2)) || 3792 !isa<ConstantSDNode>(O.getOperand(3))) 3793 return false; 3794 3795 uint64_t PM = O.getConstantOperandVal(2); 3796 uint64_t PAlt = O.getConstantOperandVal(3); 3797 for (b = 0; b < 8; ++b) { 3798 uint64_t Mask = UINT64_C(0xFF) << (8*b); 3799 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt) 3800 break; 3801 } 3802 3803 if (b == 8) 3804 return false; 3805 Mask |= PM; 3806 Alt |= PAlt; 3807 3808 if (!isa<ConstantSDNode>(O.getOperand(1)) || 3809 O.getConstantOperandVal(1) != 0) { 3810 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1); 3811 if (Op0.getOpcode() == ISD::TRUNCATE) 3812 Op0 = Op0.getOperand(0); 3813 if (Op1.getOpcode() == ISD::TRUNCATE) 3814 Op1 = Op1.getOperand(0); 3815 3816 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL && 3817 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ && 3818 isa<ConstantSDNode>(Op0.getOperand(1))) { 3819 3820 unsigned Bits = Op0.getValueSizeInBits(); 3821 if (b != Bits/8-1) 3822 return false; 3823 if (Op0.getConstantOperandVal(1) != Bits-8) 3824 return false; 3825 3826 LHS = Op0.getOperand(0); 3827 RHS = Op1.getOperand(0); 3828 return true; 3829 } 3830 3831 // When we have small integers (i16 to be specific), the form present 3832 // post-legalization uses SETULT in the SELECT_CC for the 3833 // higher-order byte, depending on the fact that the 3834 // even-higher-order bytes are known to all be zero, for example: 3835 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult 3836 // (so when the second byte is the same, because all higher-order 3837 // bits from bytes 3 and 4 are known to be zero, the result of the 3838 // xor can be at most 255) 3839 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT && 3840 isa<ConstantSDNode>(O.getOperand(1))) { 3841 3842 uint64_t ULim = O.getConstantOperandVal(1); 3843 if (ULim != (UINT64_C(1) << b*8)) 3844 return false; 3845 3846 // Now we need to make sure that the upper bytes are known to be 3847 // zero. 3848 unsigned Bits = Op0.getValueSizeInBits(); 3849 if (!CurDAG->MaskedValueIsZero( 3850 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8))) 3851 return false; 3852 3853 LHS = Op0.getOperand(0); 3854 RHS = Op0.getOperand(1); 3855 return true; 3856 } 3857 3858 return false; 3859 } 3860 3861 if (CC != ISD::SETEQ) 3862 return false; 3863 3864 SDValue Op = O.getOperand(0); 3865 if (Op.getOpcode() == ISD::AND) { 3866 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3867 return false; 3868 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b))) 3869 return false; 3870 3871 SDValue XOR = Op.getOperand(0); 3872 if (XOR.getOpcode() == ISD::TRUNCATE) 3873 XOR = XOR.getOperand(0); 3874 if (XOR.getOpcode() != ISD::XOR) 3875 return false; 3876 3877 LHS = XOR.getOperand(0); 3878 RHS = XOR.getOperand(1); 3879 return true; 3880 } else if (Op.getOpcode() == ISD::SRL) { 3881 if (!isa<ConstantSDNode>(Op.getOperand(1))) 3882 return false; 3883 unsigned Bits = Op.getValueSizeInBits(); 3884 if (b != Bits/8-1) 3885 return false; 3886 if (Op.getConstantOperandVal(1) != Bits-8) 3887 return false; 3888 3889 SDValue XOR = Op.getOperand(0); 3890 if (XOR.getOpcode() == ISD::TRUNCATE) 3891 XOR = XOR.getOperand(0); 3892 if (XOR.getOpcode() != ISD::XOR) 3893 return false; 3894 3895 LHS = XOR.getOperand(0); 3896 RHS = XOR.getOperand(1); 3897 return true; 3898 } 3899 3900 return false; 3901 }; 3902 3903 SmallVector<SDValue, 8> Queue(1, SDValue(N, 0)); 3904 while (!Queue.empty()) { 3905 SDValue V = Queue.pop_back_val(); 3906 3907 for (const SDValue &O : V.getNode()->ops()) { 3908 unsigned b; 3909 uint64_t M = 0, A = 0; 3910 SDValue OLHS, ORHS; 3911 if (O.getOpcode() == ISD::OR) { 3912 Queue.push_back(O); 3913 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) { 3914 if (!LHS) { 3915 LHS = OLHS; 3916 RHS = ORHS; 3917 BytesFound[b] = true; 3918 Mask |= M; 3919 Alt |= A; 3920 } else if ((LHS == ORHS && RHS == OLHS) || 3921 (RHS == ORHS && LHS == OLHS)) { 3922 BytesFound[b] = true; 3923 Mask |= M; 3924 Alt |= A; 3925 } else { 3926 return Res; 3927 } 3928 } else { 3929 return Res; 3930 } 3931 } 3932 } 3933 3934 unsigned LastB = 0, BCnt = 0; 3935 for (unsigned i = 0; i < 8; ++i) 3936 if (BytesFound[LastB]) { 3937 ++BCnt; 3938 LastB = i; 3939 } 3940 3941 if (!LastB || BCnt < 2) 3942 return Res; 3943 3944 // Because we'll be zero-extending the output anyway if don't have a specific 3945 // value for each input byte (via the Mask), we can 'anyext' the inputs. 3946 if (LHS.getValueType() != VT) { 3947 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT); 3948 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT); 3949 } 3950 3951 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS); 3952 3953 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1); 3954 if (NonTrivialMask && !Alt) { 3955 // Res = Mask & CMPB 3956 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 3957 CurDAG->getConstant(Mask, dl, VT)); 3958 } else if (Alt) { 3959 // Res = (CMPB & Mask) | (~CMPB & Alt) 3960 // Which, as suggested here: 3961 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge 3962 // can be written as: 3963 // Res = Alt ^ ((Alt ^ Mask) & CMPB) 3964 // useful because the (Alt ^ Mask) can be pre-computed. 3965 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 3966 CurDAG->getConstant(Mask ^ Alt, dl, VT)); 3967 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, 3968 CurDAG->getConstant(Alt, dl, VT)); 3969 } 3970 3971 return Res; 3972 } 3973 3974 // When CR bit registers are enabled, an extension of an i1 variable to a i32 3975 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus 3976 // involves constant materialization of a 0 or a 1 or both. If the result of 3977 // the extension is then operated upon by some operator that can be constant 3978 // folded with a constant 0 or 1, and that constant can be materialized using 3979 // only one instruction (like a zero or one), then we should fold in those 3980 // operations with the select. 3981 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { 3982 if (!PPCSubTarget->useCRBits()) 3983 return; 3984 3985 if (N->getOpcode() != ISD::ZERO_EXTEND && 3986 N->getOpcode() != ISD::SIGN_EXTEND && 3987 N->getOpcode() != ISD::ANY_EXTEND) 3988 return; 3989 3990 if (N->getOperand(0).getValueType() != MVT::i1) 3991 return; 3992 3993 if (!N->hasOneUse()) 3994 return; 3995 3996 SDLoc dl(N); 3997 EVT VT = N->getValueType(0); 3998 SDValue Cond = N->getOperand(0); 3999 SDValue ConstTrue = 4000 CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT); 4001 SDValue ConstFalse = CurDAG->getConstant(0, dl, VT); 4002 4003 do { 4004 SDNode *User = *N->use_begin(); 4005 if (User->getNumOperands() != 2) 4006 break; 4007 4008 auto TryFold = [this, N, User, dl](SDValue Val) { 4009 SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1); 4010 SDValue O0 = UserO0.getNode() == N ? Val : UserO0; 4011 SDValue O1 = UserO1.getNode() == N ? Val : UserO1; 4012 4013 return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl, 4014 User->getValueType(0), 4015 O0.getNode(), O1.getNode()); 4016 }; 4017 4018 // FIXME: When the semantics of the interaction between select and undef 4019 // are clearly defined, it may turn out to be unnecessary to break here. 4020 SDValue TrueRes = TryFold(ConstTrue); 4021 if (!TrueRes || TrueRes.isUndef()) 4022 break; 4023 SDValue FalseRes = TryFold(ConstFalse); 4024 if (!FalseRes || FalseRes.isUndef()) 4025 break; 4026 4027 // For us to materialize these using one instruction, we must be able to 4028 // represent them as signed 16-bit integers. 4029 uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(), 4030 False = cast<ConstantSDNode>(FalseRes)->getZExtValue(); 4031 if (!isInt<16>(True) || !isInt<16>(False)) 4032 break; 4033 4034 // We can replace User with a new SELECT node, and try again to see if we 4035 // can fold the select with its user. 4036 Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes); 4037 N = User; 4038 ConstTrue = TrueRes; 4039 ConstFalse = FalseRes; 4040 } while (N->hasOneUse()); 4041 } 4042 4043 void PPCDAGToDAGISel::PreprocessISelDAG() { 4044 SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); 4045 ++Position; 4046 4047 bool MadeChange = false; 4048 while (Position != CurDAG->allnodes_begin()) { 4049 SDNode *N = &*--Position; 4050 if (N->use_empty()) 4051 continue; 4052 4053 SDValue Res; 4054 switch (N->getOpcode()) { 4055 default: break; 4056 case ISD::OR: 4057 Res = combineToCMPB(N); 4058 break; 4059 } 4060 4061 if (!Res) 4062 foldBoolExts(Res, N); 4063 4064 if (Res) { 4065 DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: "); 4066 DEBUG(N->dump(CurDAG)); 4067 DEBUG(dbgs() << "\nNew: "); 4068 DEBUG(Res.getNode()->dump(CurDAG)); 4069 DEBUG(dbgs() << "\n"); 4070 4071 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); 4072 MadeChange = true; 4073 } 4074 } 4075 4076 if (MadeChange) 4077 CurDAG->RemoveDeadNodes(); 4078 } 4079 4080 /// PostprocessISelDAG - Perform some late peephole optimizations 4081 /// on the DAG representation. 4082 void PPCDAGToDAGISel::PostprocessISelDAG() { 4083 // Skip peepholes at -O0. 4084 if (TM.getOptLevel() == CodeGenOpt::None) 4085 return; 4086 4087 PeepholePPC64(); 4088 PeepholeCROps(); 4089 PeepholePPC64ZExt(); 4090 } 4091 4092 // Check if all users of this node will become isel where the second operand 4093 // is the constant zero. If this is so, and if we can negate the condition, 4094 // then we can flip the true and false operands. This will allow the zero to 4095 // be folded with the isel so that we don't need to materialize a register 4096 // containing zero. 4097 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { 4098 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 4099 UI != UE; ++UI) { 4100 SDNode *User = *UI; 4101 if (!User->isMachineOpcode()) 4102 return false; 4103 if (User->getMachineOpcode() != PPC::SELECT_I4 && 4104 User->getMachineOpcode() != PPC::SELECT_I8) 4105 return false; 4106 4107 SDNode *Op2 = User->getOperand(2).getNode(); 4108 if (!Op2->isMachineOpcode()) 4109 return false; 4110 4111 if (Op2->getMachineOpcode() != PPC::LI && 4112 Op2->getMachineOpcode() != PPC::LI8) 4113 return false; 4114 4115 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0)); 4116 if (!C) 4117 return false; 4118 4119 if (!C->isNullValue()) 4120 return false; 4121 } 4122 4123 return true; 4124 } 4125 4126 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { 4127 SmallVector<SDNode *, 4> ToReplace; 4128 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 4129 UI != UE; ++UI) { 4130 SDNode *User = *UI; 4131 assert((User->getMachineOpcode() == PPC::SELECT_I4 || 4132 User->getMachineOpcode() == PPC::SELECT_I8) && 4133 "Must have all select users"); 4134 ToReplace.push_back(User); 4135 } 4136 4137 for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(), 4138 UE = ToReplace.end(); UI != UE; ++UI) { 4139 SDNode *User = *UI; 4140 SDNode *ResNode = 4141 CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User), 4142 User->getValueType(0), User->getOperand(0), 4143 User->getOperand(2), 4144 User->getOperand(1)); 4145 4146 DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); 4147 DEBUG(User->dump(CurDAG)); 4148 DEBUG(dbgs() << "\nNew: "); 4149 DEBUG(ResNode->dump(CurDAG)); 4150 DEBUG(dbgs() << "\n"); 4151 4152 ReplaceUses(User, ResNode); 4153 } 4154 } 4155 4156 void PPCDAGToDAGISel::PeepholeCROps() { 4157 bool IsModified; 4158 do { 4159 IsModified = false; 4160 for (SDNode &Node : CurDAG->allnodes()) { 4161 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); 4162 if (!MachineNode || MachineNode->use_empty()) 4163 continue; 4164 SDNode *ResNode = MachineNode; 4165 4166 bool Op1Set = false, Op1Unset = false, 4167 Op1Not = false, 4168 Op2Set = false, Op2Unset = false, 4169 Op2Not = false; 4170 4171 unsigned Opcode = MachineNode->getMachineOpcode(); 4172 switch (Opcode) { 4173 default: break; 4174 case PPC::CRAND: 4175 case PPC::CRNAND: 4176 case PPC::CROR: 4177 case PPC::CRXOR: 4178 case PPC::CRNOR: 4179 case PPC::CREQV: 4180 case PPC::CRANDC: 4181 case PPC::CRORC: { 4182 SDValue Op = MachineNode->getOperand(1); 4183 if (Op.isMachineOpcode()) { 4184 if (Op.getMachineOpcode() == PPC::CRSET) 4185 Op2Set = true; 4186 else if (Op.getMachineOpcode() == PPC::CRUNSET) 4187 Op2Unset = true; 4188 else if (Op.getMachineOpcode() == PPC::CRNOR && 4189 Op.getOperand(0) == Op.getOperand(1)) 4190 Op2Not = true; 4191 } 4192 LLVM_FALLTHROUGH; 4193 } 4194 case PPC::BC: 4195 case PPC::BCn: 4196 case PPC::SELECT_I4: 4197 case PPC::SELECT_I8: 4198 case PPC::SELECT_F4: 4199 case PPC::SELECT_F8: 4200 case PPC::SELECT_QFRC: 4201 case PPC::SELECT_QSRC: 4202 case PPC::SELECT_QBRC: 4203 case PPC::SELECT_VRRC: 4204 case PPC::SELECT_VSFRC: 4205 case PPC::SELECT_VSSRC: 4206 case PPC::SELECT_VSRC: { 4207 SDValue Op = MachineNode->getOperand(0); 4208 if (Op.isMachineOpcode()) { 4209 if (Op.getMachineOpcode() == PPC::CRSET) 4210 Op1Set = true; 4211 else if (Op.getMachineOpcode() == PPC::CRUNSET) 4212 Op1Unset = true; 4213 else if (Op.getMachineOpcode() == PPC::CRNOR && 4214 Op.getOperand(0) == Op.getOperand(1)) 4215 Op1Not = true; 4216 } 4217 } 4218 break; 4219 } 4220 4221 bool SelectSwap = false; 4222 switch (Opcode) { 4223 default: break; 4224 case PPC::CRAND: 4225 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4226 // x & x = x 4227 ResNode = MachineNode->getOperand(0).getNode(); 4228 else if (Op1Set) 4229 // 1 & y = y 4230 ResNode = MachineNode->getOperand(1).getNode(); 4231 else if (Op2Set) 4232 // x & 1 = x 4233 ResNode = MachineNode->getOperand(0).getNode(); 4234 else if (Op1Unset || Op2Unset) 4235 // x & 0 = 0 & y = 0 4236 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 4237 MVT::i1); 4238 else if (Op1Not) 4239 // ~x & y = andc(y, x) 4240 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 4241 MVT::i1, MachineNode->getOperand(1), 4242 MachineNode->getOperand(0). 4243 getOperand(0)); 4244 else if (Op2Not) 4245 // x & ~y = andc(x, y) 4246 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 4247 MVT::i1, MachineNode->getOperand(0), 4248 MachineNode->getOperand(1). 4249 getOperand(0)); 4250 else if (AllUsersSelectZero(MachineNode)) { 4251 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), 4252 MVT::i1, MachineNode->getOperand(0), 4253 MachineNode->getOperand(1)); 4254 SelectSwap = true; 4255 } 4256 break; 4257 case PPC::CRNAND: 4258 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4259 // nand(x, x) -> nor(x, x) 4260 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4261 MVT::i1, MachineNode->getOperand(0), 4262 MachineNode->getOperand(0)); 4263 else if (Op1Set) 4264 // nand(1, y) -> nor(y, y) 4265 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4266 MVT::i1, MachineNode->getOperand(1), 4267 MachineNode->getOperand(1)); 4268 else if (Op2Set) 4269 // nand(x, 1) -> nor(x, x) 4270 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4271 MVT::i1, MachineNode->getOperand(0), 4272 MachineNode->getOperand(0)); 4273 else if (Op1Unset || Op2Unset) 4274 // nand(x, 0) = nand(0, y) = 1 4275 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 4276 MVT::i1); 4277 else if (Op1Not) 4278 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y) 4279 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 4280 MVT::i1, MachineNode->getOperand(0). 4281 getOperand(0), 4282 MachineNode->getOperand(1)); 4283 else if (Op2Not) 4284 // nand(x, ~y) = ~x | y = orc(y, x) 4285 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 4286 MVT::i1, MachineNode->getOperand(1). 4287 getOperand(0), 4288 MachineNode->getOperand(0)); 4289 else if (AllUsersSelectZero(MachineNode)) { 4290 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), 4291 MVT::i1, MachineNode->getOperand(0), 4292 MachineNode->getOperand(1)); 4293 SelectSwap = true; 4294 } 4295 break; 4296 case PPC::CROR: 4297 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4298 // x | x = x 4299 ResNode = MachineNode->getOperand(0).getNode(); 4300 else if (Op1Set || Op2Set) 4301 // x | 1 = 1 | y = 1 4302 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 4303 MVT::i1); 4304 else if (Op1Unset) 4305 // 0 | y = y 4306 ResNode = MachineNode->getOperand(1).getNode(); 4307 else if (Op2Unset) 4308 // x | 0 = x 4309 ResNode = MachineNode->getOperand(0).getNode(); 4310 else if (Op1Not) 4311 // ~x | y = orc(y, x) 4312 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 4313 MVT::i1, MachineNode->getOperand(1), 4314 MachineNode->getOperand(0). 4315 getOperand(0)); 4316 else if (Op2Not) 4317 // x | ~y = orc(x, y) 4318 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 4319 MVT::i1, MachineNode->getOperand(0), 4320 MachineNode->getOperand(1). 4321 getOperand(0)); 4322 else if (AllUsersSelectZero(MachineNode)) { 4323 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4324 MVT::i1, MachineNode->getOperand(0), 4325 MachineNode->getOperand(1)); 4326 SelectSwap = true; 4327 } 4328 break; 4329 case PPC::CRXOR: 4330 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4331 // xor(x, x) = 0 4332 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 4333 MVT::i1); 4334 else if (Op1Set) 4335 // xor(1, y) -> nor(y, y) 4336 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4337 MVT::i1, MachineNode->getOperand(1), 4338 MachineNode->getOperand(1)); 4339 else if (Op2Set) 4340 // xor(x, 1) -> nor(x, x) 4341 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4342 MVT::i1, MachineNode->getOperand(0), 4343 MachineNode->getOperand(0)); 4344 else if (Op1Unset) 4345 // xor(0, y) = y 4346 ResNode = MachineNode->getOperand(1).getNode(); 4347 else if (Op2Unset) 4348 // xor(x, 0) = x 4349 ResNode = MachineNode->getOperand(0).getNode(); 4350 else if (Op1Not) 4351 // xor(~x, y) = eqv(x, y) 4352 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), 4353 MVT::i1, MachineNode->getOperand(0). 4354 getOperand(0), 4355 MachineNode->getOperand(1)); 4356 else if (Op2Not) 4357 // xor(x, ~y) = eqv(x, y) 4358 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), 4359 MVT::i1, MachineNode->getOperand(0), 4360 MachineNode->getOperand(1). 4361 getOperand(0)); 4362 else if (AllUsersSelectZero(MachineNode)) { 4363 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), 4364 MVT::i1, MachineNode->getOperand(0), 4365 MachineNode->getOperand(1)); 4366 SelectSwap = true; 4367 } 4368 break; 4369 case PPC::CRNOR: 4370 if (Op1Set || Op2Set) 4371 // nor(1, y) -> 0 4372 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 4373 MVT::i1); 4374 else if (Op1Unset) 4375 // nor(0, y) = ~y -> nor(y, y) 4376 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4377 MVT::i1, MachineNode->getOperand(1), 4378 MachineNode->getOperand(1)); 4379 else if (Op2Unset) 4380 // nor(x, 0) = ~x 4381 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4382 MVT::i1, MachineNode->getOperand(0), 4383 MachineNode->getOperand(0)); 4384 else if (Op1Not) 4385 // nor(~x, y) = andc(x, y) 4386 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 4387 MVT::i1, MachineNode->getOperand(0). 4388 getOperand(0), 4389 MachineNode->getOperand(1)); 4390 else if (Op2Not) 4391 // nor(x, ~y) = andc(y, x) 4392 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 4393 MVT::i1, MachineNode->getOperand(1). 4394 getOperand(0), 4395 MachineNode->getOperand(0)); 4396 else if (AllUsersSelectZero(MachineNode)) { 4397 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), 4398 MVT::i1, MachineNode->getOperand(0), 4399 MachineNode->getOperand(1)); 4400 SelectSwap = true; 4401 } 4402 break; 4403 case PPC::CREQV: 4404 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4405 // eqv(x, x) = 1 4406 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 4407 MVT::i1); 4408 else if (Op1Set) 4409 // eqv(1, y) = y 4410 ResNode = MachineNode->getOperand(1).getNode(); 4411 else if (Op2Set) 4412 // eqv(x, 1) = x 4413 ResNode = MachineNode->getOperand(0).getNode(); 4414 else if (Op1Unset) 4415 // eqv(0, y) = ~y -> nor(y, y) 4416 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4417 MVT::i1, MachineNode->getOperand(1), 4418 MachineNode->getOperand(1)); 4419 else if (Op2Unset) 4420 // eqv(x, 0) = ~x 4421 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4422 MVT::i1, MachineNode->getOperand(0), 4423 MachineNode->getOperand(0)); 4424 else if (Op1Not) 4425 // eqv(~x, y) = xor(x, y) 4426 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), 4427 MVT::i1, MachineNode->getOperand(0). 4428 getOperand(0), 4429 MachineNode->getOperand(1)); 4430 else if (Op2Not) 4431 // eqv(x, ~y) = xor(x, y) 4432 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), 4433 MVT::i1, MachineNode->getOperand(0), 4434 MachineNode->getOperand(1). 4435 getOperand(0)); 4436 else if (AllUsersSelectZero(MachineNode)) { 4437 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), 4438 MVT::i1, MachineNode->getOperand(0), 4439 MachineNode->getOperand(1)); 4440 SelectSwap = true; 4441 } 4442 break; 4443 case PPC::CRANDC: 4444 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4445 // andc(x, x) = 0 4446 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 4447 MVT::i1); 4448 else if (Op1Set) 4449 // andc(1, y) = ~y 4450 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4451 MVT::i1, MachineNode->getOperand(1), 4452 MachineNode->getOperand(1)); 4453 else if (Op1Unset || Op2Set) 4454 // andc(0, y) = andc(x, 1) = 0 4455 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 4456 MVT::i1); 4457 else if (Op2Unset) 4458 // andc(x, 0) = x 4459 ResNode = MachineNode->getOperand(0).getNode(); 4460 else if (Op1Not) 4461 // andc(~x, y) = ~(x | y) = nor(x, y) 4462 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4463 MVT::i1, MachineNode->getOperand(0). 4464 getOperand(0), 4465 MachineNode->getOperand(1)); 4466 else if (Op2Not) 4467 // andc(x, ~y) = x & y 4468 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), 4469 MVT::i1, MachineNode->getOperand(0), 4470 MachineNode->getOperand(1). 4471 getOperand(0)); 4472 else if (AllUsersSelectZero(MachineNode)) { 4473 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 4474 MVT::i1, MachineNode->getOperand(1), 4475 MachineNode->getOperand(0)); 4476 SelectSwap = true; 4477 } 4478 break; 4479 case PPC::CRORC: 4480 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 4481 // orc(x, x) = 1 4482 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 4483 MVT::i1); 4484 else if (Op1Set || Op2Unset) 4485 // orc(1, y) = orc(x, 0) = 1 4486 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 4487 MVT::i1); 4488 else if (Op2Set) 4489 // orc(x, 1) = x 4490 ResNode = MachineNode->getOperand(0).getNode(); 4491 else if (Op1Unset) 4492 // orc(0, y) = ~y 4493 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 4494 MVT::i1, MachineNode->getOperand(1), 4495 MachineNode->getOperand(1)); 4496 else if (Op1Not) 4497 // orc(~x, y) = ~(x & y) = nand(x, y) 4498 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), 4499 MVT::i1, MachineNode->getOperand(0). 4500 getOperand(0), 4501 MachineNode->getOperand(1)); 4502 else if (Op2Not) 4503 // orc(x, ~y) = x | y 4504 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), 4505 MVT::i1, MachineNode->getOperand(0), 4506 MachineNode->getOperand(1). 4507 getOperand(0)); 4508 else if (AllUsersSelectZero(MachineNode)) { 4509 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 4510 MVT::i1, MachineNode->getOperand(1), 4511 MachineNode->getOperand(0)); 4512 SelectSwap = true; 4513 } 4514 break; 4515 case PPC::SELECT_I4: 4516 case PPC::SELECT_I8: 4517 case PPC::SELECT_F4: 4518 case PPC::SELECT_F8: 4519 case PPC::SELECT_QFRC: 4520 case PPC::SELECT_QSRC: 4521 case PPC::SELECT_QBRC: 4522 case PPC::SELECT_VRRC: 4523 case PPC::SELECT_VSFRC: 4524 case PPC::SELECT_VSSRC: 4525 case PPC::SELECT_VSRC: 4526 if (Op1Set) 4527 ResNode = MachineNode->getOperand(1).getNode(); 4528 else if (Op1Unset) 4529 ResNode = MachineNode->getOperand(2).getNode(); 4530 else if (Op1Not) 4531 ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(), 4532 SDLoc(MachineNode), 4533 MachineNode->getValueType(0), 4534 MachineNode->getOperand(0). 4535 getOperand(0), 4536 MachineNode->getOperand(2), 4537 MachineNode->getOperand(1)); 4538 break; 4539 case PPC::BC: 4540 case PPC::BCn: 4541 if (Op1Not) 4542 ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn : 4543 PPC::BC, 4544 SDLoc(MachineNode), 4545 MVT::Other, 4546 MachineNode->getOperand(0). 4547 getOperand(0), 4548 MachineNode->getOperand(1), 4549 MachineNode->getOperand(2)); 4550 // FIXME: Handle Op1Set, Op1Unset here too. 4551 break; 4552 } 4553 4554 // If we're inverting this node because it is used only by selects that 4555 // we'd like to swap, then swap the selects before the node replacement. 4556 if (SelectSwap) 4557 SwapAllSelectUsers(MachineNode); 4558 4559 if (ResNode != MachineNode) { 4560 DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); 4561 DEBUG(MachineNode->dump(CurDAG)); 4562 DEBUG(dbgs() << "\nNew: "); 4563 DEBUG(ResNode->dump(CurDAG)); 4564 DEBUG(dbgs() << "\n"); 4565 4566 ReplaceUses(MachineNode, ResNode); 4567 IsModified = true; 4568 } 4569 } 4570 if (IsModified) 4571 CurDAG->RemoveDeadNodes(); 4572 } while (IsModified); 4573 } 4574 4575 // Gather the set of 32-bit operations that are known to have their 4576 // higher-order 32 bits zero, where ToPromote contains all such operations. 4577 static bool PeepholePPC64ZExtGather(SDValue Op32, 4578 SmallPtrSetImpl<SDNode *> &ToPromote) { 4579 if (!Op32.isMachineOpcode()) 4580 return false; 4581 4582 // First, check for the "frontier" instructions (those that will clear the 4583 // higher-order 32 bits. 4584 4585 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap 4586 // around. If it does not, then these instructions will clear the 4587 // higher-order bits. 4588 if ((Op32.getMachineOpcode() == PPC::RLWINM || 4589 Op32.getMachineOpcode() == PPC::RLWNM) && 4590 Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) { 4591 ToPromote.insert(Op32.getNode()); 4592 return true; 4593 } 4594 4595 // SLW and SRW always clear the higher-order bits. 4596 if (Op32.getMachineOpcode() == PPC::SLW || 4597 Op32.getMachineOpcode() == PPC::SRW) { 4598 ToPromote.insert(Op32.getNode()); 4599 return true; 4600 } 4601 4602 // For LI and LIS, we need the immediate to be positive (so that it is not 4603 // sign extended). 4604 if (Op32.getMachineOpcode() == PPC::LI || 4605 Op32.getMachineOpcode() == PPC::LIS) { 4606 if (!isUInt<15>(Op32.getConstantOperandVal(0))) 4607 return false; 4608 4609 ToPromote.insert(Op32.getNode()); 4610 return true; 4611 } 4612 4613 // LHBRX and LWBRX always clear the higher-order bits. 4614 if (Op32.getMachineOpcode() == PPC::LHBRX || 4615 Op32.getMachineOpcode() == PPC::LWBRX) { 4616 ToPromote.insert(Op32.getNode()); 4617 return true; 4618 } 4619 4620 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended. 4621 if (Op32.getMachineOpcode() == PPC::CNTLZW || 4622 Op32.getMachineOpcode() == PPC::CNTTZW) { 4623 ToPromote.insert(Op32.getNode()); 4624 return true; 4625 } 4626 4627 // Next, check for those instructions we can look through. 4628 4629 // Assuming the mask does not wrap around, then the higher-order bits are 4630 // taken directly from the first operand. 4631 if (Op32.getMachineOpcode() == PPC::RLWIMI && 4632 Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) { 4633 SmallPtrSet<SDNode *, 16> ToPromote1; 4634 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) 4635 return false; 4636 4637 ToPromote.insert(Op32.getNode()); 4638 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 4639 return true; 4640 } 4641 4642 // For OR, the higher-order bits are zero if that is true for both operands. 4643 // For SELECT_I4, the same is true (but the relevant operand numbers are 4644 // shifted by 1). 4645 if (Op32.getMachineOpcode() == PPC::OR || 4646 Op32.getMachineOpcode() == PPC::SELECT_I4) { 4647 unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0; 4648 SmallPtrSet<SDNode *, 16> ToPromote1; 4649 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1)) 4650 return false; 4651 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1)) 4652 return false; 4653 4654 ToPromote.insert(Op32.getNode()); 4655 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 4656 return true; 4657 } 4658 4659 // For ORI and ORIS, we need the higher-order bits of the first operand to be 4660 // zero, and also for the constant to be positive (so that it is not sign 4661 // extended). 4662 if (Op32.getMachineOpcode() == PPC::ORI || 4663 Op32.getMachineOpcode() == PPC::ORIS) { 4664 SmallPtrSet<SDNode *, 16> ToPromote1; 4665 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) 4666 return false; 4667 if (!isUInt<15>(Op32.getConstantOperandVal(1))) 4668 return false; 4669 4670 ToPromote.insert(Op32.getNode()); 4671 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 4672 return true; 4673 } 4674 4675 // The higher-order bits of AND are zero if that is true for at least one of 4676 // the operands. 4677 if (Op32.getMachineOpcode() == PPC::AND) { 4678 SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2; 4679 bool Op0OK = 4680 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); 4681 bool Op1OK = 4682 PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2); 4683 if (!Op0OK && !Op1OK) 4684 return false; 4685 4686 ToPromote.insert(Op32.getNode()); 4687 4688 if (Op0OK) 4689 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 4690 4691 if (Op1OK) 4692 ToPromote.insert(ToPromote2.begin(), ToPromote2.end()); 4693 4694 return true; 4695 } 4696 4697 // For ANDI and ANDIS, the higher-order bits are zero if either that is true 4698 // of the first operand, or if the second operand is positive (so that it is 4699 // not sign extended). 4700 if (Op32.getMachineOpcode() == PPC::ANDIo || 4701 Op32.getMachineOpcode() == PPC::ANDISo) { 4702 SmallPtrSet<SDNode *, 16> ToPromote1; 4703 bool Op0OK = 4704 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); 4705 bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1)); 4706 if (!Op0OK && !Op1OK) 4707 return false; 4708 4709 ToPromote.insert(Op32.getNode()); 4710 4711 if (Op0OK) 4712 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 4713 4714 return true; 4715 } 4716 4717 return false; 4718 } 4719 4720 void PPCDAGToDAGISel::PeepholePPC64ZExt() { 4721 if (!PPCSubTarget->isPPC64()) 4722 return; 4723 4724 // When we zero-extend from i32 to i64, we use a pattern like this: 4725 // def : Pat<(i64 (zext i32:$in)), 4726 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32), 4727 // 0, 32)>; 4728 // There are several 32-bit shift/rotate instructions, however, that will 4729 // clear the higher-order bits of their output, rendering the RLDICL 4730 // unnecessary. When that happens, we remove it here, and redefine the 4731 // relevant 32-bit operation to be a 64-bit operation. 4732 4733 SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); 4734 ++Position; 4735 4736 bool MadeChange = false; 4737 while (Position != CurDAG->allnodes_begin()) { 4738 SDNode *N = &*--Position; 4739 // Skip dead nodes and any non-machine opcodes. 4740 if (N->use_empty() || !N->isMachineOpcode()) 4741 continue; 4742 4743 if (N->getMachineOpcode() != PPC::RLDICL) 4744 continue; 4745 4746 if (N->getConstantOperandVal(1) != 0 || 4747 N->getConstantOperandVal(2) != 32) 4748 continue; 4749 4750 SDValue ISR = N->getOperand(0); 4751 if (!ISR.isMachineOpcode() || 4752 ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG) 4753 continue; 4754 4755 if (!ISR.hasOneUse()) 4756 continue; 4757 4758 if (ISR.getConstantOperandVal(2) != PPC::sub_32) 4759 continue; 4760 4761 SDValue IDef = ISR.getOperand(0); 4762 if (!IDef.isMachineOpcode() || 4763 IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF) 4764 continue; 4765 4766 // We now know that we're looking at a canonical i32 -> i64 zext. See if we 4767 // can get rid of it. 4768 4769 SDValue Op32 = ISR->getOperand(1); 4770 if (!Op32.isMachineOpcode()) 4771 continue; 4772 4773 // There are some 32-bit instructions that always clear the high-order 32 4774 // bits, there are also some instructions (like AND) that we can look 4775 // through. 4776 SmallPtrSet<SDNode *, 16> ToPromote; 4777 if (!PeepholePPC64ZExtGather(Op32, ToPromote)) 4778 continue; 4779 4780 // If the ToPromote set contains nodes that have uses outside of the set 4781 // (except for the original INSERT_SUBREG), then abort the transformation. 4782 bool OutsideUse = false; 4783 for (SDNode *PN : ToPromote) { 4784 for (SDNode *UN : PN->uses()) { 4785 if (!ToPromote.count(UN) && UN != ISR.getNode()) { 4786 OutsideUse = true; 4787 break; 4788 } 4789 } 4790 4791 if (OutsideUse) 4792 break; 4793 } 4794 if (OutsideUse) 4795 continue; 4796 4797 MadeChange = true; 4798 4799 // We now know that this zero extension can be removed by promoting to 4800 // nodes in ToPromote to 64-bit operations, where for operations in the 4801 // frontier of the set, we need to insert INSERT_SUBREGs for their 4802 // operands. 4803 for (SDNode *PN : ToPromote) { 4804 unsigned NewOpcode; 4805 switch (PN->getMachineOpcode()) { 4806 default: 4807 llvm_unreachable("Don't know the 64-bit variant of this instruction"); 4808 case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break; 4809 case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break; 4810 case PPC::SLW: NewOpcode = PPC::SLW8; break; 4811 case PPC::SRW: NewOpcode = PPC::SRW8; break; 4812 case PPC::LI: NewOpcode = PPC::LI8; break; 4813 case PPC::LIS: NewOpcode = PPC::LIS8; break; 4814 case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break; 4815 case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break; 4816 case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break; 4817 case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break; 4818 case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break; 4819 case PPC::OR: NewOpcode = PPC::OR8; break; 4820 case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break; 4821 case PPC::ORI: NewOpcode = PPC::ORI8; break; 4822 case PPC::ORIS: NewOpcode = PPC::ORIS8; break; 4823 case PPC::AND: NewOpcode = PPC::AND8; break; 4824 case PPC::ANDIo: NewOpcode = PPC::ANDIo8; break; 4825 case PPC::ANDISo: NewOpcode = PPC::ANDISo8; break; 4826 } 4827 4828 // Note: During the replacement process, the nodes will be in an 4829 // inconsistent state (some instructions will have operands with values 4830 // of the wrong type). Once done, however, everything should be right 4831 // again. 4832 4833 SmallVector<SDValue, 4> Ops; 4834 for (const SDValue &V : PN->ops()) { 4835 if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 && 4836 !isa<ConstantSDNode>(V)) { 4837 SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) }; 4838 SDNode *ReplOp = 4839 CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V), 4840 ISR.getNode()->getVTList(), ReplOpOps); 4841 Ops.push_back(SDValue(ReplOp, 0)); 4842 } else { 4843 Ops.push_back(V); 4844 } 4845 } 4846 4847 // Because all to-be-promoted nodes only have users that are other 4848 // promoted nodes (or the original INSERT_SUBREG), we can safely replace 4849 // the i32 result value type with i64. 4850 4851 SmallVector<EVT, 2> NewVTs; 4852 SDVTList VTs = PN->getVTList(); 4853 for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i) 4854 if (VTs.VTs[i] == MVT::i32) 4855 NewVTs.push_back(MVT::i64); 4856 else 4857 NewVTs.push_back(VTs.VTs[i]); 4858 4859 DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: "); 4860 DEBUG(PN->dump(CurDAG)); 4861 4862 CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops); 4863 4864 DEBUG(dbgs() << "\nNew: "); 4865 DEBUG(PN->dump(CurDAG)); 4866 DEBUG(dbgs() << "\n"); 4867 } 4868 4869 // Now we replace the original zero extend and its associated INSERT_SUBREG 4870 // with the value feeding the INSERT_SUBREG (which has now been promoted to 4871 // return an i64). 4872 4873 DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: "); 4874 DEBUG(N->dump(CurDAG)); 4875 DEBUG(dbgs() << "\nNew: "); 4876 DEBUG(Op32.getNode()->dump(CurDAG)); 4877 DEBUG(dbgs() << "\n"); 4878 4879 ReplaceUses(N, Op32.getNode()); 4880 } 4881 4882 if (MadeChange) 4883 CurDAG->RemoveDeadNodes(); 4884 } 4885 4886 void PPCDAGToDAGISel::PeepholePPC64() { 4887 // These optimizations are currently supported only for 64-bit SVR4. 4888 if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64()) 4889 return; 4890 4891 SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); 4892 ++Position; 4893 4894 while (Position != CurDAG->allnodes_begin()) { 4895 SDNode *N = &*--Position; 4896 // Skip dead nodes and any non-machine opcodes. 4897 if (N->use_empty() || !N->isMachineOpcode()) 4898 continue; 4899 4900 unsigned FirstOp; 4901 unsigned StorageOpcode = N->getMachineOpcode(); 4902 4903 switch (StorageOpcode) { 4904 default: continue; 4905 4906 case PPC::LBZ: 4907 case PPC::LBZ8: 4908 case PPC::LD: 4909 case PPC::LFD: 4910 case PPC::LFS: 4911 case PPC::LHA: 4912 case PPC::LHA8: 4913 case PPC::LHZ: 4914 case PPC::LHZ8: 4915 case PPC::LWA: 4916 case PPC::LWZ: 4917 case PPC::LWZ8: 4918 FirstOp = 0; 4919 break; 4920 4921 case PPC::STB: 4922 case PPC::STB8: 4923 case PPC::STD: 4924 case PPC::STFD: 4925 case PPC::STFS: 4926 case PPC::STH: 4927 case PPC::STH8: 4928 case PPC::STW: 4929 case PPC::STW8: 4930 FirstOp = 1; 4931 break; 4932 } 4933 4934 // If this is a load or store with a zero offset, or within the alignment, 4935 // we may be able to fold an add-immediate into the memory operation. 4936 // The check against alignment is below, as it can't occur until we check 4937 // the arguments to N 4938 if (!isa<ConstantSDNode>(N->getOperand(FirstOp))) 4939 continue; 4940 4941 SDValue Base = N->getOperand(FirstOp + 1); 4942 if (!Base.isMachineOpcode()) 4943 continue; 4944 4945 unsigned Flags = 0; 4946 bool ReplaceFlags = true; 4947 4948 // When the feeding operation is an add-immediate of some sort, 4949 // determine whether we need to add relocation information to the 4950 // target flags on the immediate operand when we fold it into the 4951 // load instruction. 4952 // 4953 // For something like ADDItocL, the relocation information is 4954 // inferred from the opcode; when we process it in the AsmPrinter, 4955 // we add the necessary relocation there. A load, though, can receive 4956 // relocation from various flavors of ADDIxxx, so we need to carry 4957 // the relocation information in the target flags. 4958 switch (Base.getMachineOpcode()) { 4959 default: continue; 4960 4961 case PPC::ADDI8: 4962 case PPC::ADDI: 4963 // In some cases (such as TLS) the relocation information 4964 // is already in place on the operand, so copying the operand 4965 // is sufficient. 4966 ReplaceFlags = false; 4967 // For these cases, the immediate may not be divisible by 4, in 4968 // which case the fold is illegal for DS-form instructions. (The 4969 // other cases provide aligned addresses and are always safe.) 4970 if ((StorageOpcode == PPC::LWA || 4971 StorageOpcode == PPC::LD || 4972 StorageOpcode == PPC::STD) && 4973 (!isa<ConstantSDNode>(Base.getOperand(1)) || 4974 Base.getConstantOperandVal(1) % 4 != 0)) 4975 continue; 4976 break; 4977 case PPC::ADDIdtprelL: 4978 Flags = PPCII::MO_DTPREL_LO; 4979 break; 4980 case PPC::ADDItlsldL: 4981 Flags = PPCII::MO_TLSLD_LO; 4982 break; 4983 case PPC::ADDItocL: 4984 Flags = PPCII::MO_TOC_LO; 4985 break; 4986 } 4987 4988 SDValue ImmOpnd = Base.getOperand(1); 4989 4990 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have 4991 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise, 4992 // we might have needed different @ha relocation values for the offset 4993 // pointers). 4994 int MaxDisplacement = 7; 4995 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { 4996 const GlobalValue *GV = GA->getGlobal(); 4997 MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement); 4998 } 4999 5000 bool UpdateHBase = false; 5001 SDValue HBase = Base.getOperand(0); 5002 5003 int Offset = N->getConstantOperandVal(FirstOp); 5004 if (ReplaceFlags) { 5005 if (Offset < 0 || Offset > MaxDisplacement) { 5006 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only 5007 // one use, then we can do this for any offset, we just need to also 5008 // update the offset (i.e. the symbol addend) on the addis also. 5009 if (Base.getMachineOpcode() != PPC::ADDItocL) 5010 continue; 5011 5012 if (!HBase.isMachineOpcode() || 5013 HBase.getMachineOpcode() != PPC::ADDIStocHA) 5014 continue; 5015 5016 if (!Base.hasOneUse() || !HBase.hasOneUse()) 5017 continue; 5018 5019 SDValue HImmOpnd = HBase.getOperand(1); 5020 if (HImmOpnd != ImmOpnd) 5021 continue; 5022 5023 UpdateHBase = true; 5024 } 5025 } else { 5026 // If we're directly folding the addend from an addi instruction, then: 5027 // 1. In general, the offset on the memory access must be zero. 5028 // 2. If the addend is a constant, then it can be combined with a 5029 // non-zero offset, but only if the result meets the encoding 5030 // requirements. 5031 if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) { 5032 Offset += C->getSExtValue(); 5033 5034 if ((StorageOpcode == PPC::LWA || StorageOpcode == PPC::LD || 5035 StorageOpcode == PPC::STD) && (Offset % 4) != 0) 5036 continue; 5037 5038 if (!isInt<16>(Offset)) 5039 continue; 5040 5041 ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd), 5042 ImmOpnd.getValueType()); 5043 } else if (Offset != 0) { 5044 continue; 5045 } 5046 } 5047 5048 // We found an opportunity. Reverse the operands from the add 5049 // immediate and substitute them into the load or store. If 5050 // needed, update the target flags for the immediate operand to 5051 // reflect the necessary relocation information. 5052 DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); 5053 DEBUG(Base->dump(CurDAG)); 5054 DEBUG(dbgs() << "\nN: "); 5055 DEBUG(N->dump(CurDAG)); 5056 DEBUG(dbgs() << "\n"); 5057 5058 // If the relocation information isn't already present on the 5059 // immediate operand, add it now. 5060 if (ReplaceFlags) { 5061 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) { 5062 SDLoc dl(GA); 5063 const GlobalValue *GV = GA->getGlobal(); 5064 // We can't perform this optimization for data whose alignment 5065 // is insufficient for the instruction encoding. 5066 if (GV->getAlignment() < 4 && 5067 (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD || 5068 StorageOpcode == PPC::LWA || (Offset % 4) != 0)) { 5069 DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); 5070 continue; 5071 } 5072 ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); 5073 } else if (ConstantPoolSDNode *CP = 5074 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) { 5075 const Constant *C = CP->getConstVal(); 5076 ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, 5077 CP->getAlignment(), 5078 Offset, Flags); 5079 } 5080 } 5081 5082 if (FirstOp == 1) // Store 5083 (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, 5084 Base.getOperand(0), N->getOperand(3)); 5085 else // Load 5086 (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), 5087 N->getOperand(2)); 5088 5089 if (UpdateHBase) 5090 (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0), 5091 ImmOpnd); 5092 5093 // The add-immediate may now be dead, in which case remove it. 5094 if (Base.getNode()->use_empty()) 5095 CurDAG->RemoveDeadNode(Base.getNode()); 5096 } 5097 } 5098 5099 /// createPPCISelDag - This pass converts a legalized DAG into a 5100 /// PowerPC-specific DAG, ready for instruction scheduling. 5101 /// 5102 FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM, 5103 CodeGenOpt::Level OptLevel) { 5104 return new PPCDAGToDAGISel(TM, OptLevel); 5105 } 5106