1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run 11 // both before and after the DAG is legalized. 12 // 13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is 14 // primarily intended to handle simplification opportunities that are implicit 15 // in the LLVM IR and exposed by the various codegen lowering phases. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/ADT/IntervalMap.h" 24 #include "llvm/ADT/None.h" 25 #include "llvm/ADT/Optional.h" 26 #include "llvm/ADT/STLExtras.h" 27 #include "llvm/ADT/SetVector.h" 28 #include "llvm/ADT/SmallBitVector.h" 29 #include "llvm/ADT/SmallPtrSet.h" 30 #include "llvm/ADT/SmallSet.h" 31 #include "llvm/ADT/SmallVector.h" 32 #include "llvm/ADT/Statistic.h" 33 #include "llvm/Analysis/AliasAnalysis.h" 34 #include "llvm/Analysis/MemoryLocation.h" 35 #include "llvm/CodeGen/DAGCombine.h" 36 #include "llvm/CodeGen/ISDOpcodes.h" 37 #include "llvm/CodeGen/MachineFrameInfo.h" 38 #include "llvm/CodeGen/MachineFunction.h" 39 #include "llvm/CodeGen/MachineMemOperand.h" 40 #include "llvm/CodeGen/RuntimeLibcalls.h" 41 #include "llvm/CodeGen/SelectionDAG.h" 42 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" 43 #include "llvm/CodeGen/SelectionDAGNodes.h" 44 #include "llvm/CodeGen/SelectionDAGTargetInfo.h" 45 #include "llvm/CodeGen/TargetLowering.h" 46 #include "llvm/CodeGen/TargetRegisterInfo.h" 47 #include "llvm/CodeGen/TargetSubtargetInfo.h" 48 
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

// Counters maintained by this pass; the string is each counter's description.
STATISTIC(NodesCombined , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

/// Hidden option "combiner-global-alias-analysis". No cl::init is given for
/// it here.
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

/// Hidden option "combiner-use-tbaa"; initialised to true.
static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

// This option only exists when NDEBUG is not defined.
#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden
/// option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

/// Hidden option "combiner-split-load-index"; initialised to true.
static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                  cl::desc("DAG combiner may split indexing from loads"));

namespace {

/// Worklist-driven combiner over a SelectionDAG. Holds the DAG being
/// simplified, the target lowering hooks, the worklist bookkeeping and the
/// per-opcode visit routines declared below.
class DAGCombiner {
  // The DAG being combined and the lowering info obtained from it in the
  // constructor.
  SelectionDAG &DAG;
  const TargetLowering &TLI;
  // Starts as BeforeLegalizeTypes (set in the constructor).
  CombineLevel Level;
  CodeGenOpt::Level OptLevel;
  bool LegalOperations = false;
  bool LegalTypes = false;
  // Set in the constructor from the current function's optForSize().
  bool ForCodeSize;

  /// Worklist of all of the nodes that need to be simplified.
  ///
  /// This must behave as a stack -- new nodes to process are pushed onto the
  /// back and when processing we pop off of the back.
  ///
  /// The worklist will not contain duplicates but may contain null entries
  /// due to nodes being deleted from the underlying DAG.
  SmallVector<SDNode *, 64> Worklist;

  /// Mapping from an SDNode to its position on the worklist.
  ///
  /// This is used to find and remove nodes from the worklist (by nulling
  /// them) when they are deleted from the underlying DAG. It relies on
  /// stable indices of nodes within the worklist.
  DenseMap<SDNode *, unsigned> WorklistMap;

  /// Set of nodes which have been combined (at least once).
  ///
  /// This is used to allow us to reliably add any operands of a DAG node
  /// which have not yet been combined to the worklist.
  SmallPtrSet<SDNode *, 32> CombinedNodes;

  // AA - Used for DAG load/store alias analysis.
  AliasAnalysis *AA;

  /// When an instruction is simplified, add all users of the instruction to
  /// the work lists because they might get more simplified now.
  void AddUsersToWorklist(SDNode *N) {
    for (SDNode *Node : N->uses())
      AddToWorklist(Node);
  }

  /// Call the node-specific routine that folds each particular type of node.
  SDValue visit(SDNode *N);

public:
  /// Bind the combiner to \p D, cache its TargetLowering, record the
  /// optimisation level and alias analysis, and compute
  /// MaximumLegalStoreInBits.
  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
        OptLevel(OL), AA(AA) {
    ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();

    // Largest getSizeInBits() over all simple value types, other than
    // MVT::Other, that the target reports as legal.
    MaximumLegalStoreInBits = 0;
    for (MVT VT : MVT::all_valuetypes())
      if (EVT(VT).isSimple() && VT != MVT::Other &&
          TLI.isTypeLegal(EVT(VT)) &&
          VT.getSizeInBits() >= MaximumLegalStoreInBits)
        MaximumLegalStoreInBits = VT.getSizeInBits();
  }

  /// Add \p N to the back of the worklist (next to be processed).
  ///
  /// NOTE(review): if \p N is already on the worklist this is a no-op and its
  /// existing position is kept; it is not moved to the back.
  void AddToWorklist(SDNode *N) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Deleted Node added to Worklist");

    // Skip handle nodes as they can't usefully be combined and confuse the
    // zero-use deletion strategy.
    if (N->getOpcode() == ISD::HANDLENODE)
      return;

    // The map insert only succeeds for a node not yet queued; the stored
    // index is the slot the node is about to occupy.
    if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
      Worklist.push_back(N);
  }

  /// Remove all instances of N from the worklist.
  ///
  /// Also drops \p N from CombinedNodes, whether or not it was queued.
  void removeFromWorklist(SDNode *N) {
    CombinedNodes.erase(N);

    auto It = WorklistMap.find(N);
    if (It == WorklistMap.end())
      return; // Not in the worklist.

    // Null out the entry rather than erasing it to avoid a linear operation.
    Worklist[It->second] = nullptr;
    WorklistMap.erase(It);
  }

  void deleteAndRecombine(SDNode *N);
  bool recursivelyDeleteUnusedNodes(SDNode *N);

  /// Replaces all uses of the results of one DAG node with new values.
  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                    bool AddTo = true);

  /// Replaces all uses of the results of one DAG node with new values.
  /// Single-result convenience form of the overload above.
  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
    return CombineTo(N, &Res, 1, AddTo);
  }

  /// Replaces all uses of the results of one DAG node with new values.
  /// Two-result convenience form of the array overload.
  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                    bool AddTo = true) {
    SDValue To[] = { Res0, Res1 };
    return CombineTo(N, To, 2, AddTo);
  }

  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

private:
  // Computed once in the constructor; see the loop there.
  unsigned MaximumLegalStoreInBits;

  /// Check the specified integer node value to see if it can be simplified or
  /// if things it uses can be simplified by bit propagation.
  /// If so, return true.
  ///
  /// This form demands every bit of the scalar value.
  bool SimplifyDemandedBits(SDValue Op) {
    unsigned BitWidth = Op.getScalarValueSizeInBits();
    APInt Demanded = APInt::getAllOnesValue(BitWidth);
    return SimplifyDemandedBits(Op, Demanded);
  }

  /// Check the specified vector node value to see if it can be simplified or
  /// if things it uses can be simplified as it only uses some of the
  /// elements. If so, return true.
  ///
  /// This form demands every element of the vector.
  bool SimplifyDemandedVectorElts(SDValue Op) {
    unsigned NumElts = Op.getValueType().getVectorNumElements();
    APInt Demanded = APInt::getAllOnesValue(NumElts);
    return SimplifyDemandedVectorElts(Op, Demanded);
  }

  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                  bool AssumeSingleUse = false);

  bool CombineToPreIndexedLoadStore(SDNode *N);
  bool CombineToPostIndexedLoadStore(SDNode *N);
  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
  bool SliceUpLoad(SDNode *N);

  // Scalars have size 0 to distinguish from singleton vectors.
  // NOTE(review): the comment above does not obviously describe the
  // declarations that follow it -- confirm whether it is misplaced.
  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
  /// load.
  ///
  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
  /// \param EltNo index of the vector element to load.
  /// \param OriginalLoad load that EVE came from to be replaced.
  /// \returns EVE on success SDValue() on failure.
  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                       SDValue EltNo,
                                       LoadSDNode *OriginalLoad);
  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue PromoteIntBinOp(SDValue Op);
  SDValue PromoteIntShiftOp(SDValue Op);
  SDValue PromoteExtend(SDValue Op);
  bool PromoteLoad(SDValue Op);

  /// Call the node-specific routine that knows how to fold each
  /// particular type of node. If that doesn't do anything, try the
  /// target-specific DAG combines.
  SDValue combine(SDNode *N);

  // Visitation implementation - Implement dag node combining for different
  // node types.  The semantics are as follows:
  // Return Value:
  //   SDValue.getNode() == 0 - No change was made
  //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
  //   otherwise              - N should be replaced by the returned Operand.
  //
  SDValue visitTokenFactor(SDNode *N);
  SDValue visitMERGE_VALUES(SDNode *N);
  SDValue visitADD(SDNode *N);
  SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
  SDValue visitSUB(SDNode *N);
  SDValue visitADDSAT(SDNode *N);
  SDValue visitSUBSAT(SDNode *N);
  SDValue visitADDC(SDNode *N);
  SDValue visitUADDO(SDNode *N);
  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitSUBC(SDNode *N);
  SDValue visitUSUBO(SDNode *N);
  SDValue visitADDE(SDNode *N);
  SDValue visitADDCARRY(SDNode *N);
  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
  SDValue visitSUBE(SDNode *N);
  SDValue visitSUBCARRY(SDNode *N);
  SDValue visitMUL(SDNode *N);
  SDValue useDivRem(SDNode *N);
  SDValue visitSDIV(SDNode *N);
  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitUDIV(SDNode *N);
  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitREM(SDNode *N);
  SDValue visitMULHU(SDNode *N);
  SDValue visitMULHS(SDNode *N);
  SDValue visitSMUL_LOHI(SDNode *N);
  SDValue visitUMUL_LOHI(SDNode *N);
  SDValue visitSMULO(SDNode *N);
  SDValue visitUMULO(SDNode *N);
  SDValue visitIMINMAX(SDNode *N);
  SDValue visitAND(SDNode *N);
  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitOR(SDNode *N);
  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitXOR(SDNode *N);
  SDValue SimplifyVBinOp(SDNode *N);
  SDValue visitSHL(SDNode *N);
  SDValue visitSRA(SDNode *N);
  SDValue visitSRL(SDNode *N);
  SDValue visitFunnelShift(SDNode *N);
  SDValue visitRotate(SDNode *N);
  SDValue visitABS(SDNode *N);
  SDValue visitBSWAP(SDNode *N);
  SDValue visitBITREVERSE(SDNode *N);
  SDValue visitCTLZ(SDNode *N);
  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTTZ(SDNode *N);
  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTPOP(SDNode *N);
  SDValue visitSELECT(SDNode *N);
  SDValue visitVSELECT(SDNode *N);
  SDValue visitSELECT_CC(SDNode *N);
  SDValue visitSETCC(SDNode *N);
  SDValue visitSETCCCARRY(SDNode *N);
  SDValue visitSIGN_EXTEND(SDNode *N);
  SDValue visitZERO_EXTEND(SDNode *N);
  SDValue visitANY_EXTEND(SDNode *N);
  SDValue visitAssertExt(SDNode *N);
  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitTRUNCATE(SDNode *N);
  SDValue visitBITCAST(SDNode *N);
  SDValue visitBUILD_PAIR(SDNode *N);
  SDValue visitFADD(SDNode *N);
  SDValue visitFSUB(SDNode *N);
  SDValue visitFMUL(SDNode *N);
  SDValue visitFMA(SDNode *N);
  SDValue visitFDIV(SDNode *N);
  SDValue visitFREM(SDNode *N);
  SDValue visitFSQRT(SDNode *N);
  SDValue visitFCOPYSIGN(SDNode *N);
  SDValue visitFPOW(SDNode *N);
  SDValue visitSINT_TO_FP(SDNode *N);
  SDValue visitUINT_TO_FP(SDNode *N);
  SDValue visitFP_TO_SINT(SDNode *N);
  SDValue visitFP_TO_UINT(SDNode *N);
  SDValue visitFP_ROUND(SDNode *N);
  SDValue visitFP_ROUND_INREG(SDNode *N);
  SDValue visitFP_EXTEND(SDNode *N);
  SDValue visitFNEG(SDNode *N);
  SDValue visitFABS(SDNode *N);
  SDValue visitFCEIL(SDNode *N);
  SDValue visitFTRUNC(SDNode *N);
  SDValue visitFFLOOR(SDNode *N);
  SDValue visitFMINNUM(SDNode *N);
  SDValue visitFMAXNUM(SDNode *N);
  SDValue visitFMINIMUM(SDNode *N);
  SDValue visitFMAXIMUM(SDNode *N);
  SDValue visitBRCOND(SDNode *N);
  SDValue visitBR_CC(SDNode *N);
  SDValue visitLOAD(SDNode *N);

  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

  SDValue visitSTORE(SDNode *N);
  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
  SDValue visitBUILD_VECTOR(SDNode *N);
  SDValue visitCONCAT_VECTORS(SDNode *N);
  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
  SDValue visitVECTOR_SHUFFLE(SDNode *N);
  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
  SDValue visitINSERT_SUBVECTOR(SDNode *N);
  SDValue visitMLOAD(SDNode *N);
  SDValue visitMSTORE(SDNode *N);
  SDValue visitMGATHER(SDNode *N);
  SDValue visitMSCATTER(SDNode *N);
  SDValue visitFP_TO_FP16(SDNode *N);
  SDValue visitFP16_TO_FP(SDNode *N);

  SDValue visitFADDForFMACombine(SDNode *N);
  SDValue visitFSUBForFMACombine(SDNode *N);
  SDValue visitFMULForFMADistributiveCombine(SDNode *N);

  SDValue XformToShuffleWithZero(SDNode *N);
  SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                         SDValue N1, SDNodeFlags Flags);

  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

  SDValue foldSelectOfConstants(SDNode *N);
  SDValue foldVSelectOfConstants(SDNode *N);
  SDValue foldBinOpIntoSelect(SDNode *BO);
  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                           SDValue N2, SDValue N3, ISD::CondCode CC,
                           bool NotExtCompare = false);
  SDValue convertSelectOfFPConstantsToLoadOffset(
      const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
      ISD::CondCode CC);
  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                 SDValue N2, SDValue N3, ISD::CondCode CC);
  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                            const SDLoc &DL);
  SDValue unfoldMaskedMerge(SDNode *N);
  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                        const SDLoc &DL, bool foldBooleans);
  SDValue rebuildSetCC(SDValue N);

  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                         SDValue &CC) const;
  bool isOneUseSetCC(SDValue N) const;

  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                     unsigned HiOp);
  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
  SDValue CombineExtLoad(SDNode *N);
  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
  SDValue combineRepeatedFPDivisors(SDNode *N);
  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
  SDValue BuildSDIV(SDNode *N);
  SDValue BuildSDIVPow2(SDNode *N);
  SDValue BuildUDIV(SDNode *N);
  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                              SDNodeFlags Flags, bool Reciprocal);
  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                              SDNodeFlags Flags, bool Reciprocal);
  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                             bool DemandHighBits = true);
  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                            SDValue InnerPos, SDValue InnerNeg,
                            unsigned PosOpcode, unsigned NegOpcode,
                            const SDLoc &DL);
  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
  SDValue MatchLoadCombine(SDNode *N);
  SDValue ReduceLoadWidth(SDNode *N);
  SDValue ReduceLoadOpStoreWidth(SDNode *N);
  SDValue splitMergedValStore(StoreSDNode *ST);
  SDValue TransformFPLoadStorePair(SDNode *N);
  SDValue convertBuildVecZextToZext(SDNode *N);
  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
  SDValue reduceBuildVecToShuffle(SDNode *N);
  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                ArrayRef<int> VectorMask, SDValue VecIn1,
                                SDValue VecIn2, unsigned LeftIdx);
  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

  /// Walk up chain skipping non-aliasing memory nodes,
  /// looking for aliasing nodes and adding them to the Aliases vector.
  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                        SmallVectorImpl<SDValue> &Aliases);

  /// Return true if there is any possibility that the two addresses overlap.
  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
  /// chain (aliasing node.)
  SDValue FindBetterChain(SDNode *N, SDValue Chain);

  /// Try to replace a store and any possibly adjacent stores on
  /// consecutive chains with better chains. Return true only if St is
  /// replaced.
  ///
  /// Notice that other chains may still be replaced even if the function
  /// returns false.
  bool findBetterNeighborChains(StoreSDNode *St);

  // Helper for findBetterNeighborChains. Walk up the store chain and add
  // additional chained stores that do not overlap and can be parallelized.
  bool parallelizeChainedStores(StoreSDNode *St);

  /// Holds a pointer to an LSBaseSDNode as well as information on where it
  /// is located in a sequence of memory operations connected by a chain.
  struct MemOpLink {
    // Ptr to the mem node.
    LSBaseSDNode *MemNode;

    // Offset from the base ptr.
    int64_t OffsetFromBase;

    MemOpLink(LSBaseSDNode *N, int64_t Offset)
        : MemNode(N), OffsetFromBase(Offset) {}
  };

  /// This is a helper function for visitMUL to check the profitability
  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
  /// MulNode is the original multiply, AddNode is (add x, c1),
  /// and ConstNode is c2.
  bool isMulAddWithConstProfitable(SDNode *MulNode,
                                   SDValue &AddNode,
                                   SDValue &ConstNode);

  /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
  /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
  /// the type of the loaded value to be extended.
  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                        EVT LoadResultTy, EVT &ExtVT);

  /// Helper function to calculate whether the given Load/Store can have its
  /// width reduced to ExtVT.
  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                         EVT &MemVT, unsigned ShAmt = 0);

  /// Used by BackwardsPropagateMask to find suitable loads.
  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                         SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                         ConstantSDNode *Mask, SDNode *&NodeToMask);
  /// Attempt to propagate a given AND node back to load leaves so that they
  /// can be combined into narrow loads.
  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

  /// Helper function for MergeConsecutiveStores which merges the
  /// component store chains.
  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumStores);

  /// This is a helper function for MergeConsecutiveStores. When the
  /// source elements of the consecutive stores are all constants or
  /// all extracted vector elements, try to merge them into one
  /// larger store introducing bitcasts if necessary.  \return True
  /// if a merged store was created.
  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                       EVT MemVT, unsigned NumStores,
                                       bool IsConstantSrc, bool UseVector,
                                       bool UseTrunc);

  /// This is a helper function for MergeConsecutiveStores. Stores
  /// that potentially may be merged with St are placed in
  /// StoreNodes. \p Root is a chain predecessor to all store
  /// candidates.
  void getStoreMergeCandidates(StoreSDNode *St,
                               SmallVectorImpl<MemOpLink> &StoreNodes,
                               SDNode *&Root);

  /// Helper function for MergeConsecutiveStores. Checks if
  /// candidate stores have indirect dependency through their
  /// operands. RootNode is the predecessor to all stores calculated
  /// by getStoreMergeCandidates and is used to prune the dependency check.
  /// \return True if safe to merge.
  bool checkMergeStoreCandidatesForDependencies(
      SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
      SDNode *RootNode);

  /// Merge consecutive store operations into a wide store.
  /// This optimization uses wide integers or vectors when possible.
  /// \return a bool. NOTE(review): presumably true when at least one merged
  /// store was created -- confirm against the definition. The earlier
  /// wording described a store count and a StoreNodes parameter, neither of
  /// which matches this signature.
  bool MergeConsecutiveStores(StoreSDNode *St);

  /// Try to transform a truncation where C is a constant:
  ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
  ///
  /// \p N needs to be a truncation and its first operand an AND. Other
  /// requirements are checked by the function (e.g. that trunc is
  /// single-use) and if missed an empty SDValue is returned.
  SDValue distributeTruncateThroughAnd(SDNode *N);

  /// Helper function to determine whether the target supports operation
  /// given by \p Opcode for type \p VT, that is, whether the operation
  /// is legal or custom before legalizing operations, and whether is
  /// legal (but not custom) after legalization.
  bool hasOperation(unsigned Opcode, EVT VT) {
    if (LegalOperations)
      return TLI.isOperationLegal(Opcode, VT);
    return TLI.isOperationLegalOrCustom(Opcode, VT);
  }

public:
  /// Runs the dag combiner on all nodes in the work list
  void Run(CombineLevel AtLevel);

  SelectionDAG &getDAG() const { return DAG; }

  /// Returns a type large enough to hold any valid shift amount - before type
  /// legalization these can be huge.
  EVT getShiftAmountTy(EVT LHSTy) {
    assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
    return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
  }

  /// This method returns true if we are running before type legalization or
  /// if the specified VT is legal.
  bool isTypeLegal(const EVT &VT) {
    if (!LegalTypes) return true;
    return TLI.isTypeLegal(VT);
  }

  /// Convenience wrapper around TargetLowering::getSetCCResultType
  EVT getSetCCResultType(EVT VT) const {
    return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  }

  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                       SDValue OrigLoad, SDValue ExtLoad,
                       ISD::NodeType ExtType);
};

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
633 class WorklistRemover : public SelectionDAG::DAGUpdateListener { 634 DAGCombiner &DC; 635 636 public: 637 explicit WorklistRemover(DAGCombiner &dc) 638 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} 639 640 void NodeDeleted(SDNode *N, SDNode *E) override { 641 DC.removeFromWorklist(N); 642 } 643 }; 644 645 } // end anonymous namespace 646 647 //===----------------------------------------------------------------------===// 648 // TargetLowering::DAGCombinerInfo implementation 649 //===----------------------------------------------------------------------===// 650 651 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { 652 ((DAGCombiner*)DC)->AddToWorklist(N); 653 } 654 655 SDValue TargetLowering::DAGCombinerInfo:: 656 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { 657 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); 658 } 659 660 SDValue TargetLowering::DAGCombinerInfo:: 661 CombineTo(SDNode *N, SDValue Res, bool AddTo) { 662 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); 663 } 664 665 SDValue TargetLowering::DAGCombinerInfo:: 666 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { 667 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); 668 } 669 670 void TargetLowering::DAGCombinerInfo:: 671 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 672 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); 673 } 674 675 //===----------------------------------------------------------------------===// 676 // Helper Functions 677 //===----------------------------------------------------------------------===// 678 679 void DAGCombiner::deleteAndRecombine(SDNode *N) { 680 removeFromWorklist(N); 681 682 // If the operands of this node are only used by the node, they will now be 683 // dead. Make sure to re-visit them and recursively delete dead nodes. 
684 for (const SDValue &Op : N->ops()) 685 // For an operand generating multiple values, one of the values may 686 // become dead allowing further simplification (e.g. split index 687 // arithmetic from an indexed load). 688 if (Op->hasOneUse() || Op->getNumValues() > 1) 689 AddToWorklist(Op.getNode()); 690 691 DAG.DeleteNode(N); 692 } 693 694 /// Return 1 if we can compute the negated form of the specified expression for 695 /// the same cost as the expression itself, or 2 if we can compute the negated 696 /// form more cheaply than the expression itself. 697 static char isNegatibleForFree(SDValue Op, bool LegalOperations, 698 const TargetLowering &TLI, 699 const TargetOptions *Options, 700 unsigned Depth = 0) { 701 // fneg is removable even if it has multiple uses. 702 if (Op.getOpcode() == ISD::FNEG) return 2; 703 704 // Don't allow anything with multiple uses unless we know it is free. 705 EVT VT = Op.getValueType(); 706 const SDNodeFlags Flags = Op->getFlags(); 707 if (!Op.hasOneUse()) 708 if (!(Op.getOpcode() == ISD::FP_EXTEND && 709 TLI.isFPExtFree(VT, Op.getOperand(0).getValueType()))) 710 return 0; 711 712 // Don't recurse exponentially. 713 if (Depth > 6) return 0; 714 715 switch (Op.getOpcode()) { 716 default: return false; 717 case ISD::ConstantFP: { 718 if (!LegalOperations) 719 return 1; 720 721 // Don't invert constant FP values after legalization unless the target says 722 // the negated constant is legal. 723 return TLI.isOperationLegal(ISD::ConstantFP, VT) || 724 TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT); 725 } 726 case ISD::FADD: 727 if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros()) 728 return 0; 729 730 // After operation legalization, it might not be legal to create new FSUBs. 
731 if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) 732 return 0; 733 734 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 735 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 736 Options, Depth + 1)) 737 return V; 738 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 739 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 740 Depth + 1); 741 case ISD::FSUB: 742 // We can't turn -(A-B) into B-A when we honor signed zeros. 743 if (!Options->NoSignedZerosFPMath && 744 !Flags.hasNoSignedZeros()) 745 return 0; 746 747 // fold (fneg (fsub A, B)) -> (fsub B, A) 748 return 1; 749 750 case ISD::FMUL: 751 case ISD::FDIV: 752 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 753 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 754 Options, Depth + 1)) 755 return V; 756 757 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 758 Depth + 1); 759 760 case ISD::FP_EXTEND: 761 case ISD::FP_ROUND: 762 case ISD::FSIN: 763 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, 764 Depth + 1); 765 } 766 } 767 768 /// If isNegatibleForFree returns true, return the newly negated expression. 769 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 770 bool LegalOperations, unsigned Depth = 0) { 771 const TargetOptions &Options = DAG.getTarget().Options; 772 // fneg is removable even if it has multiple uses. 
773 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 774 775 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 776 777 const SDNodeFlags Flags = Op.getNode()->getFlags(); 778 779 switch (Op.getOpcode()) { 780 default: llvm_unreachable("Unknown code"); 781 case ISD::ConstantFP: { 782 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 783 V.changeSign(); 784 return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); 785 } 786 case ISD::FADD: 787 assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros()); 788 789 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 790 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 791 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 792 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 793 GetNegatedExpression(Op.getOperand(0), DAG, 794 LegalOperations, Depth+1), 795 Op.getOperand(1), Flags); 796 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 797 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 798 GetNegatedExpression(Op.getOperand(1), DAG, 799 LegalOperations, Depth+1), 800 Op.getOperand(0), Flags); 801 case ISD::FSUB: 802 // fold (fneg (fsub 0, B)) -> B 803 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 804 if (N0CFP->isZero()) 805 return Op.getOperand(1); 806 807 // fold (fneg (fsub A, B)) -> (fsub B, A) 808 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 809 Op.getOperand(1), Op.getOperand(0), Flags); 810 811 case ISD::FMUL: 812 case ISD::FDIV: 813 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 814 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 815 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 816 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 817 GetNegatedExpression(Op.getOperand(0), DAG, 818 LegalOperations, Depth+1), 819 Op.getOperand(1), Flags); 820 821 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 822 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 823 
Op.getOperand(0), 824 GetNegatedExpression(Op.getOperand(1), DAG, 825 LegalOperations, Depth+1), Flags); 826 827 case ISD::FP_EXTEND: 828 case ISD::FSIN: 829 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 830 GetNegatedExpression(Op.getOperand(0), DAG, 831 LegalOperations, Depth+1)); 832 case ISD::FP_ROUND: 833 return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), 834 GetNegatedExpression(Op.getOperand(0), DAG, 835 LegalOperations, Depth+1), 836 Op.getOperand(1)); 837 } 838 } 839 840 // APInts must be the same size for most operations, this helper 841 // function zero extends the shorter of the pair so that they match. 842 // We provide an Offset so that we can create bitwidths that won't overflow. 843 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { 844 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth()); 845 LHS = LHS.zextOrSelf(Bits); 846 RHS = RHS.zextOrSelf(Bits); 847 } 848 849 // Return true if this node is a setcc, or is a select_cc 850 // that selects between the target values used for true and false, making it 851 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to 852 // the appropriate nodes based on the type of node we are checking. This 853 // simplifies life a bit for the callers. 
854 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 855 SDValue &CC) const { 856 if (N.getOpcode() == ISD::SETCC) { 857 LHS = N.getOperand(0); 858 RHS = N.getOperand(1); 859 CC = N.getOperand(2); 860 return true; 861 } 862 863 if (N.getOpcode() != ISD::SELECT_CC || 864 !TLI.isConstTrueVal(N.getOperand(2).getNode()) || 865 !TLI.isConstFalseVal(N.getOperand(3).getNode())) 866 return false; 867 868 if (TLI.getBooleanContents(N.getValueType()) == 869 TargetLowering::UndefinedBooleanContent) 870 return false; 871 872 LHS = N.getOperand(0); 873 RHS = N.getOperand(1); 874 CC = N.getOperand(4); 875 return true; 876 } 877 878 /// Return true if this is a SetCC-equivalent operation with only one use. 879 /// If this is true, it allows the users to invert the operation for free when 880 /// it is profitable to do so. 881 bool DAGCombiner::isOneUseSetCC(SDValue N) const { 882 SDValue N0, N1, N2; 883 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 884 return true; 885 return false; 886 } 887 888 // Returns the SDNode if it is a constant float BuildVector 889 // or constant float. 890 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { 891 if (isa<ConstantFPSDNode>(N)) 892 return N.getNode(); 893 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) 894 return N.getNode(); 895 return nullptr; 896 } 897 898 // Determines if it is a constant integer or a build vector of constant 899 // integers (and undefs). 900 // Do not permit build vector implicit truncation. 
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  // Scalar case: any ConstantSDNode qualifies, unless it is opaque and the
  // caller asked for opaque constants to be rejected.
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  // Vector case: every non-undef element must be a ConstantSDNode whose APInt
  // width equals the vector's scalar width (and must pass the same opaque
  // check as the scalar case).
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's. Accepts both integer constants (subject to \p NoOpaques, see
// isConstantOrConstantVector) and floating-point constants. Anything that is
// not a BUILD_VECTOR yields false.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

/// Try to reassociate the binary operation \p Opc applied to \p N0 and \p N1
/// when one of them is itself an \p Opc node with a constant right-hand
/// operand. Two constants are folded together; a single constant is moved to
/// the outermost operation. Returns the rewritten value, or a null SDValue if
/// nothing was done (including when \p Flags or the inner node is marked as a
/// vector reduction).
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())
    return SDValue();

  EVT VT = N0.getValueType();
  // Case 1: the inner (op x, c1) is the left operand.
  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        // The two constants could not be folded; give up on this node.
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  // Case 2: mirror image, the inner (op x, c1) is the right operand.
  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        // The two constants could not be folded; give up on this node.
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}

/// Replace all uses of the results of \p N with the \p NumTo values in \p To
/// (one per result of \p N, with matching types), then delete \p N if that
/// left it without uses. When \p AddTo is true, every non-null replacement
/// node and its users are pushed onto the worklist. Always returns
/// SDValue(N, 0).
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  // Debug-only sanity check: each non-null replacement must have the type of
  // the result it replaces.
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

/// Apply a pending target-lowering simplification: replace every use of
/// \p TLO.Old with \p TLO.New, queue the new node and its users for
/// combining, and delete the old node if it has become unused.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
/// On success the simplification reported by the target hook has already been
/// committed to the DAG (see CommitTargetLoweringOpt) and \p Op's node has
/// been queued for another visit.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true. \p AssumeSingleUse is forwarded unchanged to the
/// target hook, which is invoked with a starting depth of 0.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
                                      0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Redirect all users of \p Load to the wider \p ExtLoad: users of the loaded
/// value (result 0) get a TRUNCATE of the extended value back to the original
/// type, users of result 1 get result 1 of \p ExtLoad. \p Load is then
/// deleted and the new TRUNCATE is queued for combining.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

/// Build a version of \p Op widened to the type \p PVT.
///
/// \p Replace is set to true only when \p Op is an unindexed load and an
/// extending load was created for it; the caller is then responsible for
/// replacing the original load (see ReplaceLoadWithPromotedLoad). Returns a
/// null SDValue when no widened value could be produced.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes an EXTLOAD; a load that already extends
    // keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  // For the assert nodes, promote the asserted operand with the matching
  // extension and re-wrap it; if that promotion fails, fall through to the
  // generic ANY_EXTEND path below.
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    // Constants of byte-sized types are sign-extended, all others
    // zero-extended.
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  // Generic case: any-extend, provided the target supports it for PVT.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}

/// Widen \p Op to \p PVT via PromoteOperand and wrap the result in a
/// SIGN_EXTEND_INREG from the original type of \p Op. If PromoteOperand
/// created an extending load, the original load is replaced by it here.
/// Returns a null SDValue if SIGN_EXTEND_INREG is not legal for \p PVT or if
/// the operand could not be promoted.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

/// Widen \p Op to \p PVT via PromoteOperand and zero-extend-in-register the
/// result from the original type of \p Op. If PromoteOperand created an
/// extending load, the original load is replaced by it here. Returns a null
/// SDValue if the operand could not be promoted.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
///
/// Only attempted when LegalOperations is set and \p Op is a scalar integer.
/// On success the uses of \p Op have already been replaced through CombineTo
/// and \p Op itself is returned; otherwise a null SDValue is returned.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the operation in the promoted type and truncate the result
    // back to the original type.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}

/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
///
/// Only the shifted value (operand 0) is promoted; the shift amount is used
/// as is. Returns the truncated promoted shift, or a null SDValue if no
/// promotion was performed.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    // The extension used for the shifted value depends on the shift kind:
    // sign-extended for SRA, zero-extended for SRL, and the generic
    // PromoteOperand path for everything else.
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    // Replace is only ever set by the PromoteOperand path above; the
    // SExt/ZExt helpers perform their own load replacement.
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}

/// If the target reports the result type of the scalar integer extension
/// node \p Op as undesirable and wants the operation promoted, request the
/// node again from the DAG with the same opcode, result type and operand.
/// Returns that node, or a null SDValue if the preconditions do not hold.
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    // NOTE(review): presumably these folds are performed inside
    // DAG.getNode() when the node is re-created; confirm against
    // SelectionDAG::getNode.
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

/// If \p Op is an unindexed scalar integer load of a type the target
/// considers undesirable, replace it with an extending load of the promoted
/// type followed by a TRUNCATE back to the original type. Returns true if the
/// load was replaced (the original load node is deleted in that case).
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes an EXTLOAD; a load that already extends
    // keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    // Users of the loaded value see the truncated wide value; users of
    // result 1 are moved to result 1 of the new load.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}

/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
///
/// Returns false (and does nothing) if \p N still has uses, true otherwise.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  // Pending nodes to examine, seeded with N itself.
  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      // Dead: queue its operands for examination before deleting it.
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still used elsewhere: it lost a user, so revisit it later.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}

//===----------------------------------------------------------------------===//
//  Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

/// Run the combiner over the whole DAG at combine level \p AtLevel: seed the
/// worklist with every node, then repeatedly pop a node, delete it if dead,
/// re-legalize it when running after DAG legalization, try to combine it and
/// replace it with the result. Finally update the DAG root and remove dead
/// nodes.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // N is not combined in this iteration when LegalizeOp reports it as no
      // longer valid.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // A null result means no combine applied to N.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    // With matching result counts the replacement is node-for-node;
    // otherwise N must have a single result, which is replaced by the one
    // value RV.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

/// Dispatch \p N to the visit routine for its opcode and return that
/// routine's result. Opcodes without an entry in the switch yield a null
/// SDValue.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT:            return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT:            return visitSUBSAT(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR:               return visitFunnelShift(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::FPOW:               return visitFPOW(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FMINIMUM:           return visitFMINIMUM(N);
  case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}

/// Try to combine \p N, in order: the generic visit routine, then the
/// target-specific combine hook, then type promotion, and finally reuse of an
/// already existing commuted form of a commutative binary node. Returns the
/// replacement value, or a null SDValue if nothing applied.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    // The target hook is consulted for every opcode at or above
    // BUILTIN_OP_END, and for other opcodes only if the target registered
    // interest in them.
    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad performs the replacement itself; N is returned as the
      // result when it succeeds.
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try to eliminate it if the commuted
  // version is already present in the DAG.
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
static SDValue getInputChainForNode(SDNode *N) {
  // Probe the first operand, then the last one, then the interior operands;
  // the first operand found with type MVT::Other is returned as the chain.
  if (unsigned NumOps = N->getNumOperands()) {
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

/// Simplify a TokenFactor node. Handles, in order:
///  - a two-operand token factor where one operand's input chain is the other
///    operand (the latter is redundant),
///  - flattening of single-use nested token factors and removal of EntryToken
///    and duplicate operands,
///  - pruning of operands that are reached while walking up the chain of
///    another operand.
/// Returns the replacement value, or a null SDValue if nothing changed.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps; // Nodes already present in Ops.
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors. TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        // A multi-use (or already queued) token factor is treated like any
        // other operand.
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first, stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  // Seed the search with one worklist entry per operand; the entry's second
  // member is the index of that operand in Ops.
  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // NOTE: this local lambda shadows the DAGCombiner::AddToWorklist member for
  // the remainder of the function.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // The walk is bounded to the first 1024 worklist entries.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to be considering at least 2 Ops to be able to prune.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the operands that were not reached while walking up the
        // chain of another operand.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    // Do as a single replacement to avoid rewalking use lists.
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      Ops.push_back(N->getOperand(i));
    DAG.ReplaceAllUsesWith(N, Ops.data());
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
/// ConstantSDNode pointer else nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}

/// Try to fold a binary operator whose one operand is a single-use select of
/// constants and whose other operand is a constant (or, for and/or with 0/-1
/// select arms, any value) into a select of the two folded results.
/// Returns a null SDValue if the fold does not apply.
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  auto BinOpcode = BO->getOpcode();
  bool CanFoldNonConst =
      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
      (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
      (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));

  // CBO is the operand of the binop that is not the select.
  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // In case of shift value and shift amount may have different VT. For instance
  // on x86 shift amount is i8 regardless of LHS type. Bail out if we have
  // swapped operands and value types do not match. NB: x86 is fine if operands
  // are not swapped with shift amount VT being not bigger than shifted value.
  // TODO: that is possible to check for a shift operation, correct VTs and
  // still perform optimization on x86 if needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  // The operand order of the original binop is preserved via SelOpNo.
  SDLoc DL(Sel);
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
}

/// Try to fold an add/sub of a constant and a zero-extended
/// 'setcc (X & 1), 0, eq' into a sub/add of the low bit of X with an adjusted
/// constant. Returns a null SDValue if the pattern does not match.
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // Match a constant operand and a zext operand for the math instruction:
  // add Z, C
  // sub C, Z
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
  auto *CN = dyn_cast<ConstantSDNode>(C);
  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  // Match the zext operand as a setcc of a boolean.
  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
      Z.getOperand(0).getValueType() != MVT::i1)
    return SDValue();

  // Match the compare as: setcc (X & 1), 0, eq.
  SDValue SetCC = Z.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
      SetCC.getOperand(0).getOpcode() != ISD::AND ||
      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
    return SDValue();

  // We are adding/subtracting a constant and an inverted low bit. Turn that
  // into a subtract/add of the low bit with incremented/decremented constant:
  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
  EVT VT = C.getValueType();
  SDLoc DL(N);
  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}

/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
/// a shift and add with a different constant.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // We need a constant operand for the add/sub, and the other operand is a
  // logical shift right: add (srl), C or sub C, (srl).
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
  if (!C || ShiftOp.getOpcode() != ISD::SRL)
    return SDValue();

  // The shift must be of a 'not' value.
  SDValue Not = ShiftOp.getOperand(0);
  if (!Not.hasOneUse() || !isBitwiseNot(Not))
    return SDValue();

  // The shift must be moving the sign bit to the least-significant-bit.
  EVT VT = ShiftOp.getValueType();
  SDValue ShAmt = ShiftOp.getOperand(1);
  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
    return SDValue();

  // Eliminate the 'not' by adjusting the shift and add/sub constant:
  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
  SDLoc DL(N);
  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
}

/// Combine an ISD::ADD node. The folds below are tried in order and the first
/// one that applies determines the result; a null SDValue is returned when
/// none applies.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneOrOneSplat(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // fold (add (xor a, -1), 1) -> (sub 0, a)
  if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                       N0.getOperand(0));

  // The remaining folds are operand-order sensitive, so try both orders.
  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}

/// Combine a saturating add node (the node's own opcode is reused for any
/// replacement, except for the UADDSAT -> ADD fold at the end).
SDValue DAGCombiner::visitADDSAT(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    // TODO SimplifyVBinOp

    // fold (add_sat x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add_sat x, undef) -> -1
  if (N0.isUndef() || N1.isUndef())
    return DAG.getAllOnesConstant(DL, VT);

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(Opcode, DL, VT, N1, N0);
    // fold (add_sat c1, c2) -> c3
    return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add_sat x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // If it cannot overflow, transform into an add.
  if (Opcode == ISD::UADDSAT)
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADD, DL, VT, N0, N1);

  return SDValue();
}

/// Look through TRUNCATE / ZERO_EXTEND / (AND x, 1) wrappers around \p V and
/// return the underlying value if it is result #1 of an ADDCARRY, SUBCARRY,
/// UADDO or USUBO node and is either masked with 1 or of a type whose boolean
/// contents are zero-or-one. Otherwise return a null SDValue.
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
  bool Masked = false;

  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
  while (true) {
    if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
      V = V.getOperand(0);
      continue;
    }

    if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
      Masked = true;
      V = V.getOperand(0);
      continue;
    }

    break;
  }

  // If this is not a carry, return.
  if (V.getResNo() != 1)
    return SDValue();

  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
      V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
    return SDValue();

  // If the result is masked, then no matter what kind of bool it is we can
  // return. If it isn't, then we need to make sure the bool type is either 0 or
  // 1 and not other values.
  if (Masked ||
      TLI.getBooleanContents(V.getValueType()) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
    return V;

  return SDValue();
}

/// Operand-order-sensitive folds for an add of \p N0 and \p N1.
/// \p LocReference is only used to derive the debug location of new nodes.
SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
      return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}

/// Combine an ISD::ADDC node (add producing a glue carry-out).
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        DL, MVT::Glue));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

/// Return \p V xor'ed with the constant selected by the target's boolean
/// contents for \p VT: 1 for zero-or-one and undefined contents, -1 for
/// zero-or-negative-one contents.
static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
                           SelectionDAG &DAG, const TargetLowering &TLI) {
  SDValue Cst;
  switch (TLI.getBooleanContents(VT)) {
  case TargetLowering::ZeroOrOneBooleanContent:
  case TargetLowering::UndefinedBooleanContent:
    Cst = DAG.getConstant(1, DL, VT);
    break;
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    Cst = DAG.getConstant(-1, DL, VT);
    break;
  }

  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
}

/// Return true if \p V is an XOR with a constant right-hand side that flips a
/// boolean of type \p VT under the target's boolean contents: the constant
/// must be 1, all-ones, or have its low bit set, respectively for zero-or-one,
/// zero-or-negative-one and undefined contents.
static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
  if (V.getOpcode() != ISD::XOR) return false;
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!Const) return false;

  switch(TLI.getBooleanContents(VT)) {
    case TargetLowering::ZeroOrOneBooleanContent:
      return Const->isOne();
    case TargetLowering::ZeroOrNegativeOneBooleanContent:
      return Const->isAllOnesValue();
    case TargetLowering::UndefinedBooleanContent:
      return (Const->getAPIntValue() & 0x01) == 1;
  }
  llvm_unreachable("Unsupported boolean content");
}

/// Combine an ISD::UADDO node (unsigned add with overflow result). Vector
/// types are not handled.
SDValue DAGCombiner::visitUADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);

  // fold (uaddo x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getConstant(0, DL, CarryVT));

  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
    SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                              DAG.getConstant(0, DL, VT),
                              N0.getOperand(0));
    return CombineTo(N, Sub,
                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
  }

  // The remaining folds are operand-order sensitive, so try both orders.
  if (SDValue Combined = visitUADDOLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitUADDOLike(N1, N0, N))
    return Combined;

  return SDValue();
}

/// Operand-order-sensitive folds for the UADDO node \p N with operands taken
/// as \p N0 and \p N1.
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
  auto VT = N0.getValueType();

  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  // If Y + 1 cannot overflow.
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
    SDValue Y = N1.getOperand(0);
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
                         N1.getOperand(2));
  }

  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), VT), Carry);

  return SDValue();
}

/// Combine an ISD::ADDE node (add with carry-in and carry-out).
SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

/// Combine an ISD::ADDCARRY node (add with boolean carry-in and carry-out).
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
  }

  EVT CarryVT = CarryIn.getValueType();

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
  if (isBitwiseNot(N0) && isNullConstant(N1) &&
      isBooleanFlip(CarryIn, CarryVT, TLI)) {
    SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
                              DAG.getConstant(0, DL, N0.getValueType()),
                              N0.getOperand(0), CarryIn.getOperand(0));
    return CombineTo(N, Sub,
                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
  }

  // The remaining folds are operand-order sensitive, so try both orders.
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}

/// Operand-order-sensitive folds for the ADDCARRY node \p N with operands
/// taken as \p N0, \p N1 and carry-in \p CarryIn.
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry arguments is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   *
   * We are trying to get:
   *   (addcarry X, 0, (addcarry A, B, Z):Carry)
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    /**
     *            (uaddo A, B)
     *             /       \
     *          Carry      Sum
     *            |          \
     *            | (addcarry *, 0, Z)
     *            |       /
     *             \   Carry
     *              |   /
     * (addcarry X, *, *)
     */
    if (Y.getOpcode() == ISD::UADDO &&
        CarryIn.getResNo() == 1 &&
        CarryIn.getOpcode() == ISD::ADDCARRY &&
        isNullConstant(CarryIn.getOperand(1)) &&
        CarryIn.getOperand(0) == Y.getValue(0)) {
      auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
                              Y.getOperand(0), Y.getOperand(1),
                              CarryIn.getOperand(2));
      AddToWorklist(NewY.getNode());
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), N0.getValueType()),
                         NewY.getValue(1));
    }
  }

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG, bool LegalOperations) {
  if (!VT.isVector())
    return DAG.getConstant(0, DL, VT);
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, DL, VT);
  return SDValue();
}

SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
2595 if (N0 == N1) 2596 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); 2597 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 2598 DAG.isConstantIntBuildVectorOrConstantInt(N1)) { 2599 // fold (sub c1, c2) -> c1-c2 2600 return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), 2601 N1.getNode()); 2602 } 2603 2604 if (SDValue NewSel = foldBinOpIntoSelect(N)) 2605 return NewSel; 2606 2607 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 2608 2609 // fold (sub x, c) -> (add x, -c) 2610 if (N1C) { 2611 return DAG.getNode(ISD::ADD, DL, VT, N0, 2612 DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); 2613 } 2614 2615 if (isNullOrNullSplat(N0)) { 2616 unsigned BitWidth = VT.getScalarSizeInBits(); 2617 // Right-shifting everything out but the sign bit followed by negation is 2618 // the same as flipping arithmetic/logical shift type without the negation: 2619 // -(X >>u 31) -> (X >>s 31) 2620 // -(X >>s 31) -> (X >>u 31) 2621 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) { 2622 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1)); 2623 if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) { 2624 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA; 2625 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT)) 2626 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1)); 2627 } 2628 } 2629 2630 // 0 - X --> 0 if the sub is NUW. 2631 if (N->getFlags().hasNoUnsignedWrap()) 2632 return N0; 2633 2634 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) { 2635 // N1 is either 0 or the minimum signed value. If the sub is NSW, then 2636 // N1 must be 0 because negating the minimum signed value is undefined. 2637 if (N->getFlags().hasNoSignedWrap()) 2638 return N0; 2639 2640 // 0 - X --> X if X is 0 or the minimum signed value. 2641 return N1; 2642 } 2643 } 2644 2645 // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) 2646 if (isAllOnesOrAllOnesSplat(N0)) 2647 return DAG.getNode(ISD::XOR, DL, VT, N1, N0); 2648 2649 // fold (A - (0-B)) -> A+B 2650 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0))) 2651 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1)); 2652 2653 // fold A-(A-B) -> B 2654 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) 2655 return N1.getOperand(1); 2656 2657 // fold (A+B)-A -> B 2658 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) 2659 return N0.getOperand(1); 2660 2661 // fold (A+B)-B -> A 2662 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) 2663 return N0.getOperand(0); 2664 2665 // fold C2-(A+C1) -> (C2-C1)-A 2666 if (N1.getOpcode() == ISD::ADD) { 2667 SDValue N11 = N1.getOperand(1); 2668 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) && 2669 isConstantOrConstantVector(N11, /* NoOpaques */ true)) { 2670 SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11); 2671 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); 2672 } 2673 } 2674 2675 // fold ((A+(B+or-C))-B) -> A+or-C 2676 if (N0.getOpcode() == ISD::ADD && 2677 (N0.getOperand(1).getOpcode() == ISD::SUB || 2678 N0.getOperand(1).getOpcode() == ISD::ADD) && 2679 N0.getOperand(1).getOperand(0) == N1) 2680 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0), 2681 N0.getOperand(1).getOperand(1)); 2682 2683 // fold ((A+(C+B))-B) -> A+C 2684 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && 2685 N0.getOperand(1).getOperand(1) == N1) 2686 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), 2687 N0.getOperand(1).getOperand(0)); 2688 2689 // fold ((A-(B-C))-C) -> A-B 2690 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && 2691 N0.getOperand(1).getOperand(1) == N1) 2692 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), 2693 N0.getOperand(1).getOperand(0)); 2694 2695 // fold (A-(B-C)) -> A+(C-B) 2696 if (N1.getOpcode() == ISD::SUB && 
N1.hasOneUse()) 2697 return DAG.getNode(ISD::ADD, DL, VT, N0, 2698 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1), 2699 N1.getOperand(0))); 2700 2701 // fold (X - (-Y * Z)) -> (X + (Y * Z)) 2702 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { 2703 if (N1.getOperand(0).getOpcode() == ISD::SUB && 2704 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) { 2705 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, 2706 N1.getOperand(0).getOperand(1), 2707 N1.getOperand(1)); 2708 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); 2709 } 2710 if (N1.getOperand(1).getOpcode() == ISD::SUB && 2711 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) { 2712 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, 2713 N1.getOperand(0), 2714 N1.getOperand(1).getOperand(1)); 2715 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); 2716 } 2717 } 2718 2719 // If either operand of a sub is undef, the result is undef 2720 if (N0.isUndef()) 2721 return N0; 2722 if (N1.isUndef()) 2723 return N1; 2724 2725 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) 2726 return V; 2727 2728 if (SDValue V = foldAddSubOfSignBit(N, DAG)) 2729 return V; 2730 2731 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X) 2732 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { 2733 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) { 2734 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1); 2735 SDValue S0 = N1.getOperand(0); 2736 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) { 2737 unsigned OpSizeInBits = VT.getScalarSizeInBits(); 2738 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) 2739 if (C->getAPIntValue() == (OpSizeInBits - 1)) 2740 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); 2741 } 2742 } 2743 } 2744 2745 // If the relocation model supports it, consider symbol offsets. 
2746 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) 2747 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { 2748 // fold (sub Sym, c) -> Sym-c 2749 if (N1C && GA->getOpcode() == ISD::GlobalAddress) 2750 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, 2751 GA->getOffset() - 2752 (uint64_t)N1C->getSExtValue()); 2753 // fold (sub Sym+c1, Sym+c2) -> c1-c2 2754 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) 2755 if (GA->getGlobal() == GB->getGlobal()) 2756 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), 2757 DL, VT); 2758 } 2759 2760 // sub X, (sextinreg Y i1) -> add X, (and Y 1) 2761 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { 2762 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); 2763 if (TN->getVT() == MVT::i1) { 2764 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), 2765 DAG.getConstant(1, DL, VT)); 2766 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); 2767 } 2768 } 2769 2770 // Prefer an add for more folding potential and possibly better codegen: 2771 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1) 2772 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) { 2773 SDValue ShAmt = N1.getOperand(1); 2774 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt); 2775 if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) { 2776 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt); 2777 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA); 2778 } 2779 } 2780 2781 return SDValue(); 2782 } 2783 2784 SDValue DAGCombiner::visitSUBSAT(SDNode *N) { 2785 SDValue N0 = N->getOperand(0); 2786 SDValue N1 = N->getOperand(1); 2787 EVT VT = N0.getValueType(); 2788 SDLoc DL(N); 2789 2790 // fold vector ops 2791 if (VT.isVector()) { 2792 // TODO SimplifyVBinOp 2793 2794 // fold (sub_sat x, 0) -> x, vector edition 2795 if (ISD::isBuildVectorAllZeros(N1.getNode())) 2796 return N0; 2797 } 2798 2799 // fold (sub_sat x, undef) -> 0 2800 if 
(N0.isUndef() || N1.isUndef()) 2801 return DAG.getConstant(0, DL, VT); 2802 2803 // fold (sub_sat x, x) -> 0 2804 if (N0 == N1) 2805 return DAG.getConstant(0, DL, VT); 2806 2807 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 2808 DAG.isConstantIntBuildVectorOrConstantInt(N1)) { 2809 // fold (sub_sat c1, c2) -> c3 2810 return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(), 2811 N1.getNode()); 2812 } 2813 2814 // fold (sub_sat x, 0) -> x 2815 if (isNullConstant(N1)) 2816 return N0; 2817 2818 return SDValue(); 2819 } 2820 2821 SDValue DAGCombiner::visitSUBC(SDNode *N) { 2822 SDValue N0 = N->getOperand(0); 2823 SDValue N1 = N->getOperand(1); 2824 EVT VT = N0.getValueType(); 2825 SDLoc DL(N); 2826 2827 // If the flag result is dead, turn this into an SUB. 2828 if (!N->hasAnyUseOfValue(1)) 2829 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), 2830 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); 2831 2832 // fold (subc x, x) -> 0 + no borrow 2833 if (N0 == N1) 2834 return CombineTo(N, DAG.getConstant(0, DL, VT), 2835 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); 2836 2837 // fold (subc x, 0) -> x + no borrow 2838 if (isNullConstant(N1)) 2839 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); 2840 2841 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow 2842 if (isAllOnesConstant(N0)) 2843 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), 2844 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); 2845 2846 return SDValue(); 2847 } 2848 2849 SDValue DAGCombiner::visitUSUBO(SDNode *N) { 2850 SDValue N0 = N->getOperand(0); 2851 SDValue N1 = N->getOperand(1); 2852 EVT VT = N0.getValueType(); 2853 if (VT.isVector()) 2854 return SDValue(); 2855 2856 EVT CarryVT = N->getValueType(1); 2857 SDLoc DL(N); 2858 2859 // If the flag result is dead, turn this into an SUB. 
2860 if (!N->hasAnyUseOfValue(1)) 2861 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), 2862 DAG.getUNDEF(CarryVT)); 2863 2864 // fold (usubo x, x) -> 0 + no borrow 2865 if (N0 == N1) 2866 return CombineTo(N, DAG.getConstant(0, DL, VT), 2867 DAG.getConstant(0, DL, CarryVT)); 2868 2869 // fold (usubo x, 0) -> x + no borrow 2870 if (isNullConstant(N1)) 2871 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT)); 2872 2873 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow 2874 if (isAllOnesConstant(N0)) 2875 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), 2876 DAG.getConstant(0, DL, CarryVT)); 2877 2878 return SDValue(); 2879 } 2880 2881 SDValue DAGCombiner::visitSUBE(SDNode *N) { 2882 SDValue N0 = N->getOperand(0); 2883 SDValue N1 = N->getOperand(1); 2884 SDValue CarryIn = N->getOperand(2); 2885 2886 // fold (sube x, y, false) -> (subc x, y) 2887 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 2888 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1); 2889 2890 return SDValue(); 2891 } 2892 2893 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) { 2894 SDValue N0 = N->getOperand(0); 2895 SDValue N1 = N->getOperand(1); 2896 SDValue CarryIn = N->getOperand(2); 2897 2898 // fold (subcarry x, y, false) -> (usubo x, y) 2899 if (isNullConstant(CarryIn)) { 2900 if (!LegalOperations || 2901 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0))) 2902 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1); 2903 } 2904 2905 return SDValue(); 2906 } 2907 2908 SDValue DAGCombiner::visitMUL(SDNode *N) { 2909 SDValue N0 = N->getOperand(0); 2910 SDValue N1 = N->getOperand(1); 2911 EVT VT = N0.getValueType(); 2912 2913 // fold (mul x, undef) -> 0 2914 if (N0.isUndef() || N1.isUndef()) 2915 return DAG.getConstant(0, SDLoc(N), VT); 2916 2917 bool N0IsConst = false; 2918 bool N1IsConst = false; 2919 bool N1IsOpaqueConst = false; 2920 bool N0IsOpaqueConst = false; 2921 APInt ConstValue0, ConstValue1; 2922 // fold vector ops 2923 
if (VT.isVector()) { 2924 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2925 return FoldedVOp; 2926 2927 N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0); 2928 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); 2929 assert((!N0IsConst || 2930 ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) && 2931 "Splat APInt should be element width"); 2932 assert((!N1IsConst || 2933 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) && 2934 "Splat APInt should be element width"); 2935 } else { 2936 N0IsConst = isa<ConstantSDNode>(N0); 2937 if (N0IsConst) { 2938 ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); 2939 N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque(); 2940 } 2941 N1IsConst = isa<ConstantSDNode>(N1); 2942 if (N1IsConst) { 2943 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); 2944 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque(); 2945 } 2946 } 2947 2948 // fold (mul c1, c2) -> c1*c2 2949 if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst) 2950 return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, 2951 N0.getNode(), N1.getNode()); 2952 2953 // canonicalize constant to RHS (vector doesn't have to splat) 2954 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 2955 !DAG.isConstantIntBuildVectorOrConstantInt(N1)) 2956 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); 2957 // fold (mul x, 0) -> 0 2958 if (N1IsConst && ConstValue1.isNullValue()) 2959 return N1; 2960 // fold (mul x, 1) -> x 2961 if (N1IsConst && ConstValue1.isOneValue()) 2962 return N0; 2963 2964 if (SDValue NewSel = foldBinOpIntoSelect(N)) 2965 return NewSel; 2966 2967 // fold (mul x, -1) -> 0-x 2968 if (N1IsConst && ConstValue1.isAllOnesValue()) { 2969 SDLoc DL(N); 2970 return DAG.getNode(ISD::SUB, DL, VT, 2971 DAG.getConstant(0, DL, VT), N0); 2972 } 2973 // fold (mul x, (1 << c)) -> x << c 2974 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && 2975 DAG.isKnownToBeAPowerOfTwo(N1) && 2976 (!VT.isVector() || 
Level <= AfterLegalizeVectorOps)) { 2977 SDLoc DL(N); 2978 SDValue LogBase2 = BuildLogBase2(N1, DL); 2979 EVT ShiftVT = getShiftAmountTy(N0.getValueType()); 2980 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); 2981 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc); 2982 } 2983 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c 2984 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) { 2985 unsigned Log2Val = (-ConstValue1).logBase2(); 2986 SDLoc DL(N); 2987 // FIXME: If the input is something that is easily negated (e.g. a 2988 // single-use add), we should put the negate there. 2989 return DAG.getNode(ISD::SUB, DL, VT, 2990 DAG.getConstant(0, DL, VT), 2991 DAG.getNode(ISD::SHL, DL, VT, N0, 2992 DAG.getConstant(Log2Val, DL, 2993 getShiftAmountTy(N0.getValueType())))); 2994 } 2995 2996 // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub. 2997 // mul x, (2^N + 1) --> add (shl x, N), x 2998 // mul x, (2^N - 1) --> sub (shl x, N), x 2999 // Examples: x * 33 --> (x << 5) + x 3000 // x * 15 --> (x << 4) - x 3001 // x * -33 --> -((x << 5) + x) 3002 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4) 3003 if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) { 3004 // TODO: We could handle more general decomposition of any constant by 3005 // having the target set a limit on number of ops and making a 3006 // callback to determine that sequence (similar to sqrt expansion). 3007 unsigned MathOp = ISD::DELETED_NODE; 3008 APInt MulC = ConstValue1.abs(); 3009 if ((MulC - 1).isPowerOf2()) 3010 MathOp = ISD::ADD; 3011 else if ((MulC + 1).isPowerOf2()) 3012 MathOp = ISD::SUB; 3013 3014 if (MathOp != ISD::DELETED_NODE) { 3015 unsigned ShAmt = MathOp == ISD::ADD ? 
(MulC - 1).logBase2() 3016 : (MulC + 1).logBase2(); 3017 assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() && 3018 "Not expecting multiply-by-constant that could have simplified"); 3019 SDLoc DL(N); 3020 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0, 3021 DAG.getConstant(ShAmt, DL, VT)); 3022 SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0); 3023 if (ConstValue1.isNegative()) 3024 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R); 3025 return R; 3026 } 3027 } 3028 3029 // (mul (shl X, c1), c2) -> (mul X, c2 << c1) 3030 if (N0.getOpcode() == ISD::SHL && 3031 isConstantOrConstantVector(N1, /* NoOpaques */ true) && 3032 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { 3033 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); 3034 if (isConstantOrConstantVector(C3)) 3035 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); 3036 } 3037 3038 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one 3039 // use. 3040 { 3041 SDValue Sh(nullptr, 0), Y(nullptr, 0); 3042 3043 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
3044 if (N0.getOpcode() == ISD::SHL && 3045 isConstantOrConstantVector(N0.getOperand(1)) && 3046 N0.getNode()->hasOneUse()) { 3047 Sh = N0; Y = N1; 3048 } else if (N1.getOpcode() == ISD::SHL && 3049 isConstantOrConstantVector(N1.getOperand(1)) && 3050 N1.getNode()->hasOneUse()) { 3051 Sh = N1; Y = N0; 3052 } 3053 3054 if (Sh.getNode()) { 3055 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); 3056 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); 3057 } 3058 } 3059 3060 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) 3061 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) && 3062 N0.getOpcode() == ISD::ADD && 3063 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && 3064 isMulAddWithConstProfitable(N, N0, N1)) 3065 return DAG.getNode(ISD::ADD, SDLoc(N), VT, 3066 DAG.getNode(ISD::MUL, SDLoc(N0), VT, 3067 N0.getOperand(0), N1), 3068 DAG.getNode(ISD::MUL, SDLoc(N1), VT, 3069 N0.getOperand(1), N1)); 3070 3071 // reassociate mul 3072 if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags())) 3073 return RMUL; 3074 3075 return SDValue(); 3076 } 3077 3078 /// Return true if divmod libcall is available. 3079 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, 3080 const TargetLowering &TLI) { 3081 RTLIB::Libcall LC; 3082 EVT NodeType = Node->getValueType(0); 3083 if (!NodeType.isSimple()) 3084 return false; 3085 switch (NodeType.getSimpleVT().SimpleTy) { 3086 default: return false; // No libcall for vector types. 3087 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; 3088 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; 3089 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; 3090 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; 3091 case MVT::i128: LC= isSigned ? 
RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; 3092 } 3093 3094 return TLI.getLibcallName(LC) != nullptr; 3095 } 3096 3097 /// Issue divrem if both quotient and remainder are needed. 3098 SDValue DAGCombiner::useDivRem(SDNode *Node) { 3099 if (Node->use_empty()) 3100 return SDValue(); // This is a dead node, leave it alone. 3101 3102 unsigned Opcode = Node->getOpcode(); 3103 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); 3104 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; 3105 3106 // DivMod lib calls can still work on non-legal types if using lib-calls. 3107 EVT VT = Node->getValueType(0); 3108 if (VT.isVector() || !VT.isInteger()) 3109 return SDValue(); 3110 3111 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT)) 3112 return SDValue(); 3113 3114 // If DIVREM is going to get expanded into a libcall, 3115 // but there is no libcall available, then don't combine. 3116 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && 3117 !isDivRemLibcallAvailable(Node, isSigned, TLI)) 3118 return SDValue(); 3119 3120 // If div is legal, it's better to do the normal expansion 3121 unsigned OtherOpcode = 0; 3122 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) { 3123 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM; 3124 if (TLI.isOperationLegalOrCustom(Opcode, VT)) 3125 return SDValue(); 3126 } else { 3127 OtherOpcode = isSigned ? 
ISD::SDIV : ISD::UDIV; 3128 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) 3129 return SDValue(); 3130 } 3131 3132 SDValue Op0 = Node->getOperand(0); 3133 SDValue Op1 = Node->getOperand(1); 3134 SDValue combined; 3135 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), 3136 UE = Op0.getNode()->use_end(); UI != UE; ++UI) { 3137 SDNode *User = *UI; 3138 if (User == Node || User->getOpcode() == ISD::DELETED_NODE || 3139 User->use_empty()) 3140 continue; 3141 // Convert the other matching node(s), too; 3142 // otherwise, the DIVREM may get target-legalized into something 3143 // target-specific that we won't be able to recognize. 3144 unsigned UserOpc = User->getOpcode(); 3145 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) && 3146 User->getOperand(0) == Op0 && 3147 User->getOperand(1) == Op1) { 3148 if (!combined) { 3149 if (UserOpc == OtherOpcode) { 3150 SDVTList VTs = DAG.getVTList(VT, VT); 3151 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1); 3152 } else if (UserOpc == DivRemOpc) { 3153 combined = SDValue(User, 0); 3154 } else { 3155 assert(UserOpc == Opcode); 3156 continue; 3157 } 3158 } 3159 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV) 3160 CombineTo(User, combined); 3161 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM) 3162 CombineTo(User, combined.getValue(1)); 3163 } 3164 } 3165 return combined; 3166 } 3167 3168 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { 3169 SDValue N0 = N->getOperand(0); 3170 SDValue N1 = N->getOperand(1); 3171 EVT VT = N->getValueType(0); 3172 SDLoc DL(N); 3173 3174 unsigned Opc = N->getOpcode(); 3175 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc); 3176 ConstantSDNode *N1C = isConstOrConstSplat(N1); 3177 3178 // X / undef -> undef 3179 // X % undef -> undef 3180 // X / 0 -> undef 3181 // X % 0 -> undef 3182 // NOTE: This includes vectors where any divisor element is zero/undef. 
3183 if (DAG.isUndef(Opc, {N0, N1})) 3184 return DAG.getUNDEF(VT); 3185 3186 // undef / X -> 0 3187 // undef % X -> 0 3188 if (N0.isUndef()) 3189 return DAG.getConstant(0, DL, VT); 3190 3191 // 0 / X -> 0 3192 // 0 % X -> 0 3193 ConstantSDNode *N0C = isConstOrConstSplat(N0); 3194 if (N0C && N0C->isNullValue()) 3195 return N0; 3196 3197 // X / X -> 1 3198 // X % X -> 0 3199 if (N0 == N1) 3200 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT); 3201 3202 // X / 1 -> X 3203 // X % 1 -> 0 3204 // If this is a boolean op (single-bit element type), we can't have 3205 // division-by-zero or remainder-by-zero, so assume the divisor is 1. 3206 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume 3207 // it's a 1. 3208 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1)) 3209 return IsDiv ? N0 : DAG.getConstant(0, DL, VT); 3210 3211 return SDValue(); 3212 } 3213 3214 SDValue DAGCombiner::visitSDIV(SDNode *N) { 3215 SDValue N0 = N->getOperand(0); 3216 SDValue N1 = N->getOperand(1); 3217 EVT VT = N->getValueType(0); 3218 EVT CCVT = getSetCCResultType(VT); 3219 3220 // fold vector ops 3221 if (VT.isVector()) 3222 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 3223 return FoldedVOp; 3224 3225 SDLoc DL(N); 3226 3227 // fold (sdiv c1, c2) -> c1/c2 3228 ConstantSDNode *N0C = isConstOrConstSplat(N0); 3229 ConstantSDNode *N1C = isConstOrConstSplat(N1); 3230 if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) 3231 return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); 3232 // fold (sdiv X, -1) -> 0-X 3233 if (N1C && N1C->isAllOnesValue()) 3234 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); 3235 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) 3236 if (N1C && N1C->getAPIntValue().isMinSignedValue()) 3237 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), 3238 DAG.getConstant(1, DL, VT), 3239 DAG.getConstant(0, DL, VT)); 3240 3241 if (SDValue V = simplifyDivRem(N, DAG)) 3242 return V; 3243 3244 
if (SDValue NewSel = foldBinOpIntoSelect(N)) 3245 return NewSel; 3246 3247 // If we know the sign bits of both operands are zero, strength reduce to a 3248 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 3249 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 3250 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); 3251 3252 if (SDValue V = visitSDIVLike(N0, N1, N)) { 3253 // If the corresponding remainder node exists, update its users with 3254 // (Dividend - (Quotient * Divisor). 3255 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(), 3256 { N0, N1 })) { 3257 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1); 3258 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); 3259 AddToWorklist(Mul.getNode()); 3260 AddToWorklist(Sub.getNode()); 3261 CombineTo(RemNode, Sub); 3262 } 3263 return V; 3264 } 3265 3266 // sdiv, srem -> sdivrem 3267 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is 3268 // true. Otherwise, we break the simplification logic in visitREM(). 3269 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); 3270 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) 3271 if (SDValue DivRem = useDivRem(N)) 3272 return DivRem; 3273 3274 return SDValue(); 3275 } 3276 3277 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { 3278 SDLoc DL(N); 3279 EVT VT = N->getValueType(0); 3280 EVT CCVT = getSetCCResultType(VT); 3281 unsigned BitWidth = VT.getScalarSizeInBits(); 3282 3283 // Helper for determining whether a value is a power-2 constant scalar or a 3284 // vector of such elements. 
3285 auto IsPowerOfTwo = [](ConstantSDNode *C) { 3286 if (C->isNullValue() || C->isOpaque()) 3287 return false; 3288 if (C->getAPIntValue().isPowerOf2()) 3289 return true; 3290 if ((-C->getAPIntValue()).isPowerOf2()) 3291 return true; 3292 return false; 3293 }; 3294 3295 // fold (sdiv X, pow2) -> simple ops after legalize 3296 // FIXME: We check for the exact bit here because the generic lowering gives 3297 // better results in that case. The target-specific lowering should learn how 3298 // to handle exact sdivs efficiently. 3299 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) { 3300 // Target-specific implementation of sdiv x, pow2. 3301 if (SDValue Res = BuildSDIVPow2(N)) 3302 return Res; 3303 3304 // Create constants that are functions of the shift amount value. 3305 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); 3306 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy); 3307 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1); 3308 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy); 3309 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1); 3310 if (!isConstantOrConstantVector(Inexact)) 3311 return SDValue(); 3312 3313 // Splat the sign bit into the register 3314 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0, 3315 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy)); 3316 AddToWorklist(Sign.getNode()); 3317 3318 // Add (N0 < 0) ? 
abs2 - 1 : 0; 3319 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact); 3320 AddToWorklist(Srl.getNode()); 3321 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl); 3322 AddToWorklist(Add.getNode()); 3323 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1); 3324 AddToWorklist(Sra.getNode()); 3325 3326 // Special case: (sdiv X, 1) -> X 3327 // Special Case: (sdiv X, -1) -> 0-X 3328 SDValue One = DAG.getConstant(1, DL, VT); 3329 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); 3330 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ); 3331 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ); 3332 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes); 3333 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra); 3334 3335 // If dividing by a positive value, we're done. Otherwise, the result must 3336 // be negated. 3337 SDValue Zero = DAG.getConstant(0, DL, VT); 3338 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra); 3339 3340 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding. 3341 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT); 3342 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra); 3343 return Res; 3344 } 3345 3346 // If integer divide is expensive and we satisfy the requirements, emit an 3347 // alternate sequence. Targets may check function attributes for size/speed 3348 // trade-offs. 
3349 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); 3350 if (isConstantOrConstantVector(N1) && 3351 !TLI.isIntDivCheap(N->getValueType(0), Attr)) 3352 if (SDValue Op = BuildSDIV(N)) 3353 return Op; 3354 3355 return SDValue(); 3356 } 3357 3358 SDValue DAGCombiner::visitUDIV(SDNode *N) { 3359 SDValue N0 = N->getOperand(0); 3360 SDValue N1 = N->getOperand(1); 3361 EVT VT = N->getValueType(0); 3362 EVT CCVT = getSetCCResultType(VT); 3363 3364 // fold vector ops 3365 if (VT.isVector()) 3366 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 3367 return FoldedVOp; 3368 3369 SDLoc DL(N); 3370 3371 // fold (udiv c1, c2) -> c1/c2 3372 ConstantSDNode *N0C = isConstOrConstSplat(N0); 3373 ConstantSDNode *N1C = isConstOrConstSplat(N1); 3374 if (N0C && N1C) 3375 if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, 3376 N0C, N1C)) 3377 return Folded; 3378 // fold (udiv X, -1) -> select(X == -1, 1, 0) 3379 if (N1C && N1C->getAPIntValue().isAllOnesValue()) 3380 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), 3381 DAG.getConstant(1, DL, VT), 3382 DAG.getConstant(0, DL, VT)); 3383 3384 if (SDValue V = simplifyDivRem(N, DAG)) 3385 return V; 3386 3387 if (SDValue NewSel = foldBinOpIntoSelect(N)) 3388 return NewSel; 3389 3390 if (SDValue V = visitUDIVLike(N0, N1, N)) { 3391 // If the corresponding remainder node exists, update its users with 3392 // (Dividend - (Quotient * Divisor). 3393 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(), 3394 { N0, N1 })) { 3395 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1); 3396 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); 3397 AddToWorklist(Mul.getNode()); 3398 AddToWorklist(Sub.getNode()); 3399 CombineTo(RemNode, Sub); 3400 } 3401 return V; 3402 } 3403 3404 // sdiv, srem -> sdivrem 3405 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is 3406 // true. Otherwise, we break the simplification logic in visitREM(). 
3407 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); 3408 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) 3409 if (SDValue DivRem = useDivRem(N)) 3410 return DivRem; 3411 3412 return SDValue(); 3413 } 3414 3415 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { 3416 SDLoc DL(N); 3417 EVT VT = N->getValueType(0); 3418 3419 // fold (udiv x, (1 << c)) -> x >>u c 3420 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && 3421 DAG.isKnownToBeAPowerOfTwo(N1)) { 3422 SDValue LogBase2 = BuildLogBase2(N1, DL); 3423 AddToWorklist(LogBase2.getNode()); 3424 3425 EVT ShiftVT = getShiftAmountTy(N0.getValueType()); 3426 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); 3427 AddToWorklist(Trunc.getNode()); 3428 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); 3429 } 3430 3431 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 3432 if (N1.getOpcode() == ISD::SHL) { 3433 SDValue N10 = N1.getOperand(0); 3434 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) && 3435 DAG.isKnownToBeAPowerOfTwo(N10)) { 3436 SDValue LogBase2 = BuildLogBase2(N10, DL); 3437 AddToWorklist(LogBase2.getNode()); 3438 3439 EVT ADDVT = N1.getOperand(1).getValueType(); 3440 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT); 3441 AddToWorklist(Trunc.getNode()); 3442 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc); 3443 AddToWorklist(Add.getNode()); 3444 return DAG.getNode(ISD::SRL, DL, VT, N0, Add); 3445 } 3446 } 3447 3448 // fold (udiv x, c) -> alternate 3449 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); 3450 if (isConstantOrConstantVector(N1) && 3451 !TLI.isIntDivCheap(N->getValueType(0), Attr)) 3452 if (SDValue Op = BuildUDIV(N)) 3453 return Op; 3454 3455 return SDValue(); 3456 } 3457 3458 // handles ISD::SREM and ISD::UREM 3459 SDValue DAGCombiner::visitREM(SDNode *N) { 3460 unsigned Opcode = N->getOpcode(); 3461 SDValue N0 = N->getOperand(0); 3462 
SDValue N1 = N->getOperand(1); 3463 EVT VT = N->getValueType(0); 3464 EVT CCVT = getSetCCResultType(VT); 3465 3466 bool isSigned = (Opcode == ISD::SREM); 3467 SDLoc DL(N); 3468 3469 // fold (rem c1, c2) -> c1%c2 3470 ConstantSDNode *N0C = isConstOrConstSplat(N0); 3471 ConstantSDNode *N1C = isConstOrConstSplat(N1); 3472 if (N0C && N1C) 3473 if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) 3474 return Folded; 3475 // fold (urem X, -1) -> select(X == -1, 0, x) 3476 if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue()) 3477 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), 3478 DAG.getConstant(0, DL, VT), N0); 3479 3480 if (SDValue V = simplifyDivRem(N, DAG)) 3481 return V; 3482 3483 if (SDValue NewSel = foldBinOpIntoSelect(N)) 3484 return NewSel; 3485 3486 if (isSigned) { 3487 // If we know the sign bits of both operands are zero, strength reduce to a 3488 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 3489 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 3490 return DAG.getNode(ISD::UREM, DL, VT, N0, N1); 3491 } else { 3492 SDValue NegOne = DAG.getAllOnesConstant(DL, VT); 3493 if (DAG.isKnownToBeAPowerOfTwo(N1)) { 3494 // fold (urem x, pow2) -> (and x, pow2-1) 3495 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne); 3496 AddToWorklist(Add.getNode()); 3497 return DAG.getNode(ISD::AND, DL, VT, N0, Add); 3498 } 3499 if (N1.getOpcode() == ISD::SHL && 3500 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) { 3501 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) 3502 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne); 3503 AddToWorklist(Add.getNode()); 3504 return DAG.getNode(ISD::AND, DL, VT, N0, Add); 3505 } 3506 } 3507 3508 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); 3509 3510 // If X/C can be simplified by the division-by-constant logic, lower 3511 // X%C to the equivalent of X-X/C*C. 
3512 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the 3513 // speculative DIV must not cause a DIVREM conversion. We guard against this 3514 // by skipping the simplification if isIntDivCheap(). When div is not cheap, 3515 // combine will not return a DIVREM. Regardless, checking cheapness here 3516 // makes sense since the simplification results in fatter code. 3517 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) { 3518 SDValue OptimizedDiv = 3519 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N); 3520 if (OptimizedDiv.getNode()) { 3521 // If the equivalent Div node also exists, update its users. 3522 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; 3523 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(), 3524 { N0, N1 })) 3525 CombineTo(DivNode, OptimizedDiv); 3526 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); 3527 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); 3528 AddToWorklist(OptimizedDiv.getNode()); 3529 AddToWorklist(Mul.getNode()); 3530 return Sub; 3531 } 3532 } 3533 3534 // sdiv, srem -> sdivrem 3535 if (SDValue DivRem = useDivRem(N)) 3536 return DivRem.getValue(1); 3537 3538 return SDValue(); 3539 } 3540 3541 SDValue DAGCombiner::visitMULHS(SDNode *N) { 3542 SDValue N0 = N->getOperand(0); 3543 SDValue N1 = N->getOperand(1); 3544 EVT VT = N->getValueType(0); 3545 SDLoc DL(N); 3546 3547 if (VT.isVector()) { 3548 // fold (mulhs x, 0) -> 0 3549 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3550 return N1; 3551 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3552 return N0; 3553 } 3554 3555 // fold (mulhs x, 0) -> 0 3556 if (isNullConstant(N1)) 3557 return N1; 3558 // fold (mulhs x, 1) -> (sra x, size(x)-1) 3559 if (isOneConstant(N1)) 3560 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, 3561 DAG.getConstant(N0.getValueSizeInBits() - 1, DL, 3562 getShiftAmountTy(N0.getValueType()))); 3563 3564 // fold (mulhs x, undef) -> 0 3565 if (N0.isUndef() || 
N1.isUndef()) 3566 return DAG.getConstant(0, DL, VT); 3567 3568 // If the type twice as wide is legal, transform the mulhs to a wider multiply 3569 // plus a shift. 3570 if (VT.isSimple() && !VT.isVector()) { 3571 MVT Simple = VT.getSimpleVT(); 3572 unsigned SimpleSize = Simple.getSizeInBits(); 3573 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 3574 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 3575 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); 3576 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); 3577 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 3578 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 3579 DAG.getConstant(SimpleSize, DL, 3580 getShiftAmountTy(N1.getValueType()))); 3581 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 3582 } 3583 } 3584 3585 return SDValue(); 3586 } 3587 3588 SDValue DAGCombiner::visitMULHU(SDNode *N) { 3589 SDValue N0 = N->getOperand(0); 3590 SDValue N1 = N->getOperand(1); 3591 EVT VT = N->getValueType(0); 3592 SDLoc DL(N); 3593 3594 if (VT.isVector()) { 3595 // fold (mulhu x, 0) -> 0 3596 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3597 return N1; 3598 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3599 return N0; 3600 } 3601 3602 // fold (mulhu x, 0) -> 0 3603 if (isNullConstant(N1)) 3604 return N1; 3605 // fold (mulhu x, 1) -> 0 3606 if (isOneConstant(N1)) 3607 return DAG.getConstant(0, DL, N0.getValueType()); 3608 // fold (mulhu x, undef) -> 0 3609 if (N0.isUndef() || N1.isUndef()) 3610 return DAG.getConstant(0, DL, VT); 3611 3612 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c) 3613 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && 3614 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) { 3615 SDLoc DL(N); 3616 unsigned NumEltBits = VT.getScalarSizeInBits(); 3617 SDValue LogBase2 = BuildLogBase2(N1, DL); 3618 SDValue SRLAmt = DAG.getNode( 3619 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2); 3620 EVT ShiftVT = getShiftAmountTy(N0.getValueType()); 3621 SDValue Trunc = 
DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT); 3622 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); 3623 } 3624 3625 // If the type twice as wide is legal, transform the mulhu to a wider multiply 3626 // plus a shift. 3627 if (VT.isSimple() && !VT.isVector()) { 3628 MVT Simple = VT.getSimpleVT(); 3629 unsigned SimpleSize = Simple.getSizeInBits(); 3630 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 3631 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 3632 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); 3633 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); 3634 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 3635 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 3636 DAG.getConstant(SimpleSize, DL, 3637 getShiftAmountTy(N1.getValueType()))); 3638 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 3639 } 3640 } 3641 3642 return SDValue(); 3643 } 3644 3645 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp 3646 /// give the opcodes for the two computations that are being performed. Return 3647 /// true if a simplification was made. 3648 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 3649 unsigned HiOp) { 3650 // If the high half is not needed, just compute the low half. 3651 bool HiExists = N->hasAnyUseOfValue(1); 3652 if (!HiExists && (!LegalOperations || 3653 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { 3654 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 3655 return CombineTo(N, Res, Res); 3656 } 3657 3658 // If the low half is not needed, just compute the high half. 3659 bool LoExists = N->hasAnyUseOfValue(0); 3660 if (!LoExists && (!LegalOperations || 3661 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) { 3662 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 3663 return CombineTo(N, Res, Res); 3664 } 3665 3666 // If both halves are used, return as it is. 
3667 if (LoExists && HiExists) 3668 return SDValue(); 3669 3670 // If the two computed results can be simplified separately, separate them. 3671 if (LoExists) { 3672 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 3673 AddToWorklist(Lo.getNode()); 3674 SDValue LoOpt = combine(Lo.getNode()); 3675 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && 3676 (!LegalOperations || 3677 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType()))) 3678 return CombineTo(N, LoOpt, LoOpt); 3679 } 3680 3681 if (HiExists) { 3682 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 3683 AddToWorklist(Hi.getNode()); 3684 SDValue HiOpt = combine(Hi.getNode()); 3685 if (HiOpt.getNode() && HiOpt != Hi && 3686 (!LegalOperations || 3687 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType()))) 3688 return CombineTo(N, HiOpt, HiOpt); 3689 } 3690 3691 return SDValue(); 3692 } 3693 3694 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { 3695 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS)) 3696 return Res; 3697 3698 EVT VT = N->getValueType(0); 3699 SDLoc DL(N); 3700 3701 // If the type is twice as wide is legal, transform the mulhu to a wider 3702 // multiply plus a shift. 3703 if (VT.isSimple() && !VT.isVector()) { 3704 MVT Simple = VT.getSimpleVT(); 3705 unsigned SimpleSize = Simple.getSizeInBits(); 3706 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 3707 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 3708 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); 3709 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); 3710 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 3711 // Compute the high part as N1. 3712 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 3713 DAG.getConstant(SimpleSize, DL, 3714 getShiftAmountTy(Lo.getValueType()))); 3715 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 3716 // Compute the low part as N0. 
3717 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 3718 return CombineTo(N, Lo, Hi); 3719 } 3720 } 3721 3722 return SDValue(); 3723 } 3724 3725 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { 3726 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU)) 3727 return Res; 3728 3729 EVT VT = N->getValueType(0); 3730 SDLoc DL(N); 3731 3732 // If the type is twice as wide is legal, transform the mulhu to a wider 3733 // multiply plus a shift. 3734 if (VT.isSimple() && !VT.isVector()) { 3735 MVT Simple = VT.getSimpleVT(); 3736 unsigned SimpleSize = Simple.getSizeInBits(); 3737 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 3738 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 3739 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); 3740 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); 3741 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 3742 // Compute the high part as N1. 3743 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 3744 DAG.getConstant(SimpleSize, DL, 3745 getShiftAmountTy(Lo.getValueType()))); 3746 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 3747 // Compute the low part as N0. 
3748 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 3749 return CombineTo(N, Lo, Hi); 3750 } 3751 } 3752 3753 return SDValue(); 3754 } 3755 3756 SDValue DAGCombiner::visitSMULO(SDNode *N) { 3757 // (smulo x, 2) -> (saddo x, x) 3758 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 3759 if (C2->getAPIntValue() == 2) 3760 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), 3761 N->getOperand(0), N->getOperand(0)); 3762 3763 return SDValue(); 3764 } 3765 3766 SDValue DAGCombiner::visitUMULO(SDNode *N) { 3767 // (umulo x, 2) -> (uaddo x, x) 3768 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 3769 if (C2->getAPIntValue() == 2) 3770 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), 3771 N->getOperand(0), N->getOperand(0)); 3772 3773 return SDValue(); 3774 } 3775 3776 SDValue DAGCombiner::visitIMINMAX(SDNode *N) { 3777 SDValue N0 = N->getOperand(0); 3778 SDValue N1 = N->getOperand(1); 3779 EVT VT = N0.getValueType(); 3780 3781 // fold vector ops 3782 if (VT.isVector()) 3783 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 3784 return FoldedVOp; 3785 3786 // fold operation with constant operands. 3787 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 3788 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 3789 if (N0C && N1C) 3790 return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); 3791 3792 // canonicalize constant to RHS 3793 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 3794 !DAG.isConstantIntBuildVectorOrConstantInt(N1)) 3795 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); 3796 3797 // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. 3798 // Only do this if the current op isn't legal and the flipped is. 
3799 unsigned Opcode = N->getOpcode(); 3800 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3801 if (!TLI.isOperationLegal(Opcode, VT) && 3802 (N0.isUndef() || DAG.SignBitIsZero(N0)) && 3803 (N1.isUndef() || DAG.SignBitIsZero(N1))) { 3804 unsigned AltOpcode; 3805 switch (Opcode) { 3806 case ISD::SMIN: AltOpcode = ISD::UMIN; break; 3807 case ISD::SMAX: AltOpcode = ISD::UMAX; break; 3808 case ISD::UMIN: AltOpcode = ISD::SMIN; break; 3809 case ISD::UMAX: AltOpcode = ISD::SMAX; break; 3810 default: llvm_unreachable("Unknown MINMAX opcode"); 3811 } 3812 if (TLI.isOperationLegal(AltOpcode, VT)) 3813 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1); 3814 } 3815 3816 return SDValue(); 3817 } 3818 3819 /// If this is a bitwise logic instruction and both operands have the same 3820 /// opcode, try to sink the other opcode after the logic instruction. 3821 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) { 3822 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3823 EVT VT = N0.getValueType(); 3824 unsigned LogicOpcode = N->getOpcode(); 3825 unsigned HandOpcode = N0.getOpcode(); 3826 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || 3827 LogicOpcode == ISD::XOR) && "Expected logic opcode"); 3828 assert(HandOpcode == N1.getOpcode() && "Bad input!"); 3829 3830 // Bail early if none of these transforms apply. 3831 if (N0.getNumOperands() == 0) 3832 return SDValue(); 3833 3834 // FIXME: We should check number of uses of the operands to not increase 3835 // the instruction count for all transforms. 3836 3837 // Handle size-changing casts. 3838 SDValue X = N0.getOperand(0); 3839 SDValue Y = N1.getOperand(0); 3840 EVT XVT = X.getValueType(); 3841 SDLoc DL(N); 3842 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND || 3843 HandOpcode == ISD::SIGN_EXTEND) { 3844 // If both operands have other uses, this transform would create extra 3845 // instructions without eliminating anything. 
3846 if (!N0.hasOneUse() && !N1.hasOneUse()) 3847 return SDValue(); 3848 // We need matching integer source types. 3849 if (XVT != Y.getValueType()) 3850 return SDValue(); 3851 // Don't create an illegal op during or after legalization. Don't ever 3852 // create an unsupported vector op. 3853 if ((VT.isVector() || LegalOperations) && 3854 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT)) 3855 return SDValue(); 3856 // Avoid infinite looping with PromoteIntBinOp. 3857 // TODO: Should we apply desirable/legal constraints to all opcodes? 3858 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes && 3859 !TLI.isTypeDesirableForOp(LogicOpcode, XVT)) 3860 return SDValue(); 3861 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y) 3862 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); 3863 return DAG.getNode(HandOpcode, DL, VT, Logic); 3864 } 3865 3866 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y) 3867 if (HandOpcode == ISD::TRUNCATE) { 3868 // If both operands have other uses, this transform would create extra 3869 // instructions without eliminating anything. 3870 if (!N0.hasOneUse() && !N1.hasOneUse()) 3871 return SDValue(); 3872 // We need matching source types. 3873 if (XVT != Y.getValueType()) 3874 return SDValue(); 3875 // Don't create an illegal op during or after legalization. 3876 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT)) 3877 return SDValue(); 3878 // Be extra careful sinking truncate. If it's free, there's no benefit in 3879 // widening a binop. Also, don't create a logic op on an illegal type. 
3880 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT)) 3881 return SDValue(); 3882 if (!TLI.isTypeLegal(XVT)) 3883 return SDValue(); 3884 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); 3885 return DAG.getNode(HandOpcode, DL, VT, Logic); 3886 } 3887 3888 // For binops SHL/SRL/SRA/AND: 3889 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z 3890 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL || 3891 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) && 3892 N0.getOperand(1) == N1.getOperand(1)) { 3893 // If either operand has other uses, this transform is not an improvement. 3894 if (!N0.hasOneUse() || !N1.hasOneUse()) 3895 return SDValue(); 3896 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); 3897 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1)); 3898 } 3899 3900 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y) 3901 if (HandOpcode == ISD::BSWAP) { 3902 // If either operand has other uses, this transform is not an improvement. 3903 if (!N0.hasOneUse() || !N1.hasOneUse()) 3904 return SDValue(); 3905 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); 3906 return DAG.getNode(HandOpcode, DL, VT, Logic); 3907 } 3908 3909 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) 3910 // Only perform this optimization up until type legalization, before 3911 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by 3912 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and 3913 // we don't want to undo this promotion. 3914 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper 3915 // on scalars. 3916 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) && 3917 Level <= AfterLegalizeTypes) { 3918 // Input types must be integer and the same. 
3919 if (XVT.isInteger() && XVT == Y.getValueType()) { 3920 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y); 3921 return DAG.getNode(HandOpcode, DL, VT, Logic); 3922 } 3923 } 3924 3925 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). 3926 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) 3927 // If both shuffles use the same mask, and both shuffle within a single 3928 // vector, then it is worthwhile to move the swizzle after the operation. 3929 // The type-legalizer generates this pattern when loading illegal 3930 // vector types from memory. In many cases this allows additional shuffle 3931 // optimizations. 3932 // There are other cases where moving the shuffle after the xor/and/or 3933 // is profitable even if shuffles don't perform a swizzle. 3934 // If both shuffles use the same mask, and both shuffles have the same first 3935 // or second operand, then it might still be profitable to move the shuffle 3936 // after the xor/and/or operation. 3937 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { 3938 auto *SVN0 = cast<ShuffleVectorSDNode>(N0); 3939 auto *SVN1 = cast<ShuffleVectorSDNode>(N1); 3940 assert(X.getValueType() == Y.getValueType() && 3941 "Inputs to shuffles are not the same type"); 3942 3943 // Check that both shuffles use the same mask. The masks are known to be of 3944 // the same length because the result vector type is the same. 3945 // Check also that shuffles have only one use to avoid introducing extra 3946 // instructions. 3947 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() || 3948 !SVN0->getMask().equals(SVN1->getMask())) 3949 return SDValue(); 3950 3951 // Don't try to fold this node if it requires introducing a 3952 // build vector of all zeros that might be illegal at this stage. 
3953 SDValue ShOp = N0.getOperand(1); 3954 if (LogicOpcode == ISD::XOR && !ShOp.isUndef()) 3955 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); 3956 3957 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C) 3958 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { 3959 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, 3960 N0.getOperand(0), N1.getOperand(0)); 3961 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask()); 3962 } 3963 3964 // Don't try to fold this node if it requires introducing a 3965 // build vector of all zeros that might be illegal at this stage. 3966 ShOp = N0.getOperand(0); 3967 if (LogicOpcode == ISD::XOR && !ShOp.isUndef()) 3968 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); 3969 3970 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B)) 3971 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) { 3972 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1), 3973 N1.getOperand(1)); 3974 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask()); 3975 } 3976 } 3977 3978 return SDValue(); 3979 } 3980 3981 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient. 3982 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, 3983 const SDLoc &DL) { 3984 SDValue LL, LR, RL, RR, N0CC, N1CC; 3985 if (!isSetCCEquivalent(N0, LL, LR, N0CC) || 3986 !isSetCCEquivalent(N1, RL, RR, N1CC)) 3987 return SDValue(); 3988 3989 assert(N0.getValueType() == N1.getValueType() && 3990 "Unexpected operand types for bitwise logic op"); 3991 assert(LL.getValueType() == LR.getValueType() && 3992 RL.getValueType() == RR.getValueType() && 3993 "Unexpected operand types for setcc"); 3994 3995 // If we're here post-legalization or the logic op type is not i1, the logic 3996 // op type must match a setcc result type. Also, all folds require new 3997 // operations on the left and right operands, so those types must match. 
3998 EVT VT = N0.getValueType(); 3999 EVT OpVT = LL.getValueType(); 4000 if (LegalOperations || VT.getScalarType() != MVT::i1) 4001 if (VT != getSetCCResultType(OpVT)) 4002 return SDValue(); 4003 if (OpVT != RL.getValueType()) 4004 return SDValue(); 4005 4006 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get(); 4007 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get(); 4008 bool IsInteger = OpVT.isInteger(); 4009 if (LR == RR && CC0 == CC1 && IsInteger) { 4010 bool IsZero = isNullOrNullSplat(LR); 4011 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR); 4012 4013 // All bits clear? 4014 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero; 4015 // All sign bits clear? 4016 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1; 4017 // Any bits set? 4018 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero; 4019 // Any sign bits set? 4020 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero; 4021 4022 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0) 4023 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1) 4024 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0) 4025 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0) 4026 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) { 4027 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL); 4028 AddToWorklist(Or.getNode()); 4029 return DAG.getSetCC(DL, VT, Or, LR, CC1); 4030 } 4031 4032 // All bits set? 4033 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1; 4034 // All sign bits set? 4035 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero; 4036 // Any bits clear? 4037 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1; 4038 // Any sign bits clear? 
4039 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1; 4040 4041 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1) 4042 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0) 4043 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1) 4044 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1) 4045 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) { 4046 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL); 4047 AddToWorklist(And.getNode()); 4048 return DAG.getSetCC(DL, VT, And, LR, CC1); 4049 } 4050 } 4051 4052 // TODO: What is the 'or' equivalent of this fold? 4053 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2) 4054 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 && 4055 IsInteger && CC0 == ISD::SETNE && 4056 ((isNullConstant(LR) && isAllOnesConstant(RR)) || 4057 (isAllOnesConstant(LR) && isNullConstant(RR)))) { 4058 SDValue One = DAG.getConstant(1, DL, OpVT); 4059 SDValue Two = DAG.getConstant(2, DL, OpVT); 4060 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One); 4061 AddToWorklist(Add.getNode()); 4062 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE); 4063 } 4064 4065 // Try more general transforms if the predicates match and the only user of 4066 // the compares is the 'and' or 'or'. 
4067 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 && 4068 N0.hasOneUse() && N1.hasOneUse()) { 4069 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 4070 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0 4071 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) { 4072 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR); 4073 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR); 4074 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR); 4075 SDValue Zero = DAG.getConstant(0, DL, OpVT); 4076 return DAG.getSetCC(DL, VT, Or, Zero, CC1); 4077 } 4078 } 4079 4080 // Canonicalize equivalent operands to LL == RL. 4081 if (LL == RR && LR == RL) { 4082 CC1 = ISD::getSetCCSwappedOperands(CC1); 4083 std::swap(RL, RR); 4084 } 4085 4086 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) 4087 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) 4088 if (LL == RL && LR == RR) { 4089 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger) 4090 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger); 4091 if (NewCC != ISD::SETCC_INVALID && 4092 (!LegalOperations || 4093 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) && 4094 TLI.isOperationLegal(ISD::SETCC, OpVT)))) 4095 return DAG.getSetCC(DL, VT, LL, LR, NewCC); 4096 } 4097 4098 return SDValue(); 4099 } 4100 4101 /// This contains all DAGCombine rules which reduce two values combined by 4102 /// an And operation to a single value. This makes them reusable in the context 4103 /// of visitSELECT(). Rules involving constants are not included as 4104 /// visitSELECT() already handles those cases. 
4105 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { 4106 EVT VT = N1.getValueType(); 4107 SDLoc DL(N); 4108 4109 // fold (and x, undef) -> 0 4110 if (N0.isUndef() || N1.isUndef()) 4111 return DAG.getConstant(0, DL, VT); 4112 4113 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL)) 4114 return V; 4115 4116 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 4117 VT.getSizeInBits() <= 64) { 4118 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 4119 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { 4120 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 4121 // immediate for an add, but it is legal if its top c2 bits are set, 4122 // transform the ADD so the immediate doesn't need to be materialized 4123 // in a register. 4124 APInt ADDC = ADDI->getAPIntValue(); 4125 APInt SRLC = SRLI->getAPIntValue(); 4126 if (ADDC.getMinSignedBits() <= 64 && 4127 SRLC.ult(VT.getSizeInBits()) && 4128 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 4129 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 4130 SRLC.getZExtValue()); 4131 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { 4132 ADDC |= Mask; 4133 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 4134 SDLoc DL0(N0); 4135 SDValue NewAdd = 4136 DAG.getNode(ISD::ADD, DL0, VT, 4137 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); 4138 CombineTo(N0.getNode(), NewAdd); 4139 // Return N so it doesn't get rechecked! 4140 return SDValue(N, 0); 4141 } 4142 } 4143 } 4144 } 4145 } 4146 } 4147 4148 // Reduce bit extract of low half of an integer to the narrower type. 
4149 // (and (srl i64:x, K), KMask) -> 4150 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask) 4151 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { 4152 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) { 4153 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 4154 unsigned Size = VT.getSizeInBits(); 4155 const APInt &AndMask = CAnd->getAPIntValue(); 4156 unsigned ShiftBits = CShift->getZExtValue(); 4157 4158 // Bail out, this node will probably disappear anyway. 4159 if (ShiftBits == 0) 4160 return SDValue(); 4161 4162 unsigned MaskBits = AndMask.countTrailingOnes(); 4163 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2); 4164 4165 if (AndMask.isMask() && 4166 // Required bits must not span the two halves of the integer and 4167 // must fit in the half size type. 4168 (ShiftBits + MaskBits <= Size / 2) && 4169 TLI.isNarrowingProfitable(VT, HalfVT) && 4170 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) && 4171 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) && 4172 TLI.isTruncateFree(VT, HalfVT) && 4173 TLI.isZExtFree(HalfVT, VT)) { 4174 // The isNarrowingProfitable is to avoid regressions on PPC and 4175 // AArch64 which match a few 64-bit bit insert / bit extract patterns 4176 // on downstream users of this. Those patterns could probably be 4177 // extended to handle extensions mixed in. 4178 4179 SDValue SL(N0); 4180 assert(MaskBits <= Size); 4181 4182 // Extracting the highest bit of the low half. 
4183 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); 4184 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT, 4185 N0.getOperand(0)); 4186 4187 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT); 4188 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT); 4189 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK); 4190 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask); 4191 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And); 4192 } 4193 } 4194 } 4195 } 4196 4197 return SDValue(); 4198 } 4199 4200 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, 4201 EVT LoadResultTy, EVT &ExtVT) { 4202 if (!AndC->getAPIntValue().isMask()) 4203 return false; 4204 4205 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes(); 4206 4207 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 4208 EVT LoadedVT = LoadN->getMemoryVT(); 4209 4210 if (ExtVT == LoadedVT && 4211 (!LegalOperations || 4212 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) { 4213 // ZEXTLOAD will match without needing to change the size of the value being 4214 // loaded. 4215 return true; 4216 } 4217 4218 // Do not change the width of a volatile load. 4219 if (LoadN->isVolatile()) 4220 return false; 4221 4222 // Do not generate loads of non-round integer types since these can 4223 // be expensive (and would be wrong if the type is not byte sized). 4224 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound()) 4225 return false; 4226 4227 if (LegalOperations && 4228 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT)) 4229 return false; 4230 4231 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) 4232 return false; 4233 4234 return true; 4235 } 4236 4237 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, 4238 ISD::LoadExtType ExtType, EVT &MemVT, 4239 unsigned ShAmt) { 4240 if (!LDST) 4241 return false; 4242 // Only allow byte offsets. 
4243 if (ShAmt % 8) 4244 return false; 4245 4246 // Do not generate loads of non-round integer types since these can 4247 // be expensive (and would be wrong if the type is not byte sized). 4248 if (!MemVT.isRound()) 4249 return false; 4250 4251 // Don't change the width of a volatile load. 4252 if (LDST->isVolatile()) 4253 return false; 4254 4255 // Verify that we are actually reducing a load width here. 4256 if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits()) 4257 return false; 4258 4259 // Ensure that this isn't going to produce an unsupported unaligned access. 4260 if (ShAmt && 4261 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, 4262 LDST->getAddressSpace(), ShAmt / 8)) 4263 return false; 4264 4265 // It's not possible to generate a constant of extended or untyped type. 4266 EVT PtrType = LDST->getBasePtr().getValueType(); 4267 if (PtrType == MVT::Untyped || PtrType.isExtended()) 4268 return false; 4269 4270 if (isa<LoadSDNode>(LDST)) { 4271 LoadSDNode *Load = cast<LoadSDNode>(LDST); 4272 // Don't transform one with multiple uses, this would require adding a new 4273 // load. 4274 if (!SDValue(Load, 0).hasOneUse()) 4275 return false; 4276 4277 if (LegalOperations && 4278 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT)) 4279 return false; 4280 4281 // For the transform to be legal, the load must produce only two values 4282 // (the value loaded and the chain). Don't transform a pre-increment 4283 // load, for example, which produces an extra value. Otherwise the 4284 // transformation is not equivalent, and the downstream logic to replace 4285 // uses gets things wrong. 4286 if (Load->getNumValues() > 2) 4287 return false; 4288 4289 // If the load that we're shrinking is an extload and we're not just 4290 // discarding the extension we can't simply shrink the load. Bail. 4291 // TODO: It would be possible to merge the extensions in some cases. 
if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
        Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
      return false;

    if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
      return false;
  } else {
    assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
    StoreSDNode *Store = cast<StoreSDNode>(LDST);
    // Can't write outside the original store
    if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
      return false;

    if (LegalOperations &&
        !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
      return false;
  }
  return true;
}

/// Walk the operands of \p N, recursing through single-use AND/OR/XOR nodes,
/// and collect in \p Loads the loads that can be narrowed to the width implied
/// by \p Mask.
///
/// OR/XOR nodes that have a constant operand with bits set outside \p Mask are
/// recorded in \p NodesWithConsts so the caller can mask that constant. At most
/// one operand that is not handled by the switch below is tolerated: it is
/// handed back through \p NodeToMask and must have exactly one result that is
/// neither a chain (MVT::Other) nor glue.
///
/// \returns false as soon as something prevents the transform: a vector-typed
/// operand, a non-constant operand with more than one use, a load that cannot
/// be narrowed, or a second node that would need explicit masking.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallVectorImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    // Only OR/XOR are recorded: their constant has bits outside the mask.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.push_back(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      // Otherwise fall out of the switch: this operand becomes the candidate
      // node to be masked explicitly.
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will be masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            // A second data result: reset the out-parameter before bailing.
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}

/// Try to remove the AND node \p N by pushing its constant mask (operand 1,
/// which must satisfy APInt::isMask()) back to the leaves of its operand tree.
///
/// The loads found by SearchForAndLoads are wrapped in an AND with the mask and
/// handed to ReduceLoadWidth; OR/XOR constants recorded as too wide are ANDed
/// with the mask; the single extra node that SearchForAndLoads permits is
/// masked explicitly. Finally all uses of \p N are redirected to its first
/// operand.
///
/// \returns true if the DAG was changed, false if nothing was done (operand 1
/// is not a suitable constant mask, operand 0 is itself a load, the search
/// failed, or no load was found).
bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  SmallVector<LoadSDNode*, 8> Loads;
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    if (Loads.size() == 0)
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      // The replacement above also rewrote the use inside the new AND itself,
      // so point its operands back at the original value and the mask.
      if (And.getOpcode() == ISD ::AND)
        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      // Make Op1 the constant operand.
      if (isa<ConstantSDNode>(Op0))
          std::swap(Op0, Op1);

      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
                                Op1, MaskOp);

      DAG.UpdateNodeOperands(LogicN, Op0, And);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      // As above: restore the new AND's own operands after the replacement.
      if (And.getOpcode() == ISD ::AND)
        And = SDValue(
            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
      SDValue NewLoad = ReduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}

// Unfold
//    x &  (-1 'logical shift' y)
// To
//    (x 'opposite logical shift' y) 'logical shift' y
// if it is better for performance.
//
// Returns an empty SDValue when the target does not prefer the shift form or
// when neither operand of the AND is a single-use shift of an all-ones
// constant.
SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
  assert(N->getOpcode() == ISD::AND);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Do we actually prefer shifts over mask?
  if (!TLI.preferShiftsToClearExtremeBits(N0))
    return SDValue();

  // Try to match  (-1 '[outer] logical shift' y)
  unsigned OuterShift;
  unsigned InnerShift; // The opposite direction to the OuterShift.
  SDValue Y;           // Shift amount.
  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
    if (!M.hasOneUse())
      return false;
    OuterShift = M->getOpcode();
    if (OuterShift == ISD::SHL)
      InnerShift = ISD::SRL;
    else if (OuterShift == ISD::SRL)
      InnerShift = ISD::SHL;
    else
      return false;
    if (!isAllOnesConstant(M->getOperand(0)))
      return false;
    Y = M->getOperand(1);
    return true;
  };

  // The mask may be on either side of the AND; X is the other operand.
  SDValue X;
  if (matchMask(N1))
    X = N0;
  else if (matchMask(N0))
    X = N1;
  else
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  //     tmp = x   'opposite logical shift' y
  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
  //     ret = tmp 'logical shift' y
  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);

  return T1;
}

/// Apply the DAG combines for the ISD::AND node \p N.
///
/// \returns the value that should replace \p N; SDValue(N, 0) when the DAG was
/// already updated by the fold itself (CombineTo, RAUW or in-place
/// simplification); or an empty SDValue when no fold applies. The folds are
/// tried in the order written and the first one that succeeds wins.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
                             SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
                             SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
    return RAND;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (VT.isVector())
    if (SDValue Shuffle = XformToShuffleWithZero(N))
      return Shuffle;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    // Bits the AND clears, restricted to the width of the pre-extension value.
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things.  This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    // The 1-bit zero default is never all-ones, so the fold below stays
    // disabled when no constant is found.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        // Note: VT and BitWidth here shadow the function-level variables.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    // B: whether the load's extension kind allows dropping the AND
    // (sign-extending loads fall into the default case and do not).
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    if (SDValue Res = ReduceLoadWidth(N)) {
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
      AddToWorklist(N);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
      return SDValue(N, 0);
    }
  }

  if (Level >= AfterLegalizeTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N, DAG)) {
      return SDValue(N, 0);
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    if (isNullOrNullSplat(N0.getOperand(0))) {
      SDValue SubRHS = N0.getOperand(1);
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return SubRHS;
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
    return Shifts;

  return SDValue();
}

/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// \p N is the OR node and \p N0 / \p N1 are its operands. The match is only
/// attempted once operations have been legalized, for i16/i32/i64, and when
/// BSWAP is legal or custom for that type. Either shift may be wrapped in, or
/// applied to, an AND with the byte mask listed in the comments below.
///
/// When \p DemandHighBits is set and the type is wider than 16 bits, the
/// left-shift side must be masked and the right-shift side must either be
/// masked or have all bits above the low halfword known to be zero.
///
/// \returns the replacement value, or an empty SDValue if nothing matched.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
      std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
      std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // From here on N0 must be the SHL and N1 the SRL, both by 8 and single-use.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both shifts must operate on the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}

/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
4942 /// ((x & 0x000000ff) << 8) | 4943 /// ((x & 0x0000ff00) >> 8) | 4944 /// ((x & 0x00ff0000) << 8) | 4945 /// ((x & 0xff000000) >> 8) 4946 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { 4947 if (!N.getNode()->hasOneUse()) 4948 return false; 4949 4950 unsigned Opc = N.getOpcode(); 4951 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) 4952 return false; 4953 4954 SDValue N0 = N.getOperand(0); 4955 unsigned Opc0 = N0.getOpcode(); 4956 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL) 4957 return false; 4958 4959 ConstantSDNode *N1C = nullptr; 4960 // SHL or SRL: look upstream for AND mask operand 4961 if (Opc == ISD::AND) 4962 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 4963 else if (Opc0 == ISD::AND) 4964 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 4965 if (!N1C) 4966 return false; 4967 4968 unsigned MaskByteOffset; 4969 switch (N1C->getZExtValue()) { 4970 default: 4971 return false; 4972 case 0xFF: MaskByteOffset = 0; break; 4973 case 0xFF00: MaskByteOffset = 1; break; 4974 case 0xFFFF: 4975 // In case demanded bits didn't clear the bits that will be shifted out. 4976 // This is needed for X86. 4977 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) { 4978 MaskByteOffset = 1; 4979 break; 4980 } 4981 return false; 4982 case 0xFF0000: MaskByteOffset = 2; break; 4983 case 0xFF000000: MaskByteOffset = 3; break; 4984 } 4985 4986 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 
4987 if (Opc == ISD::AND) { 4988 if (MaskByteOffset == 0 || MaskByteOffset == 2) { 4989 // (x >> 8) & 0xff 4990 // (x >> 8) & 0xff0000 4991 if (Opc0 != ISD::SRL) 4992 return false; 4993 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 4994 if (!C || C->getZExtValue() != 8) 4995 return false; 4996 } else { 4997 // (x << 8) & 0xff00 4998 // (x << 8) & 0xff000000 4999 if (Opc0 != ISD::SHL) 5000 return false; 5001 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 5002 if (!C || C->getZExtValue() != 8) 5003 return false; 5004 } 5005 } else if (Opc == ISD::SHL) { 5006 // (x & 0xff) << 8 5007 // (x & 0xff0000) << 8 5008 if (MaskByteOffset != 0 && MaskByteOffset != 2) 5009 return false; 5010 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 5011 if (!C || C->getZExtValue() != 8) 5012 return false; 5013 } else { // Opc == ISD::SRL 5014 // (x & 0xff00) >> 8 5015 // (x & 0xff000000) >> 8 5016 if (MaskByteOffset != 1 && MaskByteOffset != 3) 5017 return false; 5018 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 5019 if (!C || C->getZExtValue() != 8) 5020 return false; 5021 } 5022 5023 if (Parts[MaskByteOffset]) 5024 return false; 5025 5026 Parts[MaskByteOffset] = N0.getOperand(0).getNode(); 5027 return true; 5028 } 5029 5030 /// Match a 32-bit packed halfword bswap. 
/// That is
///   ((x & 0x000000ff) << 8) |
///   ((x & 0x0000ff00) >> 8) |
///   ((x & 0x00ff0000) << 8) |
///   ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
///
/// \p N is the OR node and \p N0 / \p N1 are its operands. Only attempted once
/// operations have been legalized, for i32, and when BSWAP is legal or custom.
/// \returns the replacement value, or an empty SDValue if nothing matched.
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  // One slot per byte lane, filled in by isBSwapHWordElement.
  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordElement(N00, Parts))
      return SDValue();

    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    SDValue N10 = N1.getOperand(0);
    if (!isBSwapHWordElement(N10, Parts))
      return SDValue();
    SDValue N11 = N1.getOperand(1);
    if (!isBSwapHWordElement(N11, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do  (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}

/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
///
/// \p N0 and \p N1 are the two values being ORed; \p N supplies the debug
/// location. \returns the combined value, or an empty SDValue if no rule
/// applies.
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N1.getValueType();
  SDLoc DL(N);

  // fold (or x, undef) -> -1
  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
    return DAG.getAllOnesConstant(DL, VT);

  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
    return V;

  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero.  Likewise for Y.
    if (const ConstantSDNode *N0O1C =
        getAsNonOpaqueConstant(N0.getOperand(1))) {
      if (const ConstantSDNode *N1O1C =
          getAsNonOpaqueConstant(N1.getOperand(1))) {
        // We can only do this xform if we know that bits from X that are set in
        // C2 but not in C1 are already zero.  Likewise for Y.
        const APInt &LHSMask = N0O1C->getAPIntValue();
        const APInt &RHSMask = N1O1C->getAPIntValue();

        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                  N0.getOperand(0), N1.getOperand(0));
          return DAG.getNode(ISD::AND, DL, VT, X,
                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
        }
      }
    }
  }

  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(0) == N1.getOperand(0) &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                            N0.getOperand(1), N1.getOperand(1));
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
  }

  return SDValue();
}

/// Apply the DAG combines for the ISD::OR node \p N.
///
/// \returns the value that should replace \p N; SDValue(N, 0) when the node was
/// simplified in place by SimplifyDemandedBits; or an empty SDValue when no
/// fold applies. The folds are tried in the order written and the first one
/// that succeeds wins.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      // (Exactly one all-zeros input per shuffle, as the asserts restate.)
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the otherside is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          // If the mask is not legal as built, retry with the operands (and
          // the mask) commuted.
          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          if (!LegalMask) {
            std::swap(NewLHS, NewRHS);
            ShuffleVectorSDNode::commuteMask(Mask);
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          }

          if (LegalMask)
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0 or c1/c2 are undef.
  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
    return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
    if (SDValue COR = DAG.FoldConstantArithmetic(
            ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// If \p Op is an AND whose second operand is a constant integer or a constant
/// integer build-vector, store that operand in \p Mask and return the AND's
/// first operand. Otherwise return \p Op unchanged and leave \p Mask untouched.
static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
  if (Op.getOpcode() == ISD::AND &&
      DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
    Mask = Op.getOperand(1);
    return Op.getOperand(0);
  }
  return Op;
}

/// Match "(X shl/srl V1) & V2" where V2 may not be present.
///
/// On success \p Shift receives the SHL/SRL node and, if a constant mask was
/// stripped, \p Mask receives it. Note that \p Mask may be written even when
/// the function returns false, because the mask is stripped before the shift
/// opcode is checked.
static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
                            SDValue &Mask) {
  Op = stripConstantMask(DAG, Op, Mask);
  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    Shift = Op;
    return true;
  }
  return false;
}

/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv.
/// This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
///   (or (mul v c0) (shrl (mul v c1) c2)):
///     expands (mul v c0) -> (shl (mul v c1) c3)
///
///   (or (udiv v c0) (shl (udiv v c1) c2)):
///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
///
///   (or (shl v c0) (shrl (shl v c1) c2)):
///     expands (shl v c0) -> (shl (shl v c1) c3)
///
///   (or (shrl v c0) (shl (shrl v c1) c2)):
///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
///
/// \param OppShift    The SHL/SRL already matched on the other side of the OR
///                    (asserted below).
/// \param ExtractFrom The OR operand to extract the missing shift from; a
///                    constant AND mask on it is stripped first.
/// \param Mask        Receives the stripped constant AND mask, if there is one.
/// \param DL          Debug location used for the nodes created here.
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  assert(
      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
      "Existing shift must be valid as a rotate half");

  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
  // Preconditions:
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE;
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from.
  // (An existing SRL needs a SHL/MUL partner; an existing SHL needs SRL/UDIV.)
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these values
  // Check that we have constant values.
  // NOTE(review): the `!...->getAPIntValue()` tests go through APInt's
  // operator!, so zero-valued constants appear to be rejected here as well —
  // confirm against the APInt documentation.
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Check:
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
    // In terms of the variables used here:
    //     ExtractFromAmt / (1 << NeededShiftAmt) == OppLHSAmt
    //     ExtractFromAmt % (1 << NeededShiftAmt) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt;
    APInt Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // Check:
    //      c2 - (bitwidth(op0 v c0) - c1) == c0
    // In terms of the variables used here:
    //      ExtractFromAmt - NeededShiftAmt == OppLHSAmt
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                                          ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  // The new shift reuses the existing shift's LHS as its value operand and the
  // existing shift amount's type for the new amount constant.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}

// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).
// This means that
// for two opposing shifts shift1 and shift2 and a value X with EltSize bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
      unsigned Bits = Log2_64(EltSize);
      // Strip the AND only when its constant has no bits set above the low
      // `Bits` bits and, together with the known-zero bits of the masked
      // value, covers all of those low bits.
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
           MaskLoBits))
        Pos = Pos.getOperand(0);
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}

// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported.  InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
//
// Returns the newly created rotate node, or nullptr when matchRotateSub cannot
// prove the relationship between InnerPos and InnerNeg.
SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, unsigned PosOpcode,
                                       unsigned NegOpcode, const SDLoc &DL) {
  // fold (or (shl x, (*ext y)),
  //          (srl x, (*ext (sub 32, y)))) ->
  //   (rotl x, y) or (rotr x, (sub 32, y))
  //
  // fold (or (shl x, (*ext (sub 32, y))),
  //          (srl x, (*ext y))) ->
  //   (rotr x, y) or (rotl x, (sub 32, y))
  EVT VT = Shifted.getValueType();
  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
    // Prefer PosOpcode with Pos when the target reports it legal or custom;
    // otherwise fall back to NegOpcode with Neg.
    bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                       HasPos ? Pos : Neg).getNode();
  }

  return nullptr;
}

// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
//
// LHS and RHS are the two operands of the OR. Returns the node that replaces
// the OR (a rotate, a truncate of a rotate, or an AND of a rotate with a
// mask), or nullptr when no rotate idiom was matched.
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Check for truncated rotate.
  // (Both sides are truncates from the same wider type: match the rotate on
  // the wide operands and truncate the result.)
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
                         SDValue(Rot, 0)).getNode();
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail
  if (!LHSShift && !RHSShift)
    return nullptr;

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract.  We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (ie if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return nullptr;

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // Both folds require C1 + C2 == element size, checked by MatchRotateSum.
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      // Each side's mask is OR'ed with (all-ones shifted by the other side's
      // amount in the other side's direction) before being AND'ed into the
      // combined mask.
      if (LHSMask.getNode()) {
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If both shift amounts are sign/zero/any-extended or truncated, peel the
  // conversion off both of them.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try the shl amount as Pos first (rotl preferred), then the srl amount as
  // Pos (rotr preferred).
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}

namespace {

/// Represents known origin of an individual byte in load combine pattern. The
/// value of the byte is either constant zero or comes from memory.
struct ByteProvider {
  // For constant zero providers Load is set to nullptr. For memory providers
  // Load represents the node which loads the byte from memory.
  // ByteOffset is the offset of the byte in the value produced by the load.
  LoadSDNode *Load = nullptr;
  unsigned ByteOffset = 0;

  ByteProvider() = default;

  // Factory for a byte that comes from byte ByteOffset of Load's value.
  static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
    return ByteProvider(Load, ByteOffset);
  }

  // Factory for a byte known to be zero (null Load, offset 0).
  static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }

  bool isConstantZero() const { return !Load; }
  bool isMemory() const { return Load; }

  bool operator==(const ByteProvider &Other) const {
    return Other.Load == Load && Other.ByteOffset == ByteOffset;
  }

private:
  ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
      : Load(Load), ByteOffset(ByteOffset) {}
};

} // end anonymous namespace

/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
/// For all the values except the root of the expression verifies that the value
/// has exactly one use and if it's not true return None. This way if the origin
/// of the byte is returned it's guaranteed that the values which contribute to
/// the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
///
/// \p Index is the index of the requested byte within \p Op (asserted to be
/// below Op's width in bytes). \p Depth is the current recursion depth; the
/// search gives up once it reaches 10. \p Root marks the root of the
/// expression, which is exempt from the one-use check.
5785 static const Optional<ByteProvider> 5786 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, 5787 bool Root = false) { 5788 // Typical i64 by i8 pattern requires recursion up to 8 calls depth 5789 if (Depth == 10) 5790 return None; 5791 5792 if (!Root && !Op.hasOneUse()) 5793 return None; 5794 5795 assert(Op.getValueType().isScalarInteger() && "can't handle other types"); 5796 unsigned BitWidth = Op.getValueSizeInBits(); 5797 if (BitWidth % 8 != 0) 5798 return None; 5799 unsigned ByteWidth = BitWidth / 8; 5800 assert(Index < ByteWidth && "invalid index requested"); 5801 (void) ByteWidth; 5802 5803 switch (Op.getOpcode()) { 5804 case ISD::OR: { 5805 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1); 5806 if (!LHS) 5807 return None; 5808 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1); 5809 if (!RHS) 5810 return None; 5811 5812 if (LHS->isConstantZero()) 5813 return RHS; 5814 if (RHS->isConstantZero()) 5815 return LHS; 5816 return None; 5817 } 5818 case ISD::SHL: { 5819 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1)); 5820 if (!ShiftOp) 5821 return None; 5822 5823 uint64_t BitShift = ShiftOp->getZExtValue(); 5824 if (BitShift % 8 != 0) 5825 return None; 5826 uint64_t ByteShift = BitShift / 8; 5827 5828 return Index < ByteShift 5829 ? ByteProvider::getConstantZero() 5830 : calculateByteProvider(Op->getOperand(0), Index - ByteShift, 5831 Depth + 1); 5832 } 5833 case ISD::ANY_EXTEND: 5834 case ISD::SIGN_EXTEND: 5835 case ISD::ZERO_EXTEND: { 5836 SDValue NarrowOp = Op->getOperand(0); 5837 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits(); 5838 if (NarrowBitWidth % 8 != 0) 5839 return None; 5840 uint64_t NarrowByteWidth = NarrowBitWidth / 8; 5841 5842 if (Index >= NarrowByteWidth) 5843 return Op.getOpcode() == ISD::ZERO_EXTEND 5844 ? 
Optional<ByteProvider>(ByteProvider::getConstantZero()) 5845 : None; 5846 return calculateByteProvider(NarrowOp, Index, Depth + 1); 5847 } 5848 case ISD::BSWAP: 5849 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1, 5850 Depth + 1); 5851 case ISD::LOAD: { 5852 auto L = cast<LoadSDNode>(Op.getNode()); 5853 if (L->isVolatile() || L->isIndexed()) 5854 return None; 5855 5856 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits(); 5857 if (NarrowBitWidth % 8 != 0) 5858 return None; 5859 uint64_t NarrowByteWidth = NarrowBitWidth / 8; 5860 5861 if (Index >= NarrowByteWidth) 5862 return L->getExtensionType() == ISD::ZEXTLOAD 5863 ? Optional<ByteProvider>(ByteProvider::getConstantZero()) 5864 : None; 5865 return ByteProvider::getMemory(L, Index); 5866 } 5867 } 5868 5869 return None; 5870 } 5871 5872 /// Match a pattern where a wide type scalar value is loaded by several narrow 5873 /// loads and combined by shifts and ors. Fold it into a single load or a load 5874 /// and a BSWAP if the targets supports it. 5875 /// 5876 /// Assuming little endian target: 5877 /// i8 *a = ... 5878 /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) 5879 /// => 5880 /// i32 val = *((i32)a) 5881 /// 5882 /// i8 *a = ... 5883 /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] 5884 /// => 5885 /// i32 val = BSWAP(*((i32)a)) 5886 /// 5887 /// TODO: This rule matches complex patterns with OR node roots and doesn't 5888 /// interact well with the worklist mechanism. When a part of the pattern is 5889 /// updated (e.g. one of the loads) its direct users are put into the worklist, 5890 /// but the root node of the pattern which triggers the load combine is not 5891 /// necessarily a direct user of the changed node. 
For example, once the address 5892 /// of t28 load is reassociated load combine won't be triggered: 5893 /// t25: i32 = add t4, Constant:i32<2> 5894 /// t26: i64 = sign_extend t25 5895 /// t27: i64 = add t2, t26 5896 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64 5897 /// t29: i32 = zero_extend t28 5898 /// t32: i32 = shl t29, Constant:i8<8> 5899 /// t33: i32 = or t23, t32 5900 /// As a possible fix visitLoad can check if the load can be a part of a load 5901 /// combine pattern and add corresponding OR roots to the worklist. 5902 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { 5903 assert(N->getOpcode() == ISD::OR && 5904 "Can only match load combining against OR nodes"); 5905 5906 // Handles simple types only 5907 EVT VT = N->getValueType(0); 5908 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) 5909 return SDValue(); 5910 unsigned ByteWidth = VT.getSizeInBits() / 8; 5911 5912 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 5913 // Before legalize we can introduce too wide illegal loads which will be later 5914 // split into legal sized loads. This enables us to combine i64 load by i8 5915 // patterns to a couple of i32 loads on 32 bit targets. 5916 if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT)) 5917 return SDValue(); 5918 5919 std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = []( 5920 unsigned BW, unsigned i) { return i; }; 5921 std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = []( 5922 unsigned BW, unsigned i) { return BW - i - 1; }; 5923 5924 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian(); 5925 auto MemoryByteOffset = [&] (ByteProvider P) { 5926 assert(P.isMemory() && "Must be a memory byte provider"); 5927 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits(); 5928 assert(LoadBitWidth % 8 == 0 && 5929 "can only analyze providers for individual bytes not bit"); 5930 unsigned LoadByteWidth = LoadBitWidth / 8; 5931 return IsBigEndianTarget 5932 ? 
BigEndianByteAt(LoadByteWidth, P.ByteOffset) 5933 : LittleEndianByteAt(LoadByteWidth, P.ByteOffset); 5934 }; 5935 5936 Optional<BaseIndexOffset> Base; 5937 SDValue Chain; 5938 5939 SmallPtrSet<LoadSDNode *, 8> Loads; 5940 Optional<ByteProvider> FirstByteProvider; 5941 int64_t FirstOffset = INT64_MAX; 5942 5943 // Check if all the bytes of the OR we are looking at are loaded from the same 5944 // base address. Collect bytes offsets from Base address in ByteOffsets. 5945 SmallVector<int64_t, 4> ByteOffsets(ByteWidth); 5946 for (unsigned i = 0; i < ByteWidth; i++) { 5947 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true); 5948 if (!P || !P->isMemory()) // All the bytes must be loaded from memory 5949 return SDValue(); 5950 5951 LoadSDNode *L = P->Load; 5952 assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() && 5953 "Must be enforced by calculateByteProvider"); 5954 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset"); 5955 5956 // All loads must share the same chain 5957 SDValue LChain = L->getChain(); 5958 if (!Chain) 5959 Chain = LChain; 5960 else if (Chain != LChain) 5961 return SDValue(); 5962 5963 // Loads must share the same base address 5964 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG); 5965 int64_t ByteOffsetFromBase = 0; 5966 if (!Base) 5967 Base = Ptr; 5968 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) 5969 return SDValue(); 5970 5971 // Calculate the offset of the current byte from the base address 5972 ByteOffsetFromBase += MemoryByteOffset(*P); 5973 ByteOffsets[i] = ByteOffsetFromBase; 5974 5975 // Remember the first byte load 5976 if (ByteOffsetFromBase < FirstOffset) { 5977 FirstByteProvider = P; 5978 FirstOffset = ByteOffsetFromBase; 5979 } 5980 5981 Loads.insert(L); 5982 } 5983 assert(!Loads.empty() && "All the bytes of the value must be loaded from " 5984 "memory, so there must be at least one load which produces the value"); 5985 assert(Base && "Base address of the 
accessed memory location must be set"); 5986 assert(FirstOffset != INT64_MAX && "First byte offset must be set"); 5987 5988 // Check if the bytes of the OR we are looking at match with either big or 5989 // little endian value load 5990 bool BigEndian = true, LittleEndian = true; 5991 for (unsigned i = 0; i < ByteWidth; i++) { 5992 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset; 5993 LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i); 5994 BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i); 5995 if (!BigEndian && !LittleEndian) 5996 return SDValue(); 5997 } 5998 assert((BigEndian != LittleEndian) && "should be either or"); 5999 assert(FirstByteProvider && "must be set"); 6000 6001 // Ensure that the first byte is loaded from zero offset of the first load. 6002 // So the combined value can be loaded from the first load address. 6003 if (MemoryByteOffset(*FirstByteProvider) != 0) 6004 return SDValue(); 6005 LoadSDNode *FirstLoad = FirstByteProvider->Load; 6006 6007 // The node we are looking at matches with the pattern, check if we can 6008 // replace it with a single load and bswap if needed. 6009 6010 // If the load needs byte swap check if the target supports it 6011 bool NeedsBswap = IsBigEndianTarget != BigEndian; 6012 6013 // Before legalize we can introduce illegal bswaps which will be later 6014 // converted to an explicit bswap sequence. This way we end up with a single 6015 // load and byte shuffling instead of several loads and byte shuffling. 
6016 if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT)) 6017 return SDValue(); 6018 6019 // Check that a load of the wide type is both allowed and fast on the target 6020 bool Fast = false; 6021 bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), 6022 VT, FirstLoad->getAddressSpace(), 6023 FirstLoad->getAlignment(), &Fast); 6024 if (!Allowed || !Fast) 6025 return SDValue(); 6026 6027 SDValue NewLoad = 6028 DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(), 6029 FirstLoad->getPointerInfo(), FirstLoad->getAlignment()); 6030 6031 // Transfer chain users from old loads to the new load. 6032 for (LoadSDNode *L : Loads) 6033 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1)); 6034 6035 return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad; 6036 } 6037 6038 // If the target has andn, bsl, or a similar bit-select instruction, 6039 // we want to unfold masked merge, with canonical pattern of: 6040 // | A | |B| 6041 // ((x ^ y) & m) ^ y 6042 // | D | 6043 // Into: 6044 // (x & m) | (y & ~m) 6045 // If y is a constant, and the 'andn' does not work with immediates, 6046 // we unfold into a different pattern: 6047 // ~(~x & m) & (m | y) 6048 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at 6049 // the very least that breaks andnpd / andnps patterns, and because those 6050 // patterns are simplified in IR and shouldn't be created in the DAG 6051 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { 6052 assert(N->getOpcode() == ISD::XOR); 6053 6054 // Don't touch 'not' (i.e. where y = -1). 6055 if (isAllOnesOrAllOnesSplat(N->getOperand(1))) 6056 return SDValue(); 6057 6058 EVT VT = N->getValueType(0); 6059 6060 // There are 3 commutable operators in the pattern, 6061 // so we have to deal with 8 possible variants of the basic pattern. 
6062 SDValue X, Y, M; 6063 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) { 6064 if (And.getOpcode() != ISD::AND || !And.hasOneUse()) 6065 return false; 6066 SDValue Xor = And.getOperand(XorIdx); 6067 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse()) 6068 return false; 6069 SDValue Xor0 = Xor.getOperand(0); 6070 SDValue Xor1 = Xor.getOperand(1); 6071 // Don't touch 'not' (i.e. where y = -1). 6072 if (isAllOnesOrAllOnesSplat(Xor1)) 6073 return false; 6074 if (Other == Xor0) 6075 std::swap(Xor0, Xor1); 6076 if (Other != Xor1) 6077 return false; 6078 X = Xor0; 6079 Y = Xor1; 6080 M = And.getOperand(XorIdx ? 0 : 1); 6081 return true; 6082 }; 6083 6084 SDValue N0 = N->getOperand(0); 6085 SDValue N1 = N->getOperand(1); 6086 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) && 6087 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0)) 6088 return SDValue(); 6089 6090 // Don't do anything if the mask is constant. This should not be reachable. 6091 // InstCombine should have already unfolded this pattern, and DAGCombiner 6092 // probably shouldn't produce it, too. 6093 if (isa<ConstantSDNode>(M.getNode())) 6094 return SDValue(); 6095 6096 // We can transform if the target has AndNot 6097 if (!TLI.hasAndNot(M)) 6098 return SDValue(); 6099 6100 SDLoc DL(N); 6101 6102 // If Y is a constant, check that 'andn' works with immediates. 6103 if (!TLI.hasAndNot(Y)) { 6104 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable."); 6105 // If not, we need to do a bit more work to make sure andn is still used. 
6106 SDValue NotX = DAG.getNOT(DL, X, VT); 6107 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M); 6108 SDValue NotLHS = DAG.getNOT(DL, LHS, VT); 6109 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y); 6110 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS); 6111 } 6112 6113 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M); 6114 SDValue NotM = DAG.getNOT(DL, M, VT); 6115 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM); 6116 6117 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); 6118 } 6119 6120 SDValue DAGCombiner::visitXOR(SDNode *N) { 6121 SDValue N0 = N->getOperand(0); 6122 SDValue N1 = N->getOperand(1); 6123 EVT VT = N0.getValueType(); 6124 6125 // fold vector ops 6126 if (VT.isVector()) { 6127 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 6128 return FoldedVOp; 6129 6130 // fold (xor x, 0) -> x, vector edition 6131 if (ISD::isBuildVectorAllZeros(N0.getNode())) 6132 return N1; 6133 if (ISD::isBuildVectorAllZeros(N1.getNode())) 6134 return N0; 6135 } 6136 6137 // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 
SDLoc DL(N);
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, DL, VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  unsigned N0Opcode = N0.getOpcode();
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               LHS.getValueType().isInteger());
    // After operation legalization the inverted condition code must itself
    // be legal for the compared type.
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0Opcode) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL0(N0);
    V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
                    DAG.getConstant(1, DL0, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  // (and likewise (not (and x, y)) -> (or (not x), (not y))); i1 only.
  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    // NOTE: these locals intentionally shadow the LHS/RHS declared above.
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  // (and likewise (not (and x, y)) -> (or (not x), (not y))).
  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
      (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
    // NOTE: these locals intentionally shadow the LHS/RHS declared above.
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
    SDValue X = N0.getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
  }

  // fold xor of a single-use constant shift with a shifted all-ones constant
  // into a 'not' performed before the shift (see the inner comment).
  if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
    ConstantSDNode *XorC = isConstOrConstSplat(N1);
    ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
    unsigned BitWidth = VT.getScalarSizeInBits();
    if (XorC && ShiftC) {
      // Don't crash on an oversized shift. We can not guarantee that a bogus
      // shift has been simplified to undef.
      uint64_t ShiftAmt = ShiftC->getLimitedValue();
      if (ShiftAmt < BitWidth) {
        APInt Ones = APInt::getAllOnesValue(BitWidth);
        Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
        if (XorC->getAPIntValue() == Ones) {
          // If the xor constant is a shifted -1, do a 'not' before the shift:
          // xor (X << ShiftC), XorC --> (not X) << ShiftC
          // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
          SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
          return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
        }
      }
    }
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    // The add and the sra may appear as either operand of the xor.
    SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
    SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
      SDValue S0 = S.getOperand(0);
      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, DL, VT, S0);
      }
    }
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
      isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0Opcode == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
  if (SDValue MM = unfoldMaskedMerge(N))
    return MM;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
///
/// Rewrites (shift (binop X, C), Amt) into (binop (shift X, Amt),
/// (shift C, Amt)) for a single-use and/or/xor binop (and for add when the
/// shift is a shl), subject to the guards below. Returns an empty SDValue
/// when no rewrite is performed.
///
/// NOTE(review): \p Amt is not referenced in this function; the shift amount
/// is read from N->getOperand(1) instead.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  // Do not turn a 'not' into a regular xor.
  if (isBitwiseNot(N->getOperand(0)))
    return SDValue();

  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  // For an sra, HighBitSet is the value the sign bit of the binop constant
  // must have for the transform to be performed (checked further down).
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: This is disabled unless the input to the binop is a shift by a
  // constant or is a copy/select. Enable it in other cases once it has been
  // established exactly when it is profitable.
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
                 BinOpLHSVal->getOpcode() == ISD::SRA ||
                 BinOpLHSVal->getOpcode() == ISD::SRL;
  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
                        BinOpLHSVal->getOpcode() == ISD::SELECT;

  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
      !isCopyOrSelect)
    return SDValue();

  // For the copy/select case, only proceed when this shift has other uses.
  if (isCopyOrSelect && N->hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The HighBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  if (!TLI.isDesirableToCommuteWithShift(N, Level))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
6390 return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); 6391 } 6392 6393 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { 6394 assert(N->getOpcode() == ISD::TRUNCATE); 6395 assert(N->getOperand(0).getOpcode() == ISD::AND); 6396 6397 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) 6398 if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { 6399 SDValue N01 = N->getOperand(0).getOperand(1); 6400 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) { 6401 SDLoc DL(N); 6402 EVT TruncVT = N->getValueType(0); 6403 SDValue N00 = N->getOperand(0).getOperand(0); 6404 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00); 6405 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01); 6406 AddToWorklist(Trunc00.getNode()); 6407 AddToWorklist(Trunc01.getNode()); 6408 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01); 6409 } 6410 } 6411 6412 return SDValue(); 6413 } 6414 6415 SDValue DAGCombiner::visitRotate(SDNode *N) { 6416 SDLoc dl(N); 6417 SDValue N0 = N->getOperand(0); 6418 SDValue N1 = N->getOperand(1); 6419 EVT VT = N->getValueType(0); 6420 unsigned Bitsize = VT.getScalarSizeInBits(); 6421 6422 // fold (rot x, 0) -> x 6423 if (isNullOrNullSplat(N1)) 6424 return N0; 6425 6426 // fold (rot x, c) -> x iff (c % BitSize) == 0 6427 if (isPowerOf2_32(Bitsize) && Bitsize > 1) { 6428 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1); 6429 if (DAG.MaskedValueIsZero(N1, ModuloMask)) 6430 return N0; 6431 } 6432 6433 // fold (rot x, c) -> (rot x, c % BitSize) 6434 if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) { 6435 if (Cst->getAPIntValue().uge(Bitsize)) { 6436 uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize); 6437 return DAG.getNode(N->getOpcode(), dl, VT, N0, 6438 DAG.getConstant(RotAmt, dl, N1.getValueType())); 6439 } 6440 } 6441 6442 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). 
6443 if (N1.getOpcode() == ISD::TRUNCATE && 6444 N1.getOperand(0).getOpcode() == ISD::AND) { 6445 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) 6446 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1); 6447 } 6448 6449 unsigned NextOp = N0.getOpcode(); 6450 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize) 6451 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) { 6452 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1); 6453 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)); 6454 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) { 6455 EVT ShiftVT = C1->getValueType(0); 6456 bool SameSide = (N->getOpcode() == NextOp); 6457 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB; 6458 if (SDValue CombinedShift = 6459 DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) { 6460 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT); 6461 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( 6462 ISD::SREM, dl, ShiftVT, CombinedShift.getNode(), 6463 BitsizeC.getNode()); 6464 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), 6465 CombinedShiftNorm); 6466 } 6467 } 6468 } 6469 return SDValue(); 6470 } 6471 6472 SDValue DAGCombiner::visitSHL(SDNode *N) { 6473 SDValue N0 = N->getOperand(0); 6474 SDValue N1 = N->getOperand(1); 6475 if (SDValue V = DAG.simplifyShift(N0, N1)) 6476 return V; 6477 6478 EVT VT = N0.getValueType(); 6479 unsigned OpSizeInBits = VT.getScalarSizeInBits(); 6480 6481 // fold vector ops 6482 if (VT.isVector()) { 6483 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 6484 return FoldedVOp; 6485 6486 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); 6487 // If setcc produces all-one true value then: 6488 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) 6489 if (N1CV && N1CV->isConstant()) { 6490 if (N0.getOpcode() == ISD::AND) { 6491 SDValue N00 = N0->getOperand(0); 6492 SDValue N01 = N0->getOperand(1); 6493 BuildVectorSDNode *N01CV = 
dyn_cast<BuildVectorSDNode>(N01); 6494 6495 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && 6496 TLI.getBooleanContents(N00.getOperand(0).getValueType()) == 6497 TargetLowering::ZeroOrNegativeOneBooleanContent) { 6498 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, 6499 N01CV, N1CV)) 6500 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); 6501 } 6502 } 6503 } 6504 } 6505 6506 ConstantSDNode *N1C = isConstOrConstSplat(N1); 6507 6508 // fold (shl c1, c2) -> c1<<c2 6509 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 6510 if (N0C && N1C && !N1C->isOpaque()) 6511 return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); 6512 6513 if (SDValue NewSel = foldBinOpIntoSelect(N)) 6514 return NewSel; 6515 6516 // if (shl x, c) is known to be zero, return 0 6517 if (DAG.MaskedValueIsZero(SDValue(N, 0), 6518 APInt::getAllOnesValue(OpSizeInBits))) 6519 return DAG.getConstant(0, SDLoc(N), VT); 6520 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    // True when c1 + c2 (computed with one extra bit so the sum cannot wrap)
    // is at least the operand width.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    // True when c1 + c2 is strictly below the operand width.
    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      APInt c1 = N0Op0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
        SDLoc DL(N0);
        APInt Sum = c1 + c2;
        if (Sum.uge(OpSizeInBits))
          return DAG.getConstant(0, DL, VT);

        return DAG.getNode(
            ISD::SHL, DL, VT,
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
        uint64_t c1 = N0Op0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C1-C2)) if C1  > C2
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
      TLI.shouldFoldShiftPairToMask(N, Level)) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Start from the bits that survive the inner srl, then move the mask
        // by the same net amount as the value.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
      TLI.isDesirableToCommuteWithShift(N, Level)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    // Only use the new multiplier if the shift of the constants folded.
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}

/// Combine an ISD::SRA node. Returns the replacement value, or an empty
/// SDValue if no fold applies.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
// fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> c1 >>s c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    // Number of low bits of x that survive the shl/sra pair.
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // clamp (add c1, c2) to max shift.
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();
    EVT ShiftSVT = ShiftVT.getScalarType();
    SmallVector<SDValue, 16> ShiftValues;

    // Always "matches"; it is used to collect one clamped sum per constant
    // pair into ShiftValues.
    auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      APInt Sum = c1 + c2;
      unsigned ShiftSum =
          Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
      ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
      return true;
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
      SDValue ShiftValue;
      if (VT.isVector())
        ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
      else
        ShiftValue = ShiftValues[0];
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(srl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts: N01C is the shl amount (m) and N1C
    // is the sra amount (result_size - n).
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  // (the inner shift may be either an srl or an sra).
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  // Try the shift-by-constant rewrites, but never for opaque constants.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}

/// Combine an ISD::SRL (logical shift right) node.
///
/// The folds below are tried in order and the first one that applies wins.
/// \param N the SRL node; operand 0 is the value being shifted, operand 1 is
///          the shift amount.
/// \returns the replacement value, SDValue(N, 0) when SimplifyDemandedBits
///          reports that it changed something, or an empty SDValue when no
///          fold applied.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // Non-null when the shift amount is a constant or a splat of one constant.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    // True when c1 + c2 reaches the operand width. Both amounts are widened
    // by one extra bit first so that the addition itself cannot wrap.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    // True when c1 + c2 stays below the operand width, i.e. the two shifts
    // can be merged into a single one.
    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N0);
        // The combined amount shifts every bit of the wide value out.
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       N0.getOperand(0).getOperand(0),
                                       DAG.getConstant(c1 + c2, DL,
                                                       ShiftCountVT)));
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    // cst2 is all-ones shifted right by c, built as a node of its own.
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                                       DAG.getConstant(ShiftAmt, DL0,
                                                       getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Keep only the low (OpSizeInBits - ShiftAmt) bits of the wide result.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      // Move the one possibly-set bit down to bit 0 (nothing to do when it
      // already is bit 0).
      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                         DAG.getConstant(ShAmt, DL,
                                         getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // Try the shift-by-constant rewrites, but never for opaque constants.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However, after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}

/// Combine an ISD::FSHL / ISD::FSHR (funnel shift) node. Operands 0 and 1 are
/// the two values being funnelled, operand 2 is the shift amount.
SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  bool IsFSHL = N->getOpcode() == ISD::FSHL;
  unsigned BitWidth = VT.getScalarSizeInBits();

  // fold (fshl N0, N1, 0) -> N0
  // fold (fshr N0, N1, 0) -> N1
  // For a power-of-two width, "the low log2(BitWidth) bits of the amount are
  // known zero" is what the BitWidth - 1 mask below tests.
  if (isPowerOf2_32(BitWidth))
    if (DAG.MaskedValueIsZero(
            N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
      return IsFSHL ? N0 : N1;

  // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
  if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
    if (Cst->getAPIntValue().uge(BitWidth)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
      return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
                         DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
    }
  }

  // fold (fshl N0, N0, N2) -> (rotl N0, N2)
  // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
  // is legal as well we might be better off avoiding non-constant (BW - N2).
  unsigned RotOpc = IsFSHL ?
ISD::ROTL : ISD::ROTR; 7096 if (N0 == N1 && hasOperation(RotOpc, VT)) 7097 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2); 7098 7099 return SDValue(); 7100 } 7101 7102 SDValue DAGCombiner::visitABS(SDNode *N) { 7103 SDValue N0 = N->getOperand(0); 7104 EVT VT = N->getValueType(0); 7105 7106 // fold (abs c1) -> c2 7107 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) 7108 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0); 7109 // fold (abs (abs x)) -> (abs x) 7110 if (N0.getOpcode() == ISD::ABS) 7111 return N0; 7112 // fold (abs x) -> x iff not-negative 7113 if (DAG.SignBitIsZero(N0)) 7114 return N0; 7115 return SDValue(); 7116 } 7117 7118 SDValue DAGCombiner::visitBSWAP(SDNode *N) { 7119 SDValue N0 = N->getOperand(0); 7120 EVT VT = N->getValueType(0); 7121 7122 // fold (bswap c1) -> c2 7123 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) 7124 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); 7125 // fold (bswap (bswap x)) -> x 7126 if (N0.getOpcode() == ISD::BSWAP) 7127 return N0->getOperand(0); 7128 return SDValue(); 7129 } 7130 7131 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { 7132 SDValue N0 = N->getOperand(0); 7133 EVT VT = N->getValueType(0); 7134 7135 // fold (bitreverse c1) -> c2 7136 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) 7137 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0); 7138 // fold (bitreverse (bitreverse x)) -> x 7139 if (N0.getOpcode() == ISD::BITREVERSE) 7140 return N0.getOperand(0); 7141 return SDValue(); 7142 } 7143 7144 SDValue DAGCombiner::visitCTLZ(SDNode *N) { 7145 SDValue N0 = N->getOperand(0); 7146 EVT VT = N->getValueType(0); 7147 7148 // fold (ctlz c1) -> c2 7149 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) 7150 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); 7151 7152 // If the value is known never to be zero, switch to the undef version. 
7153 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) { 7154 if (DAG.isKnownNeverZero(N0)) 7155 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); 7156 } 7157 7158 return SDValue(); 7159 } 7160 7161 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { 7162 SDValue N0 = N->getOperand(0); 7163 EVT VT = N->getValueType(0); 7164 7165 // fold (ctlz_zero_undef c1) -> c2 7166 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) 7167 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); 7168 return SDValue(); 7169 } 7170 7171 SDValue DAGCombiner::visitCTTZ(SDNode *N) { 7172 SDValue N0 = N->getOperand(0); 7173 EVT VT = N->getValueType(0); 7174 7175 // fold (cttz c1) -> c2 7176 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) 7177 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); 7178 7179 // If the value is known never to be zero, switch to the undef version. 7180 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) { 7181 if (DAG.isKnownNeverZero(N0)) 7182 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); 7183 } 7184 7185 return SDValue(); 7186 } 7187 7188 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { 7189 SDValue N0 = N->getOperand(0); 7190 EVT VT = N->getValueType(0); 7191 7192 // fold (cttz_zero_undef c1) -> c2 7193 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) 7194 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); 7195 return SDValue(); 7196 } 7197 7198 SDValue DAGCombiner::visitCTPOP(SDNode *N) { 7199 SDValue N0 = N->getOperand(0); 7200 EVT VT = N->getValueType(0); 7201 7202 // fold (ctpop c1) -> c2 7203 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) 7204 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); 7205 return SDValue(); 7206 } 7207 7208 // FIXME: This should be checking for no signed zeros on individual operands, as 7209 // well as no nans. 
7210 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) { 7211 const TargetOptions &Options = DAG.getTarget().Options; 7212 EVT VT = LHS.getValueType(); 7213 7214 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() && 7215 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS); 7216 } 7217 7218 /// Generate Min/Max node 7219 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, 7220 SDValue RHS, SDValue True, SDValue False, 7221 ISD::CondCode CC, const TargetLowering &TLI, 7222 SelectionDAG &DAG) { 7223 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) 7224 return SDValue(); 7225 7226 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); 7227 switch (CC) { 7228 case ISD::SETOLT: 7229 case ISD::SETOLE: 7230 case ISD::SETLT: 7231 case ISD::SETLE: 7232 case ISD::SETULT: 7233 case ISD::SETULE: { 7234 // Since it's known never nan to get here already, either fminnum or 7235 // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is 7236 // expanded in terms of it. 7237 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE; 7238 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT)) 7239 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS); 7240 7241 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM; 7242 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT)) 7243 return DAG.getNode(Opcode, DL, VT, LHS, RHS); 7244 return SDValue(); 7245 } 7246 case ISD::SETOGT: 7247 case ISD::SETOGE: 7248 case ISD::SETGT: 7249 case ISD::SETGE: 7250 case ISD::SETUGT: 7251 case ISD::SETUGE: { 7252 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE; 7253 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT)) 7254 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS); 7255 7256 unsigned Opcode = (LHS == True) ? 
ISD::FMAXNUM : ISD::FMINNUM; 7257 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT)) 7258 return DAG.getNode(Opcode, DL, VT, LHS, RHS); 7259 return SDValue(); 7260 } 7261 default: 7262 return SDValue(); 7263 } 7264 } 7265 7266 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { 7267 SDValue Cond = N->getOperand(0); 7268 SDValue N1 = N->getOperand(1); 7269 SDValue N2 = N->getOperand(2); 7270 EVT VT = N->getValueType(0); 7271 EVT CondVT = Cond.getValueType(); 7272 SDLoc DL(N); 7273 7274 if (!VT.isInteger()) 7275 return SDValue(); 7276 7277 auto *C1 = dyn_cast<ConstantSDNode>(N1); 7278 auto *C2 = dyn_cast<ConstantSDNode>(N2); 7279 if (!C1 || !C2) 7280 return SDValue(); 7281 7282 // Only do this before legalization to avoid conflicting with target-specific 7283 // transforms in the other direction (create a select from a zext/sext). There 7284 // is also a target-independent combine here in DAGCombiner in the other 7285 // direction for (select Cond, -1, 0) when the condition is not i1. 
if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // For any constants that differ by 1, we can transform the select into an
    // extend and add. Use a target hook because some targets may prefer to
    // transform in the other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
    }

    // An i1 condition before operation legalization never falls through to
    // the xor-based fold below.
    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    // Resize the inverted condition only when its width differs from VT.
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}

/// Combine an ISD::SELECT node.
///
/// Operand 0 is the condition, operand 1 the value chosen when it is true and
/// operand 2 the value chosen when it is false. The folds below are tried in
/// order and the first one that applies wins.
/// \returns the replacement value, SDValue(N, 0) when SimplifySelectOps
///          reports that it handled the node, or an empty SDValue when no
///          fold applied.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);

  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      // The inner select is requested unconditionally; a result that already
      // has uses is what "the inner select exists in the DAG" refers to
      // above (presumably getNode hands back the existing node -- confirm).
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      // Same probe as in the AND case.
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2);
      }
    }
  }

  if (VT0 == MVT::i1) {
    // select (not Cond), N1, N2 -> select Cond, N2, N1
    if (isBitwiseNot(N0))
      return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
  }

  // Fold selects based on a setcc into other things, such as min/max/abs.
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

    // select (fcmp lt x, y), x, y -> fminnum x, y
    // select (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care what happens if either operand is a NaN.
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
      if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
                                                CC, TLI, DAG))
        return FMinMax;

    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
    // This is conservatively limited to pre-legal-operations to give targets
    // a chance to reverse the transform if they want to do that. Also, it is
    // unlikely that the pattern would be formed late, so it's probably not
    // worth going through the other checks.
    if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
        CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
        N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
      auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
      if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
        //
        // The IR equivalent of this transform would have this form:
        //   %a = add %x, C
        //   %c = icmp ugt %x, ~C
        //   %r = select %c, -1, %a
        //   =>
        //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
        //   %u0 = extractvalue %u, 0
        //   %u1 = extractvalue %u, 1
        //   %r = select %u1, -1, %u0
        SDVTList VTs = DAG.getVTList(VT, VT0);
        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
      }
    }

    // Merge the setcc into a SELECT_CC when that opcode is legal, or, before
    // operation legalization, when it is at least custom-lowered.
    if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
        (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
                         N0.getOperand(2));

    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}

static
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // Split the inputs.
7536 SDValue Lo, Hi, LL, LH, RL, RH; 7537 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); 7538 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); 7539 7540 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); 7541 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); 7542 7543 return std::make_pair(Lo, Hi); 7544 } 7545 7546 // This function assumes all the vselect's arguments are CONCAT_VECTOR 7547 // nodes and that the condition is a BV of ConstantSDNodes (or undefs). 7548 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { 7549 SDLoc DL(N); 7550 SDValue Cond = N->getOperand(0); 7551 SDValue LHS = N->getOperand(1); 7552 SDValue RHS = N->getOperand(2); 7553 EVT VT = N->getValueType(0); 7554 int NumElems = VT.getVectorNumElements(); 7555 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && 7556 RHS.getOpcode() == ISD::CONCAT_VECTORS && 7557 Cond.getOpcode() == ISD::BUILD_VECTOR); 7558 7559 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about 7560 // binary ones here. 7561 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2) 7562 return SDValue(); 7563 7564 // We're sure we have an even number of elements due to the 7565 // concat_vectors we have as arguments to vselect. 7566 // Skip BV elements until we find one that's not an UNDEF 7567 // After we find an UNDEF element, keep looping until we get to half the 7568 // length of the BV and see if all the non-undef nodes are the same. 
7569 ConstantSDNode *BottomHalf = nullptr; 7570 for (int i = 0; i < NumElems / 2; ++i) { 7571 if (Cond->getOperand(i)->isUndef()) 7572 continue; 7573 7574 if (BottomHalf == nullptr) 7575 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 7576 else if (Cond->getOperand(i).getNode() != BottomHalf) 7577 return SDValue(); 7578 } 7579 7580 // Do the same for the second half of the BuildVector 7581 ConstantSDNode *TopHalf = nullptr; 7582 for (int i = NumElems / 2; i < NumElems; ++i) { 7583 if (Cond->getOperand(i)->isUndef()) 7584 continue; 7585 7586 if (TopHalf == nullptr) 7587 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 7588 else if (Cond->getOperand(i).getNode() != TopHalf) 7589 return SDValue(); 7590 } 7591 7592 assert(TopHalf && BottomHalf && 7593 "One half of the selector was all UNDEFs and the other was all the " 7594 "same value. This should have been addressed before this function."); 7595 return DAG.getNode( 7596 ISD::CONCAT_VECTORS, DL, VT, 7597 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), 7598 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); 7599 } 7600 7601 SDValue DAGCombiner::visitMSCATTER(SDNode *N) { 7602 if (Level >= AfterLegalizeTypes) 7603 return SDValue(); 7604 7605 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); 7606 SDValue Mask = MSC->getMask(); 7607 SDValue Data = MSC->getValue(); 7608 SDLoc DL(N); 7609 7610 // If the MSCATTER data type requires splitting and the mask is provided by a 7611 // SETCC, then split both nodes and its operands before legalization. This 7612 // prevents the type legalizer from unrolling SETCC into scalar comparisons 7613 // and enables future optimizations (e.g. min/max pattern matching on X86). 7614 if (Mask.getOpcode() != ISD::SETCC) 7615 return SDValue(); 7616 7617 // Check if any splitting is required. 
7618 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != 7619 TargetLowering::TypeSplitVector) 7620 return SDValue(); 7621 SDValue MaskLo, MaskHi; 7622 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 7623 7624 EVT LoVT, HiVT; 7625 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); 7626 7627 SDValue Chain = MSC->getChain(); 7628 7629 EVT MemoryVT = MSC->getMemoryVT(); 7630 unsigned Alignment = MSC->getOriginalAlignment(); 7631 7632 EVT LoMemVT, HiMemVT; 7633 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 7634 7635 SDValue DataLo, DataHi; 7636 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 7637 7638 SDValue Scale = MSC->getScale(); 7639 SDValue BasePtr = MSC->getBasePtr(); 7640 SDValue IndexLo, IndexHi; 7641 std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); 7642 7643 MachineMemOperand *MMO = DAG.getMachineFunction(). 7644 getMachineMemOperand(MSC->getPointerInfo(), 7645 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 7646 Alignment, MSC->getAAInfo(), MSC->getRanges()); 7647 7648 SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale }; 7649 SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), 7650 DataLo.getValueType(), DL, OpsLo, MMO); 7651 7652 // The order of the Scatter operation after split is well defined. The "Hi" 7653 // part comes after the "Lo". So these two operations should be chained one 7654 // after another. 
7655 SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale }; 7656 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), 7657 DL, OpsHi, MMO); 7658 } 7659 7660 SDValue DAGCombiner::visitMSTORE(SDNode *N) { 7661 if (Level >= AfterLegalizeTypes) 7662 return SDValue(); 7663 7664 MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); 7665 SDValue Mask = MST->getMask(); 7666 SDValue Data = MST->getValue(); 7667 EVT VT = Data.getValueType(); 7668 SDLoc DL(N); 7669 7670 // If the MSTORE data type requires splitting and the mask is provided by a 7671 // SETCC, then split both nodes and its operands before legalization. This 7672 // prevents the type legalizer from unrolling SETCC into scalar comparisons 7673 // and enables future optimizations (e.g. min/max pattern matching on X86). 7674 if (Mask.getOpcode() == ISD::SETCC) { 7675 // Check if any splitting is required. 7676 if (TLI.getTypeAction(*DAG.getContext(), VT) != 7677 TargetLowering::TypeSplitVector) 7678 return SDValue(); 7679 7680 SDValue MaskLo, MaskHi, Lo, Hi; 7681 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 7682 7683 SDValue Chain = MST->getChain(); 7684 SDValue Ptr = MST->getBasePtr(); 7685 7686 EVT MemoryVT = MST->getMemoryVT(); 7687 unsigned Alignment = MST->getOriginalAlignment(); 7688 7689 // if Alignment is equal to the vector size, 7690 // take the half of it for the second part 7691 unsigned SecondHalfAlignment = 7692 (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment; 7693 7694 EVT LoMemVT, HiMemVT; 7695 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 7696 7697 SDValue DataLo, DataHi; 7698 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 7699 7700 MachineMemOperand *MMO = DAG.getMachineFunction(). 
7701 getMachineMemOperand(MST->getPointerInfo(), 7702 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 7703 Alignment, MST->getAAInfo(), MST->getRanges()); 7704 7705 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, 7706 MST->isTruncatingStore(), 7707 MST->isCompressingStore()); 7708 7709 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, 7710 MST->isCompressingStore()); 7711 unsigned HiOffset = LoMemVT.getStoreSize(); 7712 7713 MMO = DAG.getMachineFunction().getMachineMemOperand( 7714 MST->getPointerInfo().getWithOffset(HiOffset), 7715 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, 7716 MST->getAAInfo(), MST->getRanges()); 7717 7718 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, 7719 MST->isTruncatingStore(), 7720 MST->isCompressingStore()); 7721 7722 AddToWorklist(Lo.getNode()); 7723 AddToWorklist(Hi.getNode()); 7724 7725 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 7726 } 7727 return SDValue(); 7728 } 7729 7730 SDValue DAGCombiner::visitMGATHER(SDNode *N) { 7731 if (Level >= AfterLegalizeTypes) 7732 return SDValue(); 7733 7734 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N); 7735 SDValue Mask = MGT->getMask(); 7736 SDLoc DL(N); 7737 7738 // If the MGATHER result requires splitting and the mask is provided by a 7739 // SETCC, then split both nodes and its operands before legalization. This 7740 // prevents the type legalizer from unrolling SETCC into scalar comparisons 7741 // and enables future optimizations (e.g. min/max pattern matching on X86). 7742 7743 if (Mask.getOpcode() != ISD::SETCC) 7744 return SDValue(); 7745 7746 EVT VT = N->getValueType(0); 7747 7748 // Check if any splitting is required. 
7749 if (TLI.getTypeAction(*DAG.getContext(), VT) != 7750 TargetLowering::TypeSplitVector) 7751 return SDValue(); 7752 7753 SDValue MaskLo, MaskHi, Lo, Hi; 7754 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 7755 7756 SDValue PassThru = MGT->getPassThru(); 7757 SDValue PassThruLo, PassThruHi; 7758 std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); 7759 7760 EVT LoVT, HiVT; 7761 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); 7762 7763 SDValue Chain = MGT->getChain(); 7764 EVT MemoryVT = MGT->getMemoryVT(); 7765 unsigned Alignment = MGT->getOriginalAlignment(); 7766 7767 EVT LoMemVT, HiMemVT; 7768 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 7769 7770 SDValue Scale = MGT->getScale(); 7771 SDValue BasePtr = MGT->getBasePtr(); 7772 SDValue Index = MGT->getIndex(); 7773 SDValue IndexLo, IndexHi; 7774 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); 7775 7776 MachineMemOperand *MMO = DAG.getMachineFunction(). 7777 getMachineMemOperand(MGT->getPointerInfo(), 7778 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), 7779 Alignment, MGT->getAAInfo(), MGT->getRanges()); 7780 7781 SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale }; 7782 Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, 7783 MMO); 7784 7785 SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale }; 7786 Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, 7787 MMO); 7788 7789 AddToWorklist(Lo.getNode()); 7790 AddToWorklist(Hi.getNode()); 7791 7792 // Build a factor node to remember that this load is independent of the 7793 // other one. 7794 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), 7795 Hi.getValue(1)); 7796 7797 // Legalized the chain result - switch anything that used the old chain to 7798 // use the new one. 
7799 DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); 7800 7801 SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 7802 7803 SDValue RetOps[] = { GatherRes, Chain }; 7804 return DAG.getMergeValues(RetOps, DL); 7805 } 7806 7807 SDValue DAGCombiner::visitMLOAD(SDNode *N) { 7808 if (Level >= AfterLegalizeTypes) 7809 return SDValue(); 7810 7811 MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); 7812 SDValue Mask = MLD->getMask(); 7813 SDLoc DL(N); 7814 7815 // If the MLOAD result requires splitting and the mask is provided by a 7816 // SETCC, then split both nodes and its operands before legalization. This 7817 // prevents the type legalizer from unrolling SETCC into scalar comparisons 7818 // and enables future optimizations (e.g. min/max pattern matching on X86). 7819 if (Mask.getOpcode() == ISD::SETCC) { 7820 EVT VT = N->getValueType(0); 7821 7822 // Check if any splitting is required. 7823 if (TLI.getTypeAction(*DAG.getContext(), VT) != 7824 TargetLowering::TypeSplitVector) 7825 return SDValue(); 7826 7827 SDValue MaskLo, MaskHi, Lo, Hi; 7828 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 7829 7830 SDValue PassThru = MLD->getPassThru(); 7831 SDValue PassThruLo, PassThruHi; 7832 std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); 7833 7834 EVT LoVT, HiVT; 7835 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); 7836 7837 SDValue Chain = MLD->getChain(); 7838 SDValue Ptr = MLD->getBasePtr(); 7839 EVT MemoryVT = MLD->getMemoryVT(); 7840 unsigned Alignment = MLD->getOriginalAlignment(); 7841 7842 // if Alignment is equal to the vector size, 7843 // take the half of it for the second part 7844 unsigned SecondHalfAlignment = 7845 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 7846 Alignment/2 : Alignment; 7847 7848 EVT LoMemVT, HiMemVT; 7849 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 7850 7851 MachineMemOperand *MMO = DAG.getMachineFunction(). 
7852 getMachineMemOperand(MLD->getPointerInfo(), 7853 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), 7854 Alignment, MLD->getAAInfo(), MLD->getRanges()); 7855 7856 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT, 7857 MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); 7858 7859 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, 7860 MLD->isExpandingLoad()); 7861 unsigned HiOffset = LoMemVT.getStoreSize(); 7862 7863 MMO = DAG.getMachineFunction().getMachineMemOperand( 7864 MLD->getPointerInfo().getWithOffset(HiOffset), 7865 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, 7866 MLD->getAAInfo(), MLD->getRanges()); 7867 7868 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT, 7869 MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); 7870 7871 AddToWorklist(Lo.getNode()); 7872 AddToWorklist(Hi.getNode()); 7873 7874 // Build a factor node to remember that this load is independent of the 7875 // other one. 7876 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), 7877 Hi.getValue(1)); 7878 7879 // Legalized the chain result - switch anything that used the old chain to 7880 // use the new one. 7881 DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); 7882 7883 SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 7884 7885 SDValue RetOps[] = { LoadRes, Chain }; 7886 return DAG.getMergeValues(RetOps, DL); 7887 } 7888 return SDValue(); 7889 } 7890 7891 /// A vector select of 2 constant vectors can be simplified to math/logic to 7892 /// avoid a variable select instruction and possibly avoid constant loads. 
7893 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { 7894 SDValue Cond = N->getOperand(0); 7895 SDValue N1 = N->getOperand(1); 7896 SDValue N2 = N->getOperand(2); 7897 EVT VT = N->getValueType(0); 7898 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 || 7899 !TLI.convertSelectOfConstantsToMath(VT) || 7900 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) || 7901 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode())) 7902 return SDValue(); 7903 7904 // Check if we can use the condition value to increment/decrement a single 7905 // constant value. This simplifies a select to an add and removes a constant 7906 // load/materialization from the general case. 7907 bool AllAddOne = true; 7908 bool AllSubOne = true; 7909 unsigned Elts = VT.getVectorNumElements(); 7910 for (unsigned i = 0; i != Elts; ++i) { 7911 SDValue N1Elt = N1.getOperand(i); 7912 SDValue N2Elt = N2.getOperand(i); 7913 if (N1Elt.isUndef() || N2Elt.isUndef()) 7914 continue; 7915 7916 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue(); 7917 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue(); 7918 if (C1 != C2 + 1) 7919 AllAddOne = false; 7920 if (C1 != C2 - 1) 7921 AllSubOne = false; 7922 } 7923 7924 // Further simplifications for the extra-special cases where the constants are 7925 // all 0 or all -1 should be implemented as folds of these patterns. 7926 SDLoc DL(N); 7927 if (AllAddOne || AllSubOne) { 7928 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C 7929 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C 7930 auto ExtendOpcode = AllAddOne ? 
ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; 7931 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond); 7932 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2); 7933 } 7934 7935 // The general case for select-of-constants: 7936 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 7937 // ...but that only makes sense if a vselect is slower than 2 logic ops, so 7938 // leave that to a machine-specific pass. 7939 return SDValue(); 7940 } 7941 7942 SDValue DAGCombiner::visitVSELECT(SDNode *N) { 7943 SDValue N0 = N->getOperand(0); 7944 SDValue N1 = N->getOperand(1); 7945 SDValue N2 = N->getOperand(2); 7946 SDLoc DL(N); 7947 7948 if (SDValue V = DAG.simplifySelect(N0, N1, N2)) 7949 return V; 7950 7951 // Canonicalize integer abs. 7952 // vselect (setg[te] X, 0), X, -X -> 7953 // vselect (setgt X, -1), X, -X -> 7954 // vselect (setl[te] X, 0), -X, X -> 7955 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 7956 if (N0.getOpcode() == ISD::SETCC) { 7957 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 7958 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 7959 bool isAbs = false; 7960 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); 7961 7962 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || 7963 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && 7964 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) 7965 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); 7966 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && 7967 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) 7968 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); 7969 7970 if (isAbs) { 7971 EVT VT = LHS.getValueType(); 7972 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) 7973 return DAG.getNode(ISD::ABS, DL, VT, LHS); 7974 7975 SDValue Shift = DAG.getNode( 7976 ISD::SRA, DL, VT, LHS, 7977 DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); 7978 
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); 7979 AddToWorklist(Shift.getNode()); 7980 AddToWorklist(Add.getNode()); 7981 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); 7982 } 7983 7984 // vselect x, y (fcmp lt x, y) -> fminnum x, y 7985 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y 7986 // 7987 // This is OK if we don't care about what happens if either operand is a 7988 // NaN. 7989 // 7990 EVT VT = N->getValueType(0); 7991 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) { 7992 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 7993 if (SDValue FMinMax = combineMinNumMaxNum( 7994 DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) 7995 return FMinMax; 7996 } 7997 7998 // If this select has a condition (setcc) with narrower operands than the 7999 // select, try to widen the compare to match the select width. 8000 // TODO: This should be extended to handle any constant. 8001 // TODO: This could be extended to handle non-loading patterns, but that 8002 // requires thorough testing to avoid regressions. 8003 if (isNullOrNullSplat(RHS)) { 8004 EVT NarrowVT = LHS.getValueType(); 8005 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger(); 8006 EVT SetCCVT = getSetCCResultType(LHS.getValueType()); 8007 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits(); 8008 unsigned WideWidth = WideVT.getScalarSizeInBits(); 8009 bool IsSigned = isSignedIntSetCC(CC); 8010 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD; 8011 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() && 8012 SetCCWidth != 1 && SetCCWidth < WideWidth && 8013 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) && 8014 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) { 8015 // Both compare operands can be widened for free. 
The LHS can use an 8016 // extended load, and the RHS is a constant: 8017 // vselect (ext (setcc load(X), C)), N1, N2 --> 8018 // vselect (setcc extload(X), C'), N1, N2 8019 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 8020 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS); 8021 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS); 8022 EVT WideSetCCVT = getSetCCResultType(WideVT); 8023 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC); 8024 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2); 8025 } 8026 } 8027 } 8028 8029 if (SimplifySelectOps(N, N1, N2)) 8030 return SDValue(N, 0); // Don't revisit N. 8031 8032 // Fold (vselect (build_vector all_ones), N1, N2) -> N1 8033 if (ISD::isBuildVectorAllOnes(N0.getNode())) 8034 return N1; 8035 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2 8036 if (ISD::isBuildVectorAllZeros(N0.getNode())) 8037 return N2; 8038 8039 // The ConvertSelectToConcatVector function is assuming both the above 8040 // checks for (vselect (build_vector all{ones,zeros) ...) have been made 8041 // and addressed. 
8042 if (N1.getOpcode() == ISD::CONCAT_VECTORS && 8043 N2.getOpcode() == ISD::CONCAT_VECTORS && 8044 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { 8045 if (SDValue CV = ConvertSelectToConcatVector(N, DAG)) 8046 return CV; 8047 } 8048 8049 if (SDValue V = foldVSelectOfConstants(N)) 8050 return V; 8051 8052 return SDValue(); 8053 } 8054 8055 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { 8056 SDValue N0 = N->getOperand(0); 8057 SDValue N1 = N->getOperand(1); 8058 SDValue N2 = N->getOperand(2); 8059 SDValue N3 = N->getOperand(3); 8060 SDValue N4 = N->getOperand(4); 8061 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); 8062 8063 // fold select_cc lhs, rhs, x, x, cc -> x 8064 if (N2 == N3) 8065 return N2; 8066 8067 // Determine if the condition we're dealing with is constant 8068 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, 8069 CC, SDLoc(N), false)) { 8070 AddToWorklist(SCC.getNode()); 8071 8072 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { 8073 if (!SCCC->isNullValue()) 8074 return N2; // cond always true -> true val 8075 else 8076 return N3; // cond always false -> false val 8077 } else if (SCC->isUndef()) { 8078 // When the condition is UNDEF, just return the first operand. This is 8079 // coherent the DAG creation, no setcc node is created in this case 8080 return N2; 8081 } else if (SCC.getOpcode() == ISD::SETCC) { 8082 // Fold to a simpler select_cc 8083 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), 8084 SCC.getOperand(0), SCC.getOperand(1), N2, N3, 8085 SCC.getOperand(2)); 8086 } 8087 } 8088 8089 // If we can fold this based on the true/false value, do so. 8090 if (SimplifySelectOps(N, N2, N3)) 8091 return SDValue(N, 0); // Don't revisit N. 
8092 8093 // fold select_cc into other things, such as min/max/abs 8094 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); 8095 } 8096 8097 SDValue DAGCombiner::visitSETCC(SDNode *N) { 8098 // setcc is very commonly used as an argument to brcond. This pattern 8099 // also lend itself to numerous combines and, as a result, it is desired 8100 // we keep the argument to a brcond as a setcc as much as possible. 8101 bool PreferSetCC = 8102 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND; 8103 8104 SDValue Combined = SimplifySetCC( 8105 N->getValueType(0), N->getOperand(0), N->getOperand(1), 8106 cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC); 8107 8108 if (!Combined) 8109 return SDValue(); 8110 8111 // If we prefer to have a setcc, and we don't, we'll try our best to 8112 // recreate one using rebuildSetCC. 8113 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) { 8114 SDValue NewSetCC = rebuildSetCC(Combined); 8115 8116 // We don't have anything interesting to combine to. 8117 if (NewSetCC.getNode() == N) 8118 return SDValue(); 8119 8120 if (NewSetCC) 8121 return NewSetCC; 8122 } 8123 8124 return Combined; 8125 } 8126 8127 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) { 8128 SDValue LHS = N->getOperand(0); 8129 SDValue RHS = N->getOperand(1); 8130 SDValue Carry = N->getOperand(2); 8131 SDValue Cond = N->getOperand(3); 8132 8133 // If Carry is false, fold to a regular SETCC. 8134 if (isNullConstant(Carry)) 8135 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); 8136 8137 return SDValue(); 8138 } 8139 8140 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or 8141 /// a build_vector of constants. 8142 /// This function is called by the DAGCombiner when visiting sext/zext/aext 8143 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). 8144 /// Vector extends are not folded if operations are legal; this is to 8145 /// avoid introducing illegal build_vector dag nodes. 
8146 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, 8147 SelectionDAG &DAG, bool LegalTypes) { 8148 unsigned Opcode = N->getOpcode(); 8149 SDValue N0 = N->getOperand(0); 8150 EVT VT = N->getValueType(0); 8151 8152 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || 8153 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || 8154 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) 8155 && "Expected EXTEND dag node in input!"); 8156 8157 // fold (sext c1) -> c1 8158 // fold (zext c1) -> c1 8159 // fold (aext c1) -> c1 8160 if (isa<ConstantSDNode>(N0)) 8161 return DAG.getNode(Opcode, SDLoc(N), VT, N0); 8162 8163 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) 8164 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) 8165 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) 8166 EVT SVT = VT.getScalarType(); 8167 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) && 8168 ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) 8169 return SDValue(); 8170 8171 // We can fold this node into a build_vector. 8172 unsigned VTBits = SVT.getSizeInBits(); 8173 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits(); 8174 SmallVector<SDValue, 8> Elts; 8175 unsigned NumElts = VT.getVectorNumElements(); 8176 SDLoc DL(N); 8177 8178 // For zero-extensions, UNDEF elements still guarantee to have the upper 8179 // bits set to zero. 8180 bool IsZext = 8181 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG; 8182 8183 for (unsigned i = 0; i != NumElts; ++i) { 8184 SDValue Op = N0.getOperand(i); 8185 if (Op.isUndef()) { 8186 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT)); 8187 continue; 8188 } 8189 8190 SDLoc DL(Op); 8191 // Get the constant value and if needed trunc it to the size of the type. 8192 // Nodes like build_vector might have constants wider than the scalar type. 
8193 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits); 8194 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) 8195 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT)); 8196 else 8197 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); 8198 } 8199 8200 return DAG.getBuildVector(VT, DL, Elts); 8201 } 8202 8203 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: 8204 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" 8205 // transformation. Returns true if extension are possible and the above 8206 // mentioned transformation is profitable. 8207 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, 8208 unsigned ExtOpc, 8209 SmallVectorImpl<SDNode *> &ExtendNodes, 8210 const TargetLowering &TLI) { 8211 bool HasCopyToRegUses = false; 8212 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType()); 8213 for (SDNode::use_iterator UI = N0.getNode()->use_begin(), 8214 UE = N0.getNode()->use_end(); 8215 UI != UE; ++UI) { 8216 SDNode *User = *UI; 8217 if (User == N) 8218 continue; 8219 if (UI.getUse().getResNo() != N0.getResNo()) 8220 continue; 8221 // FIXME: Only extend SETCC N, N and SETCC N, c for now. 8222 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 8223 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 8224 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 8225 // Sign bits will be lost after a zext. 8226 return false; 8227 bool Add = false; 8228 for (unsigned i = 0; i != 2; ++i) { 8229 SDValue UseOp = User->getOperand(i); 8230 if (UseOp == N0) 8231 continue; 8232 if (!isa<ConstantSDNode>(UseOp)) 8233 return false; 8234 Add = true; 8235 } 8236 if (Add) 8237 ExtendNodes.push_back(User); 8238 continue; 8239 } 8240 // If truncates aren't free and there are users we can't 8241 // extend, it isn't worthwhile. 8242 if (!isTruncFree) 8243 return false; 8244 // Remember if this value is live-out. 
8245 if (User->getOpcode() == ISD::CopyToReg) 8246 HasCopyToRegUses = true; 8247 } 8248 8249 if (HasCopyToRegUses) { 8250 bool BothLiveOut = false; 8251 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 8252 UI != UE; ++UI) { 8253 SDUse &Use = UI.getUse(); 8254 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 8255 BothLiveOut = true; 8256 break; 8257 } 8258 } 8259 if (BothLiveOut) 8260 // Both unextended and extended values are live out. There had better be 8261 // a good reason for the transformation. 8262 return ExtendNodes.size(); 8263 } 8264 return true; 8265 } 8266 8267 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, 8268 SDValue OrigLoad, SDValue ExtLoad, 8269 ISD::NodeType ExtType) { 8270 // Extend SetCC uses if necessary. 8271 SDLoc DL(ExtLoad); 8272 for (SDNode *SetCC : SetCCs) { 8273 SmallVector<SDValue, 4> Ops; 8274 8275 for (unsigned j = 0; j != 2; ++j) { 8276 SDValue SOp = SetCC->getOperand(j); 8277 if (SOp == OrigLoad) 8278 Ops.push_back(ExtLoad); 8279 else 8280 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); 8281 } 8282 8283 Ops.push_back(SetCC->getOperand(2)); 8284 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); 8285 } 8286 } 8287 8288 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). 8289 SDValue DAGCombiner::CombineExtLoad(SDNode *N) { 8290 SDValue N0 = N->getOperand(0); 8291 EVT DstVT = N->getValueType(0); 8292 EVT SrcVT = N0.getValueType(); 8293 8294 assert((N->getOpcode() == ISD::SIGN_EXTEND || 8295 N->getOpcode() == ISD::ZERO_EXTEND) && 8296 "Unexpected node type (not an extend)!"); 8297 8298 // fold (sext (load x)) to multiple smaller sextloads; same for zext. 
8299 // For example, on a target with legal v4i32, but illegal v8i32, turn: 8300 // (v8i32 (sext (v8i16 (load x)))) 8301 // into: 8302 // (v8i32 (concat_vectors (v4i32 (sextload x)), 8303 // (v4i32 (sextload (x + 16))))) 8304 // Where uses of the original load, i.e.: 8305 // (v8i16 (load x)) 8306 // are replaced with: 8307 // (v8i16 (truncate 8308 // (v8i32 (concat_vectors (v4i32 (sextload x)), 8309 // (v4i32 (sextload (x + 16))))))) 8310 // 8311 // This combine is only applicable to illegal, but splittable, vectors. 8312 // All legal types, and illegal non-vector types, are handled elsewhere. 8313 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. 8314 // 8315 if (N0->getOpcode() != ISD::LOAD) 8316 return SDValue(); 8317 8318 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 8319 8320 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || 8321 !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || 8322 !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) 8323 return SDValue(); 8324 8325 SmallVector<SDNode *, 4> SetCCs; 8326 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI)) 8327 return SDValue(); 8328 8329 ISD::LoadExtType ExtType = 8330 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; 8331 8332 // Try to split the vector types to get down to legal types. 
8333 EVT SplitSrcVT = SrcVT; 8334 EVT SplitDstVT = DstVT; 8335 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && 8336 SplitSrcVT.getVectorNumElements() > 1) { 8337 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; 8338 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; 8339 } 8340 8341 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) 8342 return SDValue(); 8343 8344 SDLoc DL(N); 8345 const unsigned NumSplits = 8346 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); 8347 const unsigned Stride = SplitSrcVT.getStoreSize(); 8348 SmallVector<SDValue, 4> Loads; 8349 SmallVector<SDValue, 4> Chains; 8350 8351 SDValue BasePtr = LN0->getBasePtr(); 8352 for (unsigned Idx = 0; Idx < NumSplits; Idx++) { 8353 const unsigned Offset = Idx * Stride; 8354 const unsigned Align = MinAlign(LN0->getAlignment(), Offset); 8355 8356 SDValue SplitLoad = DAG.getExtLoad( 8357 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr, 8358 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, 8359 LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); 8360 8361 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, 8362 DAG.getConstant(Stride, DL, BasePtr.getValueType())); 8363 8364 Loads.push_back(SplitLoad.getValue(0)); 8365 Chains.push_back(SplitLoad.getValue(1)); 8366 } 8367 8368 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); 8369 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); 8370 8371 // Simplify TF. 8372 AddToWorklist(NewChain.getNode()); 8373 8374 CombineTo(N, NewValue); 8375 8376 // Replace uses of the original load (before extension) 8377 // with a truncate of the concatenated sextloaded vectors. 
8378 SDValue Trunc = 8379 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); 8380 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode()); 8381 CombineTo(N0.getNode(), Trunc, NewChain); 8382 return SDValue(N, 0); // Return N so it doesn't get rechecked! 8383 } 8384 8385 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) -> 8386 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst)) 8387 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) { 8388 assert(N->getOpcode() == ISD::ZERO_EXTEND); 8389 EVT VT = N->getValueType(0); 8390 8391 // and/or/xor 8392 SDValue N0 = N->getOperand(0); 8393 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || 8394 N0.getOpcode() == ISD::XOR) || 8395 N0.getOperand(1).getOpcode() != ISD::Constant || 8396 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT))) 8397 return SDValue(); 8398 8399 // shl/shr 8400 SDValue N1 = N0->getOperand(0); 8401 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) || 8402 N1.getOperand(1).getOpcode() != ISD::Constant || 8403 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT))) 8404 return SDValue(); 8405 8406 // load 8407 if (!isa<LoadSDNode>(N1.getOperand(0))) 8408 return SDValue(); 8409 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0)); 8410 EVT MemVT = Load->getMemoryVT(); 8411 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) || 8412 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed()) 8413 return SDValue(); 8414 8415 8416 // If the shift op is SHL, the logic op must be AND, otherwise the result 8417 // will be wrong. 8418 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND) 8419 return SDValue(); 8420 8421 if (!N0.hasOneUse() || !N1.hasOneUse()) 8422 return SDValue(); 8423 8424 SmallVector<SDNode*, 4> SetCCs; 8425 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0), 8426 ISD::ZERO_EXTEND, SetCCs, TLI)) 8427 return SDValue(); 8428 8429 // Actually do the transformation. 
8430 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT, 8431 Load->getChain(), Load->getBasePtr(), 8432 Load->getMemoryVT(), Load->getMemOperand()); 8433 8434 SDLoc DL1(N1); 8435 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad, 8436 N1.getOperand(1)); 8437 8438 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 8439 Mask = Mask.zext(VT.getSizeInBits()); 8440 SDLoc DL0(N0); 8441 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift, 8442 DAG.getConstant(Mask, DL0, VT)); 8443 8444 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND); 8445 CombineTo(N, And); 8446 if (SDValue(Load, 0).hasOneUse()) { 8447 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1)); 8448 } else { 8449 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load), 8450 Load->getValueType(0), ExtLoad); 8451 CombineTo(Load, Trunc, ExtLoad.getValue(1)); 8452 } 8453 return SDValue(N,0); // Return N so it doesn't get rechecked! 8454 } 8455 8456 /// If we're narrowing or widening the result of a vector select and the final 8457 /// size is the same size as a setcc (compare) feeding the select, then try to 8458 /// apply the cast operation to the select's operands because matching vector 8459 /// sizes for a select condition and other operands should be more efficient. 8460 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { 8461 unsigned CastOpcode = Cast->getOpcode(); 8462 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND || 8463 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND || 8464 CastOpcode == ISD::FP_ROUND) && 8465 "Unexpected opcode for vector select narrowing/widening"); 8466 8467 // We only do this transform before legal ops because the pattern may be 8468 // obfuscated by target-specific operations after legalization. Do not create 8469 // an illegal select op, however, because that may be difficult to lower. 
8470 EVT VT = Cast->getValueType(0); 8471 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) 8472 return SDValue(); 8473 8474 SDValue VSel = Cast->getOperand(0); 8475 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() || 8476 VSel.getOperand(0).getOpcode() != ISD::SETCC) 8477 return SDValue(); 8478 8479 // Does the setcc have the same vector size as the casted select? 8480 SDValue SetCC = VSel.getOperand(0); 8481 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType()); 8482 if (SetCCVT.getSizeInBits() != VT.getSizeInBits()) 8483 return SDValue(); 8484 8485 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B) 8486 SDValue A = VSel.getOperand(1); 8487 SDValue B = VSel.getOperand(2); 8488 SDValue CastA, CastB; 8489 SDLoc DL(Cast); 8490 if (CastOpcode == ISD::FP_ROUND) { 8491 // FP_ROUND (fptrunc) has an extra flag operand to pass along. 8492 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1)); 8493 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1)); 8494 } else { 8495 CastA = DAG.getNode(CastOpcode, DL, VT, A); 8496 CastB = DAG.getNode(CastOpcode, DL, VT, B); 8497 } 8498 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB); 8499 } 8500 8501 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) 8502 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) 8503 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, 8504 const TargetLowering &TLI, EVT VT, 8505 bool LegalOperations, SDNode *N, 8506 SDValue N0, ISD::LoadExtType ExtLoadType) { 8507 SDNode *N0Node = N0.getNode(); 8508 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? 
ISD::isSEXTLoad(N0Node) 8509 : ISD::isZEXTLoad(N0Node); 8510 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) || 8511 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse()) 8512 return {}; 8513 8514 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 8515 EVT MemVT = LN0->getMemoryVT(); 8516 if ((LegalOperations || LN0->isVolatile() || VT.isVector()) && 8517 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) 8518 return {}; 8519 8520 SDValue ExtLoad = 8521 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), 8522 LN0->getBasePtr(), MemVT, LN0->getMemOperand()); 8523 Combiner.CombineTo(N, ExtLoad); 8524 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); 8525 return SDValue(N, 0); // Return N so it doesn't get rechecked! 8526 } 8527 8528 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x))) 8529 // Only generate vector extloads when 1) they're legal, and 2) they are 8530 // deemed desirable by the target. 8531 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, 8532 const TargetLowering &TLI, EVT VT, 8533 bool LegalOperations, SDNode *N, SDValue N0, 8534 ISD::LoadExtType ExtLoadType, 8535 ISD::NodeType ExtOpc) { 8536 if (!ISD::isNON_EXTLoad(N0.getNode()) || 8537 !ISD::isUNINDEXEDLoad(N0.getNode()) || 8538 ((LegalOperations || VT.isVector() || 8539 cast<LoadSDNode>(N0)->isVolatile()) && 8540 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) 8541 return {}; 8542 8543 bool DoXform = true; 8544 SmallVector<SDNode *, 4> SetCCs; 8545 if (!N0.hasOneUse()) 8546 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI); 8547 if (VT.isVector()) 8548 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); 8549 if (!DoXform) 8550 return {}; 8551 8552 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 8553 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), 8554 LN0->getBasePtr(), N0.getValueType(), 8555 LN0->getMemOperand()); 8556 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc); 8557 // If the load 
value is used only by N, replace it via CombineTo N. 8558 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); 8559 Combiner.CombineTo(N, ExtLoad); 8560 if (NoReplaceTrunc) { 8561 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); 8562 } else { 8563 SDValue Trunc = 8564 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); 8565 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1)); 8566 } 8567 return SDValue(N, 0); // Return N so it doesn't get rechecked! 8568 } 8569 8570 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, 8571 bool LegalOperations) { 8572 assert((N->getOpcode() == ISD::SIGN_EXTEND || 8573 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext"); 8574 8575 SDValue SetCC = N->getOperand(0); 8576 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC || 8577 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1) 8578 return SDValue(); 8579 8580 SDValue X = SetCC.getOperand(0); 8581 SDValue Ones = SetCC.getOperand(1); 8582 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); 8583 EVT VT = N->getValueType(0); 8584 EVT XVT = X.getValueType(); 8585 // setge X, C is canonicalized to setgt, so we do not need to match that 8586 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does 8587 // not require the 'not' op. 8588 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) { 8589 // Invert and smear/shift the sign bit: 8590 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1) 8591 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1) 8592 SDLoc DL(N); 8593 SDValue NotX = DAG.getNOT(DL, X, VT); 8594 SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); 8595 auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? 
ISD::SRA : ISD::SRL; 8596 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); 8597 } 8598 return SDValue(); 8599 } 8600 8601 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { 8602 SDValue N0 = N->getOperand(0); 8603 EVT VT = N->getValueType(0); 8604 SDLoc DL(N); 8605 8606 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) 8607 return Res; 8608 8609 // fold (sext (sext x)) -> (sext x) 8610 // fold (sext (aext x)) -> (sext x) 8611 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) 8612 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0)); 8613 8614 if (N0.getOpcode() == ISD::TRUNCATE) { 8615 // fold (sext (truncate (load x))) -> (sext (smaller load x)) 8616 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) 8617 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { 8618 SDNode *oye = N0.getOperand(0).getNode(); 8619 if (NarrowLoad.getNode() != N0.getNode()) { 8620 CombineTo(N0.getNode(), NarrowLoad); 8621 // CombineTo deleted the truncate, if needed, but not what's under it. 8622 AddToWorklist(oye); 8623 } 8624 return SDValue(N, 0); // Return N so it doesn't get rechecked! 8625 } 8626 8627 // See if the value being truncated is already sign extended. If so, just 8628 // eliminate the trunc/sext pair. 8629 SDValue Op = N0.getOperand(0); 8630 unsigned OpBits = Op.getScalarValueSizeInBits(); 8631 unsigned MidBits = N0.getScalarValueSizeInBits(); 8632 unsigned DestBits = VT.getScalarSizeInBits(); 8633 unsigned NumSignBits = DAG.ComputeNumSignBits(Op); 8634 8635 if (OpBits == DestBits) { 8636 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign 8637 // bits, it is already ready. 8638 if (NumSignBits > DestBits-MidBits) 8639 return Op; 8640 } else if (OpBits < DestBits) { 8641 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign 8642 // bits, just sext from i32. 
// With that many sign bits the truncate is redundant: extend straight from Op.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      // Bring Op to the destination width first; the bits above the truncated
      // width are then overwritten by the in-register sign extension.
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // Try to simplify (sext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::SEXTLOAD, ISD::SIGN_EXTEND))
    return foldedExt;

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // Try to simplify (sext (sextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
    return foldedExt;

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
      LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
      SmallVector<SDNode*, 4> SetCCs;
      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                             ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        // Despite its name, 'And' holds whichever of and/or/xor N0 is.
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
        // Sample the use counts before CombineTo below rewrites any uses.
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // If we already have the desired type, don't change it.
      if (SVT != N0.getValueType()) {
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter). Check to see that they are the same size. If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        if (VT.getSizeInBits() == SVT.getSizeInBits())
          return DAG.getSetCC(DL, VT, N00, N01, CC);

        // If the desired elements are smaller or larger than the source
        // elements, we can use a matching integer vector type and then
        // truncate/sign extend.
        EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
        if (SVT == MatchingVecType) {
          SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
          return DAG.getSExtOrTrunc(VsetCC, DL, VT);
        }
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1)
                             ? DAG.getAllOnesConstant(DL, VT)
                             : DAG.getBoolConstant(true, DL, VT, N00VT);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
8791 if (SetCCVT.getScalarSizeInBits() != 1 && 8792 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) { 8793 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC); 8794 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero); 8795 } 8796 } 8797 } 8798 8799 // fold (sext x) -> (zext x) if the sign bit is known zero. 8800 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && 8801 DAG.SignBitIsZero(N0)) 8802 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0); 8803 8804 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) 8805 return NewVSel; 8806 8807 return SDValue(); 8808 } 8809 8810 // isTruncateOf - If N is a truncate of some other value, return true, record 8811 // the value being truncated in Op and which of Op's bits are zero/one in Known. 8812 // This function computes KnownBits to avoid a duplicated call to 8813 // computeKnownBits in the caller. 8814 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, 8815 KnownBits &Known) { 8816 if (N->getOpcode() == ISD::TRUNCATE) { 8817 Op = N->getOperand(0); 8818 Known = DAG.computeKnownBits(Op); 8819 return true; 8820 } 8821 8822 if (N.getOpcode() != ISD::SETCC || 8823 N.getValueType().getScalarType() != MVT::i1 || 8824 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE) 8825 return false; 8826 8827 SDValue Op0 = N->getOperand(0); 8828 SDValue Op1 = N->getOperand(1); 8829 assert(Op0.getValueType() == Op1.getValueType()); 8830 8831 if (isNullOrNullSplat(Op0)) 8832 Op = Op1; 8833 else if (isNullOrNullSplat(Op1)) 8834 Op = Op0; 8835 else 8836 return false; 8837 8838 Known = DAG.computeKnownBits(Op); 8839 8840 return (Known.Zero | 1).isAllOnesValue(); 8841 } 8842 8843 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { 8844 SDValue N0 = N->getOperand(0); 8845 EVT VT = N->getValueType(0); 8846 8847 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) 8848 return Res; 8849 8850 // fold (zext (zext x)) -> (zext x) 8851 // fold (zext (aext x)) -> (zext x) 
// Nested extension: zero-extend directly from the innermost operand.
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  SDValue Op;
  KnownBits Known;
  if (isTruncateOf(DAG, N0, Op, Known)) {
    // Bits of Op from N0's width up to min(Op width, VT width) are the ones
    // the truncate discards and the zext would have to clear.
    APInt TruncatedBits =
      (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
      APInt(Op.getScalarValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getScalarValueSizeInBits(),
                        N0.getScalarValueSizeInBits(),
                        std::min(Op.getScalarValueSizeInBits(),
                                 VT.getScalarSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger mask,
    // possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
      // We may safely transfer the debug info describing the truncate node over
      // to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // Try to simplify (zext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    return foldedExt;

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        if (N0.getOpcode() == ISD::AND) {
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        // Despite its name, 'And' holds whichever of and/or/xor N0 is.
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        // Sample the use counts before CombineTo below rewrites any uses.
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // Try to simplify (zext (zextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    return foldedExt;

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the zext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      SDValue VecOnes = DAG.getConstant(1, DL, VT);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend. The AND with ones keeps only the low bit of
      // each element.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         VecOnes);
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  // The same rewrite is applied when the inner shift is an srl.
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
// The shift amount is zero-extended to i32 only for results of 256+ bits.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

/// Combine an ISD::ANY_EXTEND node. Each fold below is tried in order; the
/// first that applies supplies the result, and an empty SDValue is returned
/// when none does.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction. We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
                                        TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      } else {
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                    N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload x)))
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    // The widened load keeps the extension kind of the existing load.
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the aext'd result matches the
      // element size of the compare operands.
9208 if (VT.getSizeInBits() == N00VT.getSizeInBits()) 9209 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), 9210 N0.getOperand(1), 9211 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 9212 9213 // If the desired elements are smaller or larger than the source 9214 // elements we can use a matching integer vector type and then 9215 // truncate/any extend 9216 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); 9217 SDValue VsetCC = 9218 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), 9219 N0.getOperand(1), 9220 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 9221 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); 9222 } 9223 9224 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc 9225 SDLoc DL(N); 9226 if (SDValue SCC = SimplifySelectCC( 9227 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), 9228 DAG.getConstant(0, DL, VT), 9229 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) 9230 return SCC; 9231 } 9232 9233 return SDValue(); 9234 } 9235 9236 SDValue DAGCombiner::visitAssertExt(SDNode *N) { 9237 unsigned Opcode = N->getOpcode(); 9238 SDValue N0 = N->getOperand(0); 9239 SDValue N1 = N->getOperand(1); 9240 EVT AssertVT = cast<VTSDNode>(N1)->getVT(); 9241 9242 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt) 9243 if (N0.getOpcode() == Opcode && 9244 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT()) 9245 return N0; 9246 9247 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && 9248 N0.getOperand(0).getOpcode() == Opcode) { 9249 // We have an assert, truncate, assert sandwich. Make one stronger assert 9250 // by asserting on the smallest asserted type to the larger source type. 
9251 // This eliminates the later assert: 9252 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN 9253 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN 9254 SDValue BigA = N0.getOperand(0); 9255 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); 9256 assert(BigA_AssertVT.bitsLE(N0.getValueType()) && 9257 "Asserting zero/sign-extended bits to a type larger than the " 9258 "truncated destination does not provide information"); 9259 9260 SDLoc DL(N); 9261 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT; 9262 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT); 9263 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), 9264 BigA.getOperand(0), MinAssertVTVal); 9265 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert); 9266 } 9267 9268 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller 9269 // than X. Just move the AssertZext in front of the truncate and drop the 9270 // AssertSExt. 
9271 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && 9272 N0.getOperand(0).getOpcode() == ISD::AssertSext && 9273 Opcode == ISD::AssertZext) { 9274 SDValue BigA = N0.getOperand(0); 9275 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); 9276 assert(BigA_AssertVT.bitsLE(N0.getValueType()) && 9277 "Asserting zero/sign-extended bits to a type larger than the " 9278 "truncated destination does not provide information"); 9279 9280 if (AssertVT.bitsLT(BigA_AssertVT)) { 9281 SDLoc DL(N); 9282 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), 9283 BigA.getOperand(0), N1); 9284 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert); 9285 } 9286 } 9287 9288 return SDValue(); 9289 } 9290 9291 /// If the result of a wider load is shifted to right of N bits and then 9292 /// truncated to a narrower type and where N is a multiple of number of bits of 9293 /// the narrower type, transform it to a narrower load from address + N / num of 9294 /// bits of new type. Also narrow the load if the result is masked with an AND 9295 /// to effectively produce a smaller type. If the result is to be extended, also 9296 /// fold the extension to form a extending load. 9297 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { 9298 unsigned Opc = N->getOpcode(); 9299 9300 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; 9301 SDValue N0 = N->getOperand(0); 9302 EVT VT = N->getValueType(0); 9303 EVT ExtVT = VT; 9304 9305 // This transformation isn't valid for vector loads. 9306 if (VT.isVector()) 9307 return SDValue(); 9308 9309 unsigned ShAmt = 0; 9310 bool HasShiftedOffset = false; 9311 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then 9312 // extended to VT. 
9313 if (Opc == ISD::SIGN_EXTEND_INREG) { 9314 ExtType = ISD::SEXTLOAD; 9315 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 9316 } else if (Opc == ISD::SRL) { 9317 // Another special-case: SRL is basically zero-extending a narrower value, 9318 // or it maybe shifting a higher subword, half or byte into the lowest 9319 // bits. 9320 ExtType = ISD::ZEXTLOAD; 9321 N0 = SDValue(N, 0); 9322 9323 auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0)); 9324 auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 9325 if (!N01 || !LN0) 9326 return SDValue(); 9327 9328 uint64_t ShiftAmt = N01->getZExtValue(); 9329 uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits(); 9330 if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt) 9331 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt); 9332 else 9333 ExtVT = EVT::getIntegerVT(*DAG.getContext(), 9334 VT.getSizeInBits() - ShiftAmt); 9335 } else if (Opc == ISD::AND) { 9336 // An AND with a constant mask is the same as a truncate + zero-extend. 9337 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1)); 9338 if (!AndC) 9339 return SDValue(); 9340 9341 const APInt &Mask = AndC->getAPIntValue(); 9342 unsigned ActiveBits = 0; 9343 if (Mask.isMask()) { 9344 ActiveBits = Mask.countTrailingOnes(); 9345 } else if (Mask.isShiftedMask()) { 9346 ShAmt = Mask.countTrailingZeros(); 9347 APInt ShiftedMask = Mask.lshr(ShAmt); 9348 ActiveBits = ShiftedMask.countTrailingOnes(); 9349 HasShiftedOffset = true; 9350 } else 9351 return SDValue(); 9352 9353 ExtType = ISD::ZEXTLOAD; 9354 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 9355 } 9356 9357 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { 9358 SDValue SRL = N0; 9359 if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) { 9360 ShAmt = ConstShift->getZExtValue(); 9361 unsigned EVTBits = ExtVT.getSizeInBits(); 9362 // Is the shift amount a multiple of size of VT? 
9363 if ((ShAmt & (EVTBits-1)) == 0) { 9364 N0 = N0.getOperand(0); 9365 // Is the load width a multiple of size of VT? 9366 if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0) 9367 return SDValue(); 9368 } 9369 9370 // At this point, we must have a load or else we can't do the transform. 9371 if (!isa<LoadSDNode>(N0)) return SDValue(); 9372 9373 auto *LN0 = cast<LoadSDNode>(N0); 9374 9375 // Because a SRL must be assumed to *need* to zero-extend the high bits 9376 // (as opposed to anyext the high bits), we can't combine the zextload 9377 // lowering of SRL and an sextload. 9378 if (LN0->getExtensionType() == ISD::SEXTLOAD) 9379 return SDValue(); 9380 9381 // If the shift amount is larger than the input type then we're not 9382 // accessing any of the loaded bytes. If the load was a zextload/extload 9383 // then the result of the shift+trunc is zero/undef (handled elsewhere). 9384 if (ShAmt >= LN0->getMemoryVT().getSizeInBits()) 9385 return SDValue(); 9386 9387 // If the SRL is only used by a masking AND, we may be able to adjust 9388 // the ExtVT to make the AND redundant. 9389 SDNode *Mask = *(SRL->use_begin()); 9390 if (Mask->getOpcode() == ISD::AND && 9391 isa<ConstantSDNode>(Mask->getOperand(1))) { 9392 const APInt &ShiftMask = 9393 cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue(); 9394 if (ShiftMask.isMask()) { 9395 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), 9396 ShiftMask.countTrailingOnes()); 9397 // If the mask is smaller, recompute the type. 9398 if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) && 9399 TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT)) 9400 ExtVT = MaskedVT; 9401 } 9402 } 9403 } 9404 } 9405 9406 // If the load is shifted left (and the result isn't shifted back right), 9407 // we can fold the truncate through the shift. 
9408 unsigned ShLeftAmt = 0; 9409 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && 9410 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { 9411 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 9412 ShLeftAmt = N01->getZExtValue(); 9413 N0 = N0.getOperand(0); 9414 } 9415 } 9416 9417 // If we haven't found a load, we can't narrow it. 9418 if (!isa<LoadSDNode>(N0)) 9419 return SDValue(); 9420 9421 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 9422 if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) 9423 return SDValue(); 9424 9425 auto AdjustBigEndianShift = [&](unsigned ShAmt) { 9426 unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); 9427 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); 9428 return LVTStoreBits - EVTStoreBits - ShAmt; 9429 }; 9430 9431 // For big endian targets, we need to adjust the offset to the pointer to 9432 // load the correct bytes. 9433 if (DAG.getDataLayout().isBigEndian()) 9434 ShAmt = AdjustBigEndianShift(ShAmt); 9435 9436 EVT PtrType = N0.getOperand(1).getValueType(); 9437 uint64_t PtrOff = ShAmt / 8; 9438 unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); 9439 SDLoc DL(LN0); 9440 // The original load itself didn't wrap, so an offset within it doesn't. 
9441 SDNodeFlags Flags; 9442 Flags.setNoUnsignedWrap(true); 9443 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, 9444 PtrType, LN0->getBasePtr(), 9445 DAG.getConstant(PtrOff, DL, PtrType), 9446 Flags); 9447 AddToWorklist(NewPtr.getNode()); 9448 9449 SDValue Load; 9450 if (ExtType == ISD::NON_EXTLOAD) 9451 Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, 9452 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, 9453 LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); 9454 else 9455 Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr, 9456 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, 9457 NewAlign, LN0->getMemOperand()->getFlags(), 9458 LN0->getAAInfo()); 9459 9460 // Replace the old load's chain with the new load's chain. 9461 WorklistRemover DeadNodes(*this); 9462 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); 9463 9464 // Shift the result left, if we've swallowed a left shift. 9465 SDValue Result = Load; 9466 if (ShLeftAmt != 0) { 9467 EVT ShImmTy = getShiftAmountTy(Result.getValueType()); 9468 if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) 9469 ShImmTy = VT; 9470 // If the shift amount is as large as the result size (but, presumably, 9471 // no larger than the source) then the useful bits of the result are 9472 // zero; we can't simply return the shortened shift, because the result 9473 // of that operation is undefined. 9474 SDLoc DL(N0); 9475 if (ShLeftAmt >= VT.getSizeInBits()) 9476 Result = DAG.getConstant(0, DL, VT); 9477 else 9478 Result = DAG.getNode(ISD::SHL, DL, VT, 9479 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy)); 9480 } 9481 9482 if (HasShiftedOffset) { 9483 // Recalculate the shift amount after it has been altered to calculate 9484 // the offset. 9485 if (DAG.getDataLayout().isBigEndian()) 9486 ShAmt = AdjustBigEndianShift(ShAmt); 9487 9488 // We're using a shifted mask, so the load now has an offset. 
This means 9489 // that data has been loaded into the lower bytes than it would have been 9490 // before, so we need to shl the loaded data into the correct position in the 9491 // register. 9492 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT); 9493 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC); 9494 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 9495 } 9496 9497 // Return the new loaded value. 9498 return Result; 9499 } 9500 9501 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { 9502 SDValue N0 = N->getOperand(0); 9503 SDValue N1 = N->getOperand(1); 9504 EVT VT = N->getValueType(0); 9505 EVT EVT = cast<VTSDNode>(N1)->getVT(); 9506 unsigned VTBits = VT.getScalarSizeInBits(); 9507 unsigned EVTBits = EVT.getScalarSizeInBits(); 9508 9509 if (N0.isUndef()) 9510 return DAG.getUNDEF(VT); 9511 9512 // fold (sext_in_reg c1) -> c1 9513 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) 9514 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); 9515 9516 // If the input is already sign extended, just drop the extension. 9517 if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) 9518 return N0; 9519 9520 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 9521 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && 9522 EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) 9523 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 9524 N0.getOperand(0), N1); 9525 9526 // fold (sext_in_reg (sext x)) -> (sext x) 9527 // fold (sext_in_reg (aext x)) -> (sext x) 9528 // if x is small enough or if we know that x has more than 1 sign bit and the 9529 // sign_extend_inreg is extending from one of them. 
9530 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { 9531 SDValue N00 = N0.getOperand(0); 9532 unsigned N00Bits = N00.getScalarValueSizeInBits(); 9533 if ((N00Bits <= EVTBits || 9534 (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) && 9535 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) 9536 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00); 9537 } 9538 9539 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x) 9540 if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG || 9541 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG || 9542 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) && 9543 N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) { 9544 if (!LegalOperations || 9545 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)) 9546 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, 9547 N0.getOperand(0)); 9548 } 9549 9550 // fold (sext_in_reg (zext x)) -> (sext x) 9551 // iff we are extending the source sign bit. 9552 if (N0.getOpcode() == ISD::ZERO_EXTEND) { 9553 SDValue N00 = N0.getOperand(0); 9554 if (N00.getScalarValueSizeInBits() == EVTBits && 9555 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) 9556 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); 9557 } 9558 9559 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. 9560 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1))) 9561 return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType()); 9562 9563 // fold operands of sext_in_reg based on knowledge that the top bits are not 9564 // demanded. 
9565 if (SimplifyDemandedBits(SDValue(N, 0))) 9566 return SDValue(N, 0); 9567 9568 // fold (sext_in_reg (load x)) -> (smaller sextload x) 9569 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) 9570 if (SDValue NarrowLoad = ReduceLoadWidth(N)) 9571 return NarrowLoad; 9572 9573 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) 9574 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. 9575 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. 9576 if (N0.getOpcode() == ISD::SRL) { 9577 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 9578 if (ShAmt->getZExtValue()+EVTBits <= VTBits) { 9579 // We can turn this into an SRA iff the input to the SRL is already sign 9580 // extended enough. 9581 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); 9582 if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) 9583 return DAG.getNode(ISD::SRA, SDLoc(N), VT, 9584 N0.getOperand(0), N0.getOperand(1)); 9585 } 9586 } 9587 9588 // fold (sext_inreg (extload x)) -> (sextload x) 9589 // If sextload is not supported by target, we can only do the combine when 9590 // load has one use. Doing otherwise can block folding the extload with other 9591 // extends that the target does support. 9592 if (ISD::isEXTLoad(N0.getNode()) && 9593 ISD::isUNINDEXEDLoad(N0.getNode()) && 9594 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 9595 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() && 9596 N0.hasOneUse()) || 9597 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { 9598 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 9599 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 9600 LN0->getChain(), 9601 LN0->getBasePtr(), EVT, 9602 LN0->getMemOperand()); 9603 CombineTo(N, ExtLoad); 9604 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 9605 AddToWorklist(ExtLoad.getNode()); 9606 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
9607 } 9608 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use 9609 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 9610 N0.hasOneUse() && 9611 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 9612 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 9613 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { 9614 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 9615 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 9616 LN0->getChain(), 9617 LN0->getBasePtr(), EVT, 9618 LN0->getMemOperand()); 9619 CombineTo(N, ExtLoad); 9620 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 9621 return SDValue(N, 0); // Return N so it doesn't get rechecked! 9622 } 9623 9624 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) 9625 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { 9626 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), 9627 N0.getOperand(1), false)) 9628 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 9629 BSwap, N1); 9630 } 9631 9632 return SDValue(); 9633 } 9634 9635 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { 9636 SDValue N0 = N->getOperand(0); 9637 EVT VT = N->getValueType(0); 9638 9639 if (N0.isUndef()) 9640 return DAG.getUNDEF(VT); 9641 9642 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) 9643 return Res; 9644 9645 if (SimplifyDemandedVectorElts(SDValue(N, 0))) 9646 return SDValue(N, 0); 9647 9648 return SDValue(); 9649 } 9650 9651 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { 9652 SDValue N0 = N->getOperand(0); 9653 EVT VT = N->getValueType(0); 9654 9655 if (N0.isUndef()) 9656 return DAG.getUNDEF(VT); 9657 9658 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) 9659 return Res; 9660 9661 if (SimplifyDemandedVectorElts(SDValue(N, 0))) 9662 return SDValue(N, 0); 9663 9664 return SDValue(); 9665 } 9666 9667 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { 9668 SDValue N0 = 
N->getOperand(0); 9669 EVT VT = N->getValueType(0); 9670 bool isLE = DAG.getDataLayout().isLittleEndian(); 9671 9672 // noop truncate 9673 if (N0.getValueType() == N->getValueType(0)) 9674 return N0; 9675 9676 // fold (truncate (truncate x)) -> (truncate x) 9677 if (N0.getOpcode() == ISD::TRUNCATE) 9678 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); 9679 9680 // fold (truncate c1) -> c1 9681 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { 9682 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); 9683 if (C.getNode() != N) 9684 return C; 9685 } 9686 9687 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x 9688 if (N0.getOpcode() == ISD::ZERO_EXTEND || 9689 N0.getOpcode() == ISD::SIGN_EXTEND || 9690 N0.getOpcode() == ISD::ANY_EXTEND) { 9691 // if the source is smaller than the dest, we still need an extend. 9692 if (N0.getOperand(0).getValueType().bitsLT(VT)) 9693 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); 9694 // if the source is larger than the dest, than we just need the truncate. 9695 if (N0.getOperand(0).getValueType().bitsGT(VT)) 9696 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); 9697 // if the source and dest are the same type, we can drop both the extend 9698 // and the truncate. 9699 return N0.getOperand(0); 9700 } 9701 9702 // If this is anyext(trunc), don't fold it, allow ourselves to be folded. 9703 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND)) 9704 return SDValue(); 9705 9706 // Fold extract-and-trunc into a narrow extract. 
For example: 9707 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) 9708 // i32 y = TRUNCATE(i64 x) 9709 // -- becomes -- 9710 // v16i8 b = BITCAST (v2i64 val) 9711 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) 9712 // 9713 // Note: We only run this optimization after type legalization (which often 9714 // creates this pattern) and before operation legalization after which 9715 // we need to be more careful about the vector instructions that we generate. 9716 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 9717 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { 9718 EVT VecTy = N0.getOperand(0).getValueType(); 9719 EVT ExTy = N0.getValueType(); 9720 EVT TrTy = N->getValueType(0); 9721 9722 unsigned NumElem = VecTy.getVectorNumElements(); 9723 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); 9724 9725 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); 9726 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); 9727 9728 SDValue EltNo = N0->getOperand(1); 9729 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { 9730 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 9731 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 9732 int Index = isLE ? 
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); 9733 9734 SDLoc DL(N); 9735 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, 9736 DAG.getBitcast(NVT, N0.getOperand(0)), 9737 DAG.getConstant(Index, DL, IndexTy)); 9738 } 9739 } 9740 9741 // trunc (select c, a, b) -> select c, (trunc a), (trunc b) 9742 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) { 9743 EVT SrcVT = N0.getValueType(); 9744 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && 9745 TLI.isTruncateFree(SrcVT, VT)) { 9746 SDLoc SL(N0); 9747 SDValue Cond = N0.getOperand(0); 9748 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); 9749 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); 9750 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); 9751 } 9752 } 9753 9754 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits() 9755 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && 9756 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && 9757 TLI.isTypeDesirableForOp(ISD::SHL, VT)) { 9758 SDValue Amt = N0.getOperand(1); 9759 KnownBits Known = DAG.computeKnownBits(Amt); 9760 unsigned Size = VT.getScalarSizeInBits(); 9761 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { 9762 SDLoc SL(N); 9763 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); 9764 9765 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); 9766 if (AmtVT != Amt.getValueType()) { 9767 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT); 9768 AddToWorklist(Amt.getNode()); 9769 } 9770 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt); 9771 } 9772 } 9773 9774 // Fold a series of buildvector, bitcast, and truncate if possible. 9775 // For example fold 9776 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to 9777 // (2xi32 (buildvector x, y)). 
9778 if (Level == AfterLegalizeVectorOps && VT.isVector() && 9779 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && 9780 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && 9781 N0.getOperand(0).hasOneUse()) { 9782 SDValue BuildVect = N0.getOperand(0); 9783 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); 9784 EVT TruncVecEltTy = VT.getVectorElementType(); 9785 9786 // Check that the element types match. 9787 if (BuildVectEltTy == TruncVecEltTy) { 9788 // Now we only need to compute the offset of the truncated elements. 9789 unsigned BuildVecNumElts = BuildVect.getNumOperands(); 9790 unsigned TruncVecNumElts = VT.getVectorNumElements(); 9791 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; 9792 9793 assert((BuildVecNumElts % TruncVecNumElts) == 0 && 9794 "Invalid number of elements"); 9795 9796 SmallVector<SDValue, 8> Opnds; 9797 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) 9798 Opnds.push_back(BuildVect.getOperand(i)); 9799 9800 return DAG.getBuildVector(VT, SDLoc(N), Opnds); 9801 } 9802 } 9803 9804 // See if we can simplify the input to this truncate through knowledge that 9805 // only the low bits are being used. 9806 // For example "trunc (or (shl x, 8), y)" // -> trunc y 9807 // Currently we only perform this optimization on scalars because vectors 9808 // may have different active low bits. 9809 if (!VT.isVector()) { 9810 APInt Mask = 9811 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits()); 9812 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask)) 9813 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); 9814 } 9815 9816 // fold (truncate (load x)) -> (smaller load x) 9817 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) 9818 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { 9819 if (SDValue Reduced = ReduceLoadWidth(N)) 9820 return Reduced; 9821 9822 // Handle the case where the load remains an extending load even 9823 // after truncation. 
9824 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { 9825 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 9826 if (!LN0->isVolatile() && 9827 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { 9828 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), 9829 VT, LN0->getChain(), LN0->getBasePtr(), 9830 LN0->getMemoryVT(), 9831 LN0->getMemOperand()); 9832 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); 9833 return NewLoad; 9834 } 9835 } 9836 } 9837 9838 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), 9839 // where ... are all 'undef'. 9840 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { 9841 SmallVector<EVT, 8> VTs; 9842 SDValue V; 9843 unsigned Idx = 0; 9844 unsigned NumDefs = 0; 9845 9846 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { 9847 SDValue X = N0.getOperand(i); 9848 if (!X.isUndef()) { 9849 V = X; 9850 Idx = i; 9851 NumDefs++; 9852 } 9853 // Stop if more than one members are non-undef. 9854 if (NumDefs > 1) 9855 break; 9856 VTs.push_back(EVT::getVectorVT(*DAG.getContext(), 9857 VT.getVectorElementType(), 9858 X.getValueType().getVectorNumElements())); 9859 } 9860 9861 if (NumDefs == 0) 9862 return DAG.getUNDEF(VT); 9863 9864 if (NumDefs == 1) { 9865 assert(V.getNode() && "The single defined operand is empty!"); 9866 SmallVector<SDValue, 8> Opnds; 9867 for (unsigned i = 0, e = VTs.size(); i != e; ++i) { 9868 if (i != Idx) { 9869 Opnds.push_back(DAG.getUNDEF(VTs[i])); 9870 continue; 9871 } 9872 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); 9873 AddToWorklist(NV.getNode()); 9874 Opnds.push_back(NV); 9875 } 9876 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); 9877 } 9878 } 9879 9880 // Fold truncate of a bitcast of a vector to an extract of the low vector 9881 // element. 9882 // 9883 // e.g. 
trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx 9884 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) { 9885 SDValue VecSrc = N0.getOperand(0); 9886 EVT SrcVT = VecSrc.getValueType(); 9887 if (SrcVT.isVector() && SrcVT.getScalarType() == VT && 9888 (!LegalOperations || 9889 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) { 9890 SDLoc SL(N); 9891 9892 EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); 9893 unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1; 9894 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, 9895 VecSrc, DAG.getConstant(Idx, SL, IdxVT)); 9896 } 9897 } 9898 9899 // Simplify the operands using demanded-bits information. 9900 if (!VT.isVector() && 9901 SimplifyDemandedBits(SDValue(N, 0))) 9902 return SDValue(N, 0); 9903 9904 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) 9905 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) 9906 // When the adde's carry is not used. 9907 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) && 9908 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) && 9909 (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) { 9910 SDLoc SL(N); 9911 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); 9912 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); 9913 auto VTs = DAG.getVTList(VT, N0->getValueType(1)); 9914 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2)); 9915 } 9916 9917 // fold (truncate (extract_subvector(ext x))) -> 9918 // (extract_subvector x) 9919 // TODO: This can be generalized to cover cases where the truncate and extract 9920 // do not fully cancel each other out. 
9921 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) { 9922 SDValue N00 = N0.getOperand(0); 9923 if (N00.getOpcode() == ISD::SIGN_EXTEND || 9924 N00.getOpcode() == ISD::ZERO_EXTEND || 9925 N00.getOpcode() == ISD::ANY_EXTEND) { 9926 if (N00.getOperand(0)->getValueType(0).getVectorElementType() == 9927 VT.getVectorElementType()) 9928 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT, 9929 N00.getOperand(0), N0.getOperand(1)); 9930 } 9931 } 9932 9933 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) 9934 return NewVSel; 9935 9936 // Narrow a suitable binary operation with a non-opaque constant operand by 9937 // moving it ahead of the truncate. This is limited to pre-legalization 9938 // because targets may prefer a wider type during later combines and invert 9939 // this transform. 9940 switch (N0.getOpcode()) { 9941 case ISD::ADD: 9942 case ISD::SUB: 9943 case ISD::MUL: 9944 case ISD::AND: 9945 case ISD::OR: 9946 case ISD::XOR: 9947 if (!LegalOperations && N0.hasOneUse() && 9948 (isConstantOrConstantVector(N0.getOperand(0), true) || 9949 isConstantOrConstantVector(N0.getOperand(1), true))) { 9950 // TODO: We already restricted this to pre-legalization, but for vectors 9951 // we are extra cautious to not create an unsupported operation. 9952 // Target-specific changes are likely needed to avoid regressions here. 
9953 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) { 9954 SDLoc DL(N); 9955 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0)); 9956 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1)); 9957 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR); 9958 } 9959 } 9960 } 9961 9962 return SDValue(); 9963 } 9964 9965 static SDNode *getBuildPairElt(SDNode *N, unsigned i) { 9966 SDValue Elt = N->getOperand(i); 9967 if (Elt.getOpcode() != ISD::MERGE_VALUES) 9968 return Elt.getNode(); 9969 return Elt.getOperand(Elt.getResNo()).getNode(); 9970 } 9971 9972 /// build_pair (load, load) -> load 9973 /// if load locations are consecutive. 9974 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { 9975 assert(N->getOpcode() == ISD::BUILD_PAIR); 9976 9977 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); 9978 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); 9979 9980 // A BUILD_PAIR is always having the least significant part in elt 0 and the 9981 // most significant part in elt 1. So when combining into one large load, we 9982 // need to consider the endianness. 
9983 if (DAG.getDataLayout().isBigEndian()) 9984 std::swap(LD1, LD2); 9985 9986 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || 9987 LD1->getAddressSpace() != LD2->getAddressSpace()) 9988 return SDValue(); 9989 EVT LD1VT = LD1->getValueType(0); 9990 unsigned LD1Bytes = LD1VT.getStoreSize(); 9991 if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && 9992 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { 9993 unsigned Align = LD1->getAlignment(); 9994 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( 9995 VT.getTypeForEVT(*DAG.getContext())); 9996 9997 if (NewAlign <= Align && 9998 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) 9999 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), 10000 LD1->getPointerInfo(), Align); 10001 } 10002 10003 return SDValue(); 10004 } 10005 10006 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { 10007 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi 10008 // and Lo parts; on big-endian machines it doesn't. 10009 return DAG.getDataLayout().isBigEndian() ? 1 : 0; 10010 } 10011 10012 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, 10013 const TargetLowering &TLI) { 10014 // If this is not a bitcast to an FP type or if the target doesn't have 10015 // IEEE754-compliant FP logic, we're done. 10016 EVT VT = N->getValueType(0); 10017 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT)) 10018 return SDValue(); 10019 10020 // TODO: Handle cases where the integer constant is a different scalar 10021 // bitwidth to the FP. 
10022 SDValue N0 = N->getOperand(0); 10023 EVT SourceVT = N0.getValueType(); 10024 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits()) 10025 return SDValue(); 10026 10027 unsigned FPOpcode; 10028 APInt SignMask; 10029 switch (N0.getOpcode()) { 10030 case ISD::AND: 10031 FPOpcode = ISD::FABS; 10032 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits()); 10033 break; 10034 case ISD::XOR: 10035 FPOpcode = ISD::FNEG; 10036 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits()); 10037 break; 10038 case ISD::OR: 10039 FPOpcode = ISD::FABS; 10040 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits()); 10041 break; 10042 default: 10043 return SDValue(); 10044 } 10045 10046 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X 10047 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X 10048 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) -> 10049 // fneg (fabs X) 10050 SDValue LogicOp0 = N0.getOperand(0); 10051 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true); 10052 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask && 10053 LogicOp0.getOpcode() == ISD::BITCAST && 10054 LogicOp0.getOperand(0).getValueType() == VT) { 10055 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0)); 10056 NumFPLogicOpsConv++; 10057 if (N0.getOpcode() == ISD::OR) 10058 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp); 10059 return FPOp; 10060 } 10061 10062 return SDValue(); 10063 } 10064 10065 SDValue DAGCombiner::visitBITCAST(SDNode *N) { 10066 SDValue N0 = N->getOperand(0); 10067 EVT VT = N->getValueType(0); 10068 10069 if (N0.isUndef()) 10070 return DAG.getUNDEF(VT); 10071 10072 // If the input is a BUILD_VECTOR with all constant elements, fold this now. 10073 // Only do this before legalize types, since we might create an illegal 10074 // scalar type. 
Even if we knew we wouldn't create an illegal scalar type 10075 // we can only do this before legalize ops, since the target maybe 10076 // depending on the bitcast. 10077 // First check to see if this is all constant. 10078 if (!LegalTypes && 10079 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && 10080 VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant()) 10081 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), 10082 VT.getVectorElementType()); 10083 10084 // If the input is a constant, let getNode fold it. 10085 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { 10086 // If we can't allow illegal operations, we need to check that this is just 10087 // a fp -> int or int -> conversion and that the resulting operation will 10088 // be legal. 10089 if (!LegalOperations || 10090 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && 10091 TLI.isOperationLegal(ISD::ConstantFP, VT)) || 10092 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && 10093 TLI.isOperationLegal(ISD::Constant, VT))) { 10094 SDValue C = DAG.getBitcast(VT, N0); 10095 if (C.getNode() != N) 10096 return C; 10097 } 10098 } 10099 10100 // (conv (conv x, t1), t2) -> (conv x, t2) 10101 if (N0.getOpcode() == ISD::BITCAST) 10102 return DAG.getBitcast(VT, N0.getOperand(0)); 10103 10104 // fold (conv (load x)) -> (load (conv*)x) 10105 // If the resultant load doesn't need a higher alignment than the original! 10106 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 10107 // Do not remove the cast if the types differ in endian layout. 10108 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == 10109 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && 10110 // If the load is volatile, we only want to change the load type if the 10111 // resulting load is legal. Otherwise we might increase the number of 10112 // memory accesses. 
We don't care if the original type was legal or not 10113 // as we assume software couldn't rely on the number of accesses of an 10114 // illegal type. 10115 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 10116 TLI.isOperationLegal(ISD::LOAD, VT)) && 10117 TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { 10118 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 10119 unsigned OrigAlign = LN0->getAlignment(); 10120 10121 bool Fast = false; 10122 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, 10123 LN0->getAddressSpace(), OrigAlign, &Fast) && 10124 Fast) { 10125 SDValue Load = 10126 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), 10127 LN0->getPointerInfo(), OrigAlign, 10128 LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); 10129 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); 10130 return Load; 10131 } 10132 } 10133 10134 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI)) 10135 return V; 10136 10137 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 10138 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 10139 // 10140 // For ppc_fp128: 10141 // fold (bitcast (fneg x)) -> 10142 // flipbit = signbit 10143 // (xor (bitcast x) (build_pair flipbit, flipbit)) 10144 // 10145 // fold (bitcast (fabs x)) -> 10146 // flipbit = (and (extract_element (bitcast x), 0), signbit) 10147 // (xor (bitcast x) (build_pair flipbit, flipbit)) 10148 // This often reduces constant pool loads. 
10149 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || 10150 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && 10151 N0.getNode()->hasOneUse() && VT.isInteger() && 10152 !VT.isVector() && !N0.getValueType().isVector()) { 10153 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0)); 10154 AddToWorklist(NewConv.getNode()); 10155 10156 SDLoc DL(N); 10157 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { 10158 assert(VT.getSizeInBits() == 128); 10159 SDValue SignBit = DAG.getConstant( 10160 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); 10161 SDValue FlipBit; 10162 if (N0.getOpcode() == ISD::FNEG) { 10163 FlipBit = SignBit; 10164 AddToWorklist(FlipBit.getNode()); 10165 } else { 10166 assert(N0.getOpcode() == ISD::FABS); 10167 SDValue Hi = 10168 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv, 10169 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), 10170 SDLoc(NewConv))); 10171 AddToWorklist(Hi.getNode()); 10172 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit); 10173 AddToWorklist(FlipBit.getNode()); 10174 } 10175 SDValue FlipBits = 10176 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); 10177 AddToWorklist(FlipBits.getNode()); 10178 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); 10179 } 10180 APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); 10181 if (N0.getOpcode() == ISD::FNEG) 10182 return DAG.getNode(ISD::XOR, DL, VT, 10183 NewConv, DAG.getConstant(SignBit, DL, VT)); 10184 assert(N0.getOpcode() == ISD::FABS); 10185 return DAG.getNode(ISD::AND, DL, VT, 10186 NewConv, DAG.getConstant(~SignBit, DL, VT)); 10187 } 10188 10189 // fold (bitconvert (fcopysign cst, x)) -> 10190 // (or (and (bitconvert x), sign), (and cst, (not sign))) 10191 // Note that we don't handle (copysign x, cst) because this can always be 10192 // folded to an fneg or fabs. 
10193 // 10194 // For ppc_fp128: 10195 // fold (bitcast (fcopysign cst, x)) -> 10196 // flipbit = (and (extract_element 10197 // (xor (bitcast cst), (bitcast x)), 0), 10198 // signbit) 10199 // (xor (bitcast cst) (build_pair flipbit, flipbit)) 10200 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && 10201 isa<ConstantFPSDNode>(N0.getOperand(0)) && 10202 VT.isInteger() && !VT.isVector()) { 10203 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits(); 10204 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); 10205 if (isTypeLegal(IntXVT)) { 10206 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1)); 10207 AddToWorklist(X.getNode()); 10208 10209 // If X has a different width than the result/lhs, sext it or truncate it. 10210 unsigned VTWidth = VT.getSizeInBits(); 10211 if (OrigXWidth < VTWidth) { 10212 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); 10213 AddToWorklist(X.getNode()); 10214 } else if (OrigXWidth > VTWidth) { 10215 // To get the sign bit in the right place, we have to shift it right 10216 // before truncating. 
10217 SDLoc DL(X); 10218 X = DAG.getNode(ISD::SRL, DL, 10219 X.getValueType(), X, 10220 DAG.getConstant(OrigXWidth-VTWidth, DL, 10221 X.getValueType())); 10222 AddToWorklist(X.getNode()); 10223 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 10224 AddToWorklist(X.getNode()); 10225 } 10226 10227 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { 10228 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2); 10229 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); 10230 AddToWorklist(Cst.getNode()); 10231 SDValue X = DAG.getBitcast(VT, N0.getOperand(1)); 10232 AddToWorklist(X.getNode()); 10233 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); 10234 AddToWorklist(XorResult.getNode()); 10235 SDValue XorResult64 = DAG.getNode( 10236 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult, 10237 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), 10238 SDLoc(XorResult))); 10239 AddToWorklist(XorResult64.getNode()); 10240 SDValue FlipBit = 10241 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64, 10242 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64)); 10243 AddToWorklist(FlipBit.getNode()); 10244 SDValue FlipBits = 10245 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); 10246 AddToWorklist(FlipBits.getNode()); 10247 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); 10248 } 10249 APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); 10250 X = DAG.getNode(ISD::AND, SDLoc(X), VT, 10251 X, DAG.getConstant(SignBit, SDLoc(X), VT)); 10252 AddToWorklist(X.getNode()); 10253 10254 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); 10255 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, 10256 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); 10257 AddToWorklist(Cst.getNode()); 10258 10259 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); 10260 } 10261 } 10262 10263 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 
10264 if (N0.getOpcode() == ISD::BUILD_PAIR) 10265 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT)) 10266 return CombineLD; 10267 10268 // Remove double bitcasts from shuffles - this is often a legacy of 10269 // XformToShuffleWithZero being used to combine bitmaskings (of 10270 // float vectors bitcast to integer vectors) into shuffles. 10271 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) 10272 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && 10273 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() && 10274 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && 10275 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { 10276 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0); 10277 10278 // If operands are a bitcast, peek through if it casts the original VT. 10279 // If operands are a constant, just bitcast back to original VT. 10280 auto PeekThroughBitcast = [&](SDValue Op) { 10281 if (Op.getOpcode() == ISD::BITCAST && 10282 Op.getOperand(0).getValueType() == VT) 10283 return SDValue(Op.getOperand(0)); 10284 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || 10285 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) 10286 return DAG.getBitcast(VT, Op); 10287 return SDValue(); 10288 }; 10289 10290 // FIXME: If either input vector is bitcast, try to convert the shuffle to 10291 // the result type of this bitcast. This would eliminate at least one 10292 // bitcast. See the transform in InstCombine. 10293 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); 10294 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); 10295 if (!(SV0 && SV1)) 10296 return SDValue(); 10297 10298 int MaskScale = 10299 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); 10300 SmallVector<int, 8> NewMask; 10301 for (int M : SVN->getMask()) 10302 for (int i = 0; i != MaskScale; ++i) 10303 NewMask.push_back(M < 0 ? 
-1 : M * MaskScale + i);

    // Ask the target whether the scaled mask is legal. If it is not, retry
    // once with the two shuffle inputs swapped and the mask commuted to match.
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}

/// Visit a BUILD_PAIR node. The only combine attempted is handed off to
/// CombineConsecutiveLoads, which receives the node and its first result type;
/// whatever that helper returns is returned unchanged.
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  EVT VT = N->getValueType(0);
  return CombineConsecutiveLoads(N, VT);
}

/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
///
/// The result is a build_vector whose elements have type DstEltVT:
///  - BV itself (result 0) when the element type already matches;
///  - an element-by-element bitcast when source and destination elements have
///    the same bit width;
///  - otherwise the constants are handled as integers and either merged into
///    wider elements or split into narrower ones, with the element order
///    chosen from the data layout's endianness. Floating-point element types
///    on either side are routed through same-width integer types by recursing.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      // Queue each freshly created bitcast so the combiner revisits it.
      AddToWorklist(Ops.back().getNode());
    }
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to an int vector where the elements are
    // the same sizes. BV is rebound to the converted node, so everything below
    // operates on the integer form.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handle growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    // NOTE(review): the loop terminates on `i != e`, so it presumably relies on
    // the operand count being a multiple of NumInputsPerOutput - confirm with
    // the callers.
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      // Stays true only if every input in this group is undef.
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        // The input consumed first ends up in the most significant bits. On
        // little-endian layouts that is the last input of the group; otherwise
        // it is the first.
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        // An undef input leaves its slice of NewBits as zero.
        if (Op.isUndef()) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    // An undef input expands to NumOutputsPerInput undef outputs.
    if (Op.isUndef()) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    // Peel off DstBitSize bits at a time, least significant piece first.
    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal.lshrInPlace(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getBuildVector(VT, DL, Ops);
}

/// Return true if the flags on \p N have either the allow-contract or the
/// allow-reassociation bit set.
static bool isContractable(SDNode *N) {
  SDNodeFlags F = N->getFlags();
  return F.hasAllowContract() || F.hasAllowReassociation();
}

/// Try to perform FMA combining on a given FADD node.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  SDNodeFlags Flags = N->getFlags();
  // CanFuse gates the nested (fma ... (fmul ...)) folds in the Aggressive
  // section below; AllowFusionGlobally gates recognition of FMUL operands.
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);
  // If the addition is not contractable, do not combine.
  // NOTE(review): AllowFusionGlobally already includes isContractable(N) via
  // CanFuse, so the second operand of this condition is implied by the first.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Bail out when the subtarget reports that FMAs are formed in the machine
  // combiner at this optimization level.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1, Flags);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0, Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)), N1, Flags);
    }
  }

  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)), N0, Flags);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
    // Both the outer fused node and its FMUL operand must have a single use.
    if (CanFuse &&
        N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1, Flags), Flags);
    }

    // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
    if (CanFuse &&
        N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
        N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0, Flags), Flags);
    }


    // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), z))
    // Helper that builds the replacement: an inner fused node over the extended
    // U and V with addend Z, used as the addend of an outer fused node over X
    // and Y. Both fused nodes carry Flags.
    auto FoldFAddFMAFPExtFMul = [&] (
        SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
        SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                      N020.getOperand(0), N020.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    // Helper that builds the replacement: like FoldFAddFMAFPExtFMul, except
    // that X and Y are extended to VT as well.
    auto FoldFAddFPExtFMAFMul = [&] (
        SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
        SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                      N002.getOperand(0), N002.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode) {
      SDValue N12 = N1.getOperand(2);
      if (N12.getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N12.getOperand(0);
        if (isContractableFMUL(N120) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                      N120.getOperand(0), N120.getOperand(1),
                                      N0, Flags);
        }
      }
    }

    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == PreferredFusedOpcode) {
        SDValue N102 = N10.getOperand(2);
        if (isContractableFMUL(N102) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                      N102.getOperand(0), N102.getOperand(1),
                                      N0, Flags);
        }
      }
    }
  }

  // No fold applied.
  return SDValue();
}

/// Try to perform FMA combining on a given FSUB node.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  const SDNodeFlags Flags = N->getFlags();
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);

  // If the subtraction is not contractable, do not combine.
if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Bail out when the subtarget reports that FMAs are formed in the machine
  // combiner at this optimization level.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1),
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT,
                                   N1.getOperand(0)),
                       N1.getOperand(1), N0, Flags);
  }

  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
  // Without Aggressive, both the FNEG and the FMUL under it must be single-use.
  if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fsub (fpext (fmul x, y)), z)
  //   -> (fma (fpext x), (fpext y), (fneg z))
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)),
                         DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
    }
  }

  // fold (fsub x, (fpext (fmul y, z)))
  //   -> (fma (fneg (fpext y)), (fpext z), x)
  // Note: Commutes FSUB operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                 N10.getOperand(0))),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)),
                         N0, Flags);
    }
  }

  // fold (fsub (fpext (fneg (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // us from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FNEG) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
        // Flags are attached to the fused node only, not to the outer FNEG.
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // fold (fsub (fneg (fpext (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // us from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FNEG) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FP_EXTEND) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
    if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
        N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT,
                                                 N1), Flags), Flags);
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    // NOTE(review): unlike the mirrored fold just above, this condition has no
    // hasOneUse() checks on N1 or on its FMUL operand - confirm that this is
    // intentional.
    if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N1.getOperand(2))) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FNEG, SL, VT, N20),
                                     N21, N0, Flags), Flags);
    }


    // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             N0.getOperand(0), N0.getOperand(1),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y),
    //           (fma (fpext u), (fpext v), (fneg z)))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(1)),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
    //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
      SDValue N120 = N1.getOperand(2).getOperand(0);
      if (isContractableFMUL(N120) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
        SDValue N1200 = N120.getOperand(0);
        SDValue N1201 = N120.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
                           N1.getOperand(1),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1200)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1201),
                                       N0, Flags), Flags);
      }
    }

    // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
    //   -> (fma (fneg (fpext y)), (fpext z),
    //           (fma (fneg (fpext u)), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND &&
        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
      SDValue CvtSrc = N1.getOperand(0);
      SDValue N100 = CvtSrc.getOperand(0);
      SDValue N101 = CvtSrc.getOperand(1);
      SDValue N102 = CvtSrc.getOperand(2);
      if (isContractableFMUL(N102) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
        SDValue N1020 = N102.getOperand(0);
        SDValue N1021 = N102.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N100)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1020)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1021),
                                       N0, Flags), Flags);
      }
    }
  }

  // No fold applied.
  return SDValue();
}

/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition).
///
/// Nothing is combined unless the NoInfsFPMath target option is set and a
/// fused opcode (FMA or FMAD) is usable for the node's type. An empty SDValue
/// is returned when no fold applies.
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);
  const SDNodeFlags Flags = N->getFlags();

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  if (!Options.NoInfsFPMath)
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
  // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
  // X is the candidate FADD and Y the other multiplicand. Unless Aggressive is
  // set, X must have a single use. Yields an empty SDValue when X does not
  // match.
  auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y, Flags);
        if (C->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
      }
    }
    return SDValue();
  };

  // Try the FADD pattern with either multiplicand in the FADD position.
  if (SDValue FMA = FuseFADD(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0, Flags))
    return FMA;

  // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
  // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
  // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
  // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
  // Same shape as FuseFADD, but for an FSUB in X: the constant is looked for
  // first in operand 0 and then in operand 1.
  auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
        if (C0->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             Y, Flags);
        if (C0->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
      }
      if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C1->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
        if (C1->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y, Flags);
      }
    }
    return SDValue();
  };

  // Try the FSUB pattern with either multiplicand in the FSUB position.
  if (SDValue FMA = FuseFSUB(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0, Flags))
    return FMA;

  return SDValue();
}

SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
  if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
      return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) ==
2) 11081 return DAG.getNode(ISD::FSUB, DL, VT, N1, 11082 GetNegatedExpression(N0, DAG, LegalOperations), Flags); 11083 11084 auto isFMulNegTwo = [](SDValue FMul) { 11085 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) 11086 return false; 11087 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true); 11088 return C && C->isExactlyValue(-2.0); 11089 }; 11090 11091 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B) 11092 if (isFMulNegTwo(N0)) { 11093 SDValue B = N0.getOperand(0); 11094 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags); 11095 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags); 11096 } 11097 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B) 11098 if (isFMulNegTwo(N1)) { 11099 SDValue B = N1.getOperand(0); 11100 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags); 11101 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags); 11102 } 11103 11104 // No FP constant should be created after legalization as Instruction 11105 // Selection pass has a hard time dealing with FP constants. 11106 bool AllowNewConst = (Level < AfterLegalizeDAG); 11107 11108 // If 'unsafe math' or nnan is enabled, fold lots of things. 11109 if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) { 11110 // If allowed, fold (fadd (fneg x), x) -> 0.0 11111 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) 11112 return DAG.getConstantFP(0.0, DL, VT); 11113 11114 // If allowed, fold (fadd x, (fneg x)) -> 0.0 11115 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) 11116 return DAG.getConstantFP(0.0, DL, VT); 11117 } 11118 11119 // If 'unsafe math' or reassoc and nsz, fold lots of things. 
11120 // TODO: break out portions of the transformations below for which Unsafe is 11121 // considered and which do not require both nsz and reassoc 11122 if ((Options.UnsafeFPMath || 11123 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && 11124 AllowNewConst) { 11125 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 11126 if (N1CFP && N0.getOpcode() == ISD::FADD && 11127 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { 11128 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags); 11129 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags); 11130 } 11131 11132 // We can fold chains of FADD's of the same value into multiplications. 11133 // This transform is not safe in general because we are reducing the number 11134 // of rounding steps. 11135 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { 11136 if (N0.getOpcode() == ISD::FMUL) { 11137 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); 11138 bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); 11139 11140 // (fadd (fmul x, c), x) -> (fmul x, c+1) 11141 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { 11142 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), 11143 DAG.getConstantFP(1.0, DL, VT), Flags); 11144 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); 11145 } 11146 11147 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) 11148 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && 11149 N1.getOperand(0) == N1.getOperand(1) && 11150 N0.getOperand(0) == N1.getOperand(0)) { 11151 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), 11152 DAG.getConstantFP(2.0, DL, VT), Flags); 11153 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); 11154 } 11155 } 11156 11157 if (N1.getOpcode() == ISD::FMUL) { 11158 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); 11159 bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); 11160 11161 
// (fadd x, (fmul x, c)) -> (fmul x, c+1) 11162 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { 11163 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), 11164 DAG.getConstantFP(1.0, DL, VT), Flags); 11165 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); 11166 } 11167 11168 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) 11169 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && 11170 N0.getOperand(0) == N0.getOperand(1) && 11171 N1.getOperand(0) == N0.getOperand(0)) { 11172 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), 11173 DAG.getConstantFP(2.0, DL, VT), Flags); 11174 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); 11175 } 11176 } 11177 11178 if (N0.getOpcode() == ISD::FADD) { 11179 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); 11180 // (fadd (fadd x, x), x) -> (fmul x, 3.0) 11181 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && 11182 (N0.getOperand(0) == N1)) { 11183 return DAG.getNode(ISD::FMUL, DL, VT, 11184 N1, DAG.getConstantFP(3.0, DL, VT), Flags); 11185 } 11186 } 11187 11188 if (N1.getOpcode() == ISD::FADD) { 11189 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); 11190 // (fadd x, (fadd x, x)) -> (fmul x, 3.0) 11191 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && 11192 N1.getOperand(0) == N0) { 11193 return DAG.getNode(ISD::FMUL, DL, VT, 11194 N0, DAG.getConstantFP(3.0, DL, VT), Flags); 11195 } 11196 } 11197 11198 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) 11199 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && 11200 N0.getOperand(0) == N0.getOperand(1) && 11201 N1.getOperand(0) == N1.getOperand(1) && 11202 N0.getOperand(0) == N1.getOperand(0)) { 11203 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), 11204 DAG.getConstantFP(4.0, DL, VT), Flags); 11205 } 11206 } 11207 } // enable-unsafe-fp-math 11208 11209 // FADD -> FMA combines: 11210 if (SDValue Fused = visitFADDForFMACombine(N)) { 11211 
AddToWorklist(Fused.getNode()); 11212 return Fused; 11213 } 11214 return SDValue(); 11215 } 11216 11217 SDValue DAGCombiner::visitFSUB(SDNode *N) { 11218 SDValue N0 = N->getOperand(0); 11219 SDValue N1 = N->getOperand(1); 11220 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true); 11221 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); 11222 EVT VT = N->getValueType(0); 11223 SDLoc DL(N); 11224 const TargetOptions &Options = DAG.getTarget().Options; 11225 const SDNodeFlags Flags = N->getFlags(); 11226 11227 // fold vector ops 11228 if (VT.isVector()) 11229 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 11230 return FoldedVOp; 11231 11232 // fold (fsub c1, c2) -> c1-c2 11233 if (N0CFP && N1CFP) 11234 return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags); 11235 11236 if (SDValue NewSel = foldBinOpIntoSelect(N)) 11237 return NewSel; 11238 11239 // (fsub A, 0) -> A 11240 if (N1CFP && N1CFP->isZero()) { 11241 if (!N1CFP->isNegative() || Options.UnsafeFPMath || 11242 Flags.hasNoSignedZeros()) { 11243 return N0; 11244 } 11245 } 11246 11247 if (N0 == N1) { 11248 // (fsub x, x) -> 0.0 11249 if (Options.UnsafeFPMath || Flags.hasNoNaNs()) 11250 return DAG.getConstantFP(0.0f, DL, VT); 11251 } 11252 11253 // (fsub -0.0, N1) -> -N1 11254 if (N0CFP && N0CFP->isZero()) { 11255 if (N0CFP->isNegative() || 11256 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { 11257 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) 11258 return GetNegatedExpression(N1, DAG, LegalOperations); 11259 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 11260 return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); 11261 } 11262 } 11263 11264 if ((Options.UnsafeFPMath || 11265 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) 11266 && N1.getOpcode() == ISD::FADD) { 11267 // X - (X + Y) -> -Y 11268 if (N0 == N1->getOperand(0)) 11269 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags); 11270 // X - (Y + X) -> -Y 11271 if (N0 == N1->getOperand(1)) 11272 
return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags); 11273 } 11274 11275 // fold (fsub A, (fneg B)) -> (fadd A, B) 11276 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) 11277 return DAG.getNode(ISD::FADD, DL, VT, N0, 11278 GetNegatedExpression(N1, DAG, LegalOperations), Flags); 11279 11280 // FSUB -> FMA combines: 11281 if (SDValue Fused = visitFSUBForFMACombine(N)) { 11282 AddToWorklist(Fused.getNode()); 11283 return Fused; 11284 } 11285 11286 return SDValue(); 11287 } 11288 11289 SDValue DAGCombiner::visitFMUL(SDNode *N) { 11290 SDValue N0 = N->getOperand(0); 11291 SDValue N1 = N->getOperand(1); 11292 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true); 11293 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); 11294 EVT VT = N->getValueType(0); 11295 SDLoc DL(N); 11296 const TargetOptions &Options = DAG.getTarget().Options; 11297 const SDNodeFlags Flags = N->getFlags(); 11298 11299 // fold vector ops 11300 if (VT.isVector()) { 11301 // This just handles C1 * C2 for vectors. Other vector folds are below. 
11302 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 11303 return FoldedVOp; 11304 } 11305 11306 // fold (fmul c1, c2) -> c1*c2 11307 if (N0CFP && N1CFP) 11308 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags); 11309 11310 // canonicalize constant to RHS 11311 if (isConstantFPBuildVectorOrConstantFP(N0) && 11312 !isConstantFPBuildVectorOrConstantFP(N1)) 11313 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); 11314 11315 // fold (fmul A, 1.0) -> A 11316 if (N1CFP && N1CFP->isExactlyValue(1.0)) 11317 return N0; 11318 11319 if (SDValue NewSel = foldBinOpIntoSelect(N)) 11320 return NewSel; 11321 11322 if (Options.UnsafeFPMath || 11323 (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) { 11324 // fold (fmul A, 0) -> 0 11325 if (N1CFP && N1CFP->isZero()) 11326 return N1; 11327 } 11328 11329 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) { 11330 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2 11331 if (isConstantFPBuildVectorOrConstantFP(N1) && 11332 N0.getOpcode() == ISD::FMUL) { 11333 SDValue N00 = N0.getOperand(0); 11334 SDValue N01 = N0.getOperand(1); 11335 // Avoid an infinite loop by making sure that N00 is not a constant 11336 // (the inner multiply has not been constant folded yet). 11337 if (isConstantFPBuildVectorOrConstantFP(N01) && 11338 !isConstantFPBuildVectorOrConstantFP(N00)) { 11339 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); 11340 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); 11341 } 11342 } 11343 11344 // Match a special-case: we convert X * 2.0 into fadd. 
11345 // fmul (fadd X, X), C -> fmul X, 2.0 * C 11346 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() && 11347 N0.getOperand(0) == N0.getOperand(1)) { 11348 const SDValue Two = DAG.getConstantFP(2.0, DL, VT); 11349 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); 11350 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); 11351 } 11352 } 11353 11354 // fold (fmul X, 2.0) -> (fadd X, X) 11355 if (N1CFP && N1CFP->isExactlyValue(+2.0)) 11356 return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags); 11357 11358 // fold (fmul X, -1.0) -> (fneg X) 11359 if (N1CFP && N1CFP->isExactlyValue(-1.0)) 11360 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 11361 return DAG.getNode(ISD::FNEG, DL, VT, N0); 11362 11363 // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) 11364 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { 11365 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { 11366 // Both can be negated for free, check to see if at least one is cheaper 11367 // negated. 
11368 if (LHSNeg == 2 || RHSNeg == 2) 11369 return DAG.getNode(ISD::FMUL, DL, VT, 11370 GetNegatedExpression(N0, DAG, LegalOperations), 11371 GetNegatedExpression(N1, DAG, LegalOperations), 11372 Flags); 11373 } 11374 } 11375 11376 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) 11377 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X) 11378 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() && 11379 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) && 11380 TLI.isOperationLegal(ISD::FABS, VT)) { 11381 SDValue Select = N0, X = N1; 11382 if (Select.getOpcode() != ISD::SELECT) 11383 std::swap(Select, X); 11384 11385 SDValue Cond = Select.getOperand(0); 11386 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1)); 11387 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2)); 11388 11389 if (TrueOpnd && FalseOpnd && 11390 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X && 11391 isa<ConstantFPSDNode>(Cond.getOperand(1)) && 11392 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) { 11393 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 11394 switch (CC) { 11395 default: break; 11396 case ISD::SETOLT: 11397 case ISD::SETULT: 11398 case ISD::SETOLE: 11399 case ISD::SETULE: 11400 case ISD::SETLT: 11401 case ISD::SETLE: 11402 std::swap(TrueOpnd, FalseOpnd); 11403 LLVM_FALLTHROUGH; 11404 case ISD::SETOGT: 11405 case ISD::SETUGT: 11406 case ISD::SETOGE: 11407 case ISD::SETUGE: 11408 case ISD::SETGT: 11409 case ISD::SETGE: 11410 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) && 11411 TLI.isOperationLegal(ISD::FNEG, VT)) 11412 return DAG.getNode(ISD::FNEG, DL, VT, 11413 DAG.getNode(ISD::FABS, DL, VT, X)); 11414 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0)) 11415 return DAG.getNode(ISD::FABS, DL, VT, X); 11416 11417 break; 11418 } 11419 } 11420 } 11421 11422 // FMUL -> FMA combines: 11423 if (SDValue Fused = 
visitFMULForFMADistributiveCombine(N)) { 11424 AddToWorklist(Fused.getNode()); 11425 return Fused; 11426 } 11427 11428 return SDValue(); 11429 } 11430 11431 SDValue DAGCombiner::visitFMA(SDNode *N) { 11432 SDValue N0 = N->getOperand(0); 11433 SDValue N1 = N->getOperand(1); 11434 SDValue N2 = N->getOperand(2); 11435 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 11436 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 11437 EVT VT = N->getValueType(0); 11438 SDLoc DL(N); 11439 const TargetOptions &Options = DAG.getTarget().Options; 11440 11441 // FMA nodes have flags that propagate to the created nodes. 11442 const SDNodeFlags Flags = N->getFlags(); 11443 bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N); 11444 11445 // Constant fold FMA. 11446 if (isa<ConstantFPSDNode>(N0) && 11447 isa<ConstantFPSDNode>(N1) && 11448 isa<ConstantFPSDNode>(N2)) { 11449 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); 11450 } 11451 11452 if (UnsafeFPMath) { 11453 if (N0CFP && N0CFP->isZero()) 11454 return N2; 11455 if (N1CFP && N1CFP->isZero()) 11456 return N2; 11457 } 11458 // TODO: The FMA node should have flags that propagate to these nodes. 
11459 if (N0CFP && N0CFP->isExactlyValue(1.0)) 11460 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); 11461 if (N1CFP && N1CFP->isExactlyValue(1.0)) 11462 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); 11463 11464 // Canonicalize (fma c, x, y) -> (fma x, c, y) 11465 if (isConstantFPBuildVectorOrConstantFP(N0) && 11466 !isConstantFPBuildVectorOrConstantFP(N1)) 11467 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); 11468 11469 if (UnsafeFPMath) { 11470 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) 11471 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && 11472 isConstantFPBuildVectorOrConstantFP(N1) && 11473 isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { 11474 return DAG.getNode(ISD::FMUL, DL, VT, N0, 11475 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1), 11476 Flags), Flags); 11477 } 11478 11479 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) 11480 if (N0.getOpcode() == ISD::FMUL && 11481 isConstantFPBuildVectorOrConstantFP(N1) && 11482 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { 11483 return DAG.getNode(ISD::FMA, DL, VT, 11484 N0.getOperand(0), 11485 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1), 11486 Flags), 11487 N2); 11488 } 11489 } 11490 11491 // (fma x, 1, y) -> (fadd x, y) 11492 // (fma x, -1, y) -> (fadd (fneg x), y) 11493 if (N1CFP) { 11494 if (N1CFP->isExactlyValue(1.0)) 11495 // TODO: The FMA node should have flags that propagate to this node. 11496 return DAG.getNode(ISD::FADD, DL, VT, N0, N2); 11497 11498 if (N1CFP->isExactlyValue(-1.0) && 11499 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { 11500 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0); 11501 AddToWorklist(RHSNeg.getNode()); 11502 // TODO: The FMA node should have flags that propagate to this node. 
11503 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg); 11504 } 11505 11506 // fma (fneg x), K, y -> fma x -K, y 11507 if (N0.getOpcode() == ISD::FNEG && 11508 (TLI.isOperationLegal(ISD::ConstantFP, VT) || 11509 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) { 11510 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), 11511 DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2); 11512 } 11513 } 11514 11515 if (UnsafeFPMath) { 11516 // (fma x, c, x) -> (fmul x, (c+1)) 11517 if (N1CFP && N0 == N2) { 11518 return DAG.getNode(ISD::FMUL, DL, VT, N0, 11519 DAG.getNode(ISD::FADD, DL, VT, N1, 11520 DAG.getConstantFP(1.0, DL, VT), Flags), 11521 Flags); 11522 } 11523 11524 // (fma x, c, (fneg x)) -> (fmul x, (c-1)) 11525 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { 11526 return DAG.getNode(ISD::FMUL, DL, VT, N0, 11527 DAG.getNode(ISD::FADD, DL, VT, N1, 11528 DAG.getConstantFP(-1.0, DL, VT), Flags), 11529 Flags); 11530 } 11531 } 11532 11533 return SDValue(); 11534 } 11535 11536 // Combine multiple FDIVs with the same divisor into multiple FMULs by the 11537 // reciprocal. 11538 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) 11539 // Notice that this is not always beneficial. One reason is different targets 11540 // may have different costs for FDIV and FMUL, so sometimes the cost of two 11541 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason 11542 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". 11543 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { 11544 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; 11545 const SDNodeFlags Flags = N->getFlags(); 11546 if (!UnsafeMath && !Flags.hasAllowReciprocal()) 11547 return SDValue(); 11548 11549 // Skip if current node is a reciprocal. 
11550 SDValue N0 = N->getOperand(0); 11551 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 11552 if (N0CFP && N0CFP->isExactlyValue(1.0)) 11553 return SDValue(); 11554 11555 // Exit early if the target does not want this transform or if there can't 11556 // possibly be enough uses of the divisor to make the transform worthwhile. 11557 SDValue N1 = N->getOperand(1); 11558 unsigned MinUses = TLI.combineRepeatedFPDivisors(); 11559 if (!MinUses || N1->use_size() < MinUses) 11560 return SDValue(); 11561 11562 // Find all FDIV users of the same divisor. 11563 // Use a set because duplicates may be present in the user list. 11564 SetVector<SDNode *> Users; 11565 for (auto *U : N1->uses()) { 11566 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) { 11567 // This division is eligible for optimization only if global unsafe math 11568 // is enabled or if this division allows reciprocal formation. 11569 if (UnsafeMath || U->getFlags().hasAllowReciprocal()) 11570 Users.insert(U); 11571 } 11572 } 11573 11574 // Now that we have the actual number of divisor uses, make sure it meets 11575 // the minimum threshold specified by the target. 11576 if (Users.size() < MinUses) 11577 return SDValue(); 11578 11579 EVT VT = N->getValueType(0); 11580 SDLoc DL(N); 11581 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); 11582 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags); 11583 11584 // Dividend / Divisor -> Dividend * Reciprocal 11585 for (auto *U : Users) { 11586 SDValue Dividend = U->getOperand(0); 11587 if (Dividend != FPOne) { 11588 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, 11589 Reciprocal, Flags); 11590 CombineTo(U, NewNode); 11591 } else if (U != Reciprocal.getNode()) { 11592 // In the absence of fast-math-flags, this user node is always the 11593 // same node as Reciprocal, but with FMF they may be different nodes. 11594 CombineTo(U, Reciprocal); 11595 } 11596 } 11597 return SDValue(N, 0); // N was replaced. 
11598 } 11599 11600 SDValue DAGCombiner::visitFDIV(SDNode *N) { 11601 SDValue N0 = N->getOperand(0); 11602 SDValue N1 = N->getOperand(1); 11603 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 11604 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 11605 EVT VT = N->getValueType(0); 11606 SDLoc DL(N); 11607 const TargetOptions &Options = DAG.getTarget().Options; 11608 SDNodeFlags Flags = N->getFlags(); 11609 11610 // fold vector ops 11611 if (VT.isVector()) 11612 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 11613 return FoldedVOp; 11614 11615 // fold (fdiv c1, c2) -> c1/c2 11616 if (N0CFP && N1CFP) 11617 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags); 11618 11619 if (SDValue NewSel = foldBinOpIntoSelect(N)) 11620 return NewSel; 11621 11622 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) { 11623 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. 11624 if (N1CFP) { 11625 // Compute the reciprocal 1.0 / c2. 11626 const APFloat &N1APF = N1CFP->getValueAPF(); 11627 APFloat Recip(N1APF.getSemantics(), 1); // 1.0 11628 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); 11629 // Only do the transform if the reciprocal is a legal fp immediate that 11630 // isn't too nasty (eg NaN, denormal, ...). 11631 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty 11632 (!LegalOperations || 11633 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM 11634 // backend)... we should handle this gracefully after Legalize. 11635 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) || 11636 TLI.isOperationLegal(ISD::ConstantFP, VT) || 11637 TLI.isFPImmLegal(Recip, VT))) 11638 return DAG.getNode(ISD::FMUL, DL, VT, N0, 11639 DAG.getConstantFP(Recip, DL, VT), Flags); 11640 } 11641 11642 // If this FDIV is part of a reciprocal square root, it may be folded 11643 // into a target-specific square root estimate instruction. 
11644 if (N1.getOpcode() == ISD::FSQRT) { 11645 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) { 11646 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); 11647 } 11648 } else if (N1.getOpcode() == ISD::FP_EXTEND && 11649 N1.getOperand(0).getOpcode() == ISD::FSQRT) { 11650 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), 11651 Flags)) { 11652 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); 11653 AddToWorklist(RV.getNode()); 11654 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); 11655 } 11656 } else if (N1.getOpcode() == ISD::FP_ROUND && 11657 N1.getOperand(0).getOpcode() == ISD::FSQRT) { 11658 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), 11659 Flags)) { 11660 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); 11661 AddToWorklist(RV.getNode()); 11662 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); 11663 } 11664 } else if (N1.getOpcode() == ISD::FMUL) { 11665 // Look through an FMUL. Even though this won't remove the FDIV directly, 11666 // it's still worthwhile to get rid of the FSQRT if possible. 11667 SDValue SqrtOp; 11668 SDValue OtherOp; 11669 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) { 11670 SqrtOp = N1.getOperand(0); 11671 OtherOp = N1.getOperand(1); 11672 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) { 11673 SqrtOp = N1.getOperand(1); 11674 OtherOp = N1.getOperand(0); 11675 } 11676 if (SqrtOp.getNode()) { 11677 // We found a FSQRT, so try to make this fold: 11678 // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) 11679 if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { 11680 RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); 11681 AddToWorklist(RV.getNode()); 11682 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); 11683 } 11684 } 11685 } 11686 11687 // Fold into a reciprocal estimate and multiply instead of a real divide. 
11688 if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { 11689 AddToWorklist(RV.getNode()); 11690 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); 11691 } 11692 } 11693 11694 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) 11695 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { 11696 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { 11697 // Both can be negated for free, check to see if at least one is cheaper 11698 // negated. 11699 if (LHSNeg == 2 || RHSNeg == 2) 11700 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, 11701 GetNegatedExpression(N0, DAG, LegalOperations), 11702 GetNegatedExpression(N1, DAG, LegalOperations), 11703 Flags); 11704 } 11705 } 11706 11707 if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N)) 11708 return CombineRepeatedDivisors; 11709 11710 return SDValue(); 11711 } 11712 11713 SDValue DAGCombiner::visitFREM(SDNode *N) { 11714 SDValue N0 = N->getOperand(0); 11715 SDValue N1 = N->getOperand(1); 11716 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 11717 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 11718 EVT VT = N->getValueType(0); 11719 11720 // fold (frem c1, c2) -> fmod(c1,c2) 11721 if (N0CFP && N1CFP) 11722 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags()); 11723 11724 if (SDValue NewSel = foldBinOpIntoSelect(N)) 11725 return NewSel; 11726 11727 return SDValue(); 11728 } 11729 11730 SDValue DAGCombiner::visitFSQRT(SDNode *N) { 11731 SDNodeFlags Flags = N->getFlags(); 11732 if (!DAG.getTarget().Options.UnsafeFPMath && 11733 !Flags.hasApproximateFuncs()) 11734 return SDValue(); 11735 11736 SDValue N0 = N->getOperand(0); 11737 if (TLI.isFsqrtCheap(N0, DAG)) 11738 return SDValue(); 11739 11740 // FSQRT nodes have flags that propagate to the created nodes. 
11741 return buildSqrtEstimate(N0, Flags); 11742 } 11743 11744 /// copysign(x, fp_extend(y)) -> copysign(x, y) 11745 /// copysign(x, fp_round(y)) -> copysign(x, y) 11746 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { 11747 SDValue N1 = N->getOperand(1); 11748 if ((N1.getOpcode() == ISD::FP_EXTEND || 11749 N1.getOpcode() == ISD::FP_ROUND)) { 11750 // Do not optimize out type conversion of f128 type yet. 11751 // For some targets like x86_64, configuration is changed to keep one f128 11752 // value in one SSE register, but instruction selection cannot handle 11753 // FCOPYSIGN on SSE registers yet. 11754 EVT N1VT = N1->getValueType(0); 11755 EVT N1Op0VT = N1->getOperand(0).getValueType(); 11756 return (N1VT == N1Op0VT || N1Op0VT != MVT::f128); 11757 } 11758 return false; 11759 } 11760 11761 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { 11762 SDValue N0 = N->getOperand(0); 11763 SDValue N1 = N->getOperand(1); 11764 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); 11765 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); 11766 EVT VT = N->getValueType(0); 11767 11768 if (N0CFP && N1CFP) // Constant fold 11769 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); 11770 11771 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) { 11772 const APFloat &V = N1C->getValueAPF(); 11773 // copysign(x, c1) -> fabs(x) iff ispos(c1) 11774 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) 11775 if (!V.isNegative()) { 11776 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) 11777 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 11778 } else { 11779 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 11780 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, 11781 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); 11782 } 11783 } 11784 11785 // copysign(fabs(x), y) -> copysign(x, y) 11786 // copysign(fneg(x), y) -> copysign(x, y) 11787 // copysign(copysign(x,z), y) -> copysign(x, y) 11788 if (N0.getOpcode() == ISD::FABS || 
N0.getOpcode() == ISD::FNEG || 11789 N0.getOpcode() == ISD::FCOPYSIGN) 11790 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1); 11791 11792 // copysign(x, abs(y)) -> abs(x) 11793 if (N1.getOpcode() == ISD::FABS) 11794 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 11795 11796 // copysign(x, copysign(y,z)) -> copysign(x, z) 11797 if (N1.getOpcode() == ISD::FCOPYSIGN) 11798 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1)); 11799 11800 // copysign(x, fp_extend(y)) -> copysign(x, y) 11801 // copysign(x, fp_round(y)) -> copysign(x, y) 11802 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) 11803 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); 11804 11805 return SDValue(); 11806 } 11807 11808 SDValue DAGCombiner::visitFPOW(SDNode *N) { 11809 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1)); 11810 if (!ExponentC) 11811 return SDValue(); 11812 11813 // Try to convert x ** (1/3) into cube root. 11814 // TODO: Handle the various flavors of long double. 11815 // TODO: Since we're approximating, we don't need an exact 1/3 exponent. 11816 // Some range near 1/3 should be fine. 11817 EVT VT = N->getValueType(0); 11818 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) || 11819 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) { 11820 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0. 11821 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf. 11822 // pow(-val, 1/3) = nan; cbrt(-val) = -num. 11823 // For regular numbers, rounding may cause the results to differ. 11824 // Therefore, we require { nsz ninf nnan afn } for this transform. 11825 // TODO: We could select out the special cases if we don't have nsz/ninf. 
11826 SDNodeFlags Flags = N->getFlags(); 11827 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() || 11828 !Flags.hasApproximateFuncs()) 11829 return SDValue(); 11830 11831 // Do not create a cbrt() libcall if the target does not have it, and do not 11832 // turn a pow that has lowering support into a cbrt() libcall. 11833 if (!DAG.getLibInfo().has(LibFunc_cbrt) || 11834 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) && 11835 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT))) 11836 return SDValue(); 11837 11838 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags); 11839 } 11840 11841 // Try to convert x ** (1/4) into square roots. 11842 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case. 11843 // TODO: This could be extended (using a target hook) to handle smaller 11844 // power-of-2 fractional exponents. 11845 if (ExponentC->getValueAPF().isExactlyValue(0.25)) { 11846 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0. 11847 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN. 11848 // For regular numbers, rounding may cause the results to differ. 11849 // Therefore, we require { nsz ninf afn } for this transform. 11850 // TODO: We could select out the special cases if we don't have nsz/ninf. 11851 SDNodeFlags Flags = N->getFlags(); 11852 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || 11853 !Flags.hasApproximateFuncs()) 11854 return SDValue(); 11855 11856 // Don't double the number of libcalls. We are trying to inline fast code. 11857 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT)) 11858 return SDValue(); 11859 11860 // Assume that libcalls are the smallest code. 11861 // TODO: This restriction should probably be lifted for vectors. 
11862 if (DAG.getMachineFunction().getFunction().optForSize()) 11863 return SDValue(); 11864 11865 // pow(X, 0.25) --> sqrt(sqrt(X)) 11866 SDLoc DL(N); 11867 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags); 11868 return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags); 11869 } 11870 11871 return SDValue(); 11872 } 11873 11874 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, 11875 const TargetLowering &TLI) { 11876 // This optimization is guarded by a function attribute because it may produce 11877 // unexpected results. Ie, programs may be relying on the platform-specific 11878 // undefined behavior when the float-to-int conversion overflows. 11879 const Function &F = DAG.getMachineFunction().getFunction(); 11880 Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow"); 11881 if (StrictOverflow.getValueAsString().equals("false")) 11882 return SDValue(); 11883 11884 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be 11885 // replacing casts with a libcall. We also must be allowed to ignore -0.0 11886 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer 11887 // conversions would return +0.0. 11888 // FIXME: We should be able to use node-level FMF here. 11889 // TODO: If strict math, should we use FABS (+ range check for signed cast)? 
11890 EVT VT = N->getValueType(0); 11891 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) || 11892 !DAG.getTarget().Options.NoSignedZerosFPMath) 11893 return SDValue(); 11894 11895 // fptosi/fptoui round towards zero, so converting from FP to integer and 11896 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X 11897 SDValue N0 = N->getOperand(0); 11898 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT && 11899 N0.getOperand(0).getValueType() == VT) 11900 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0)); 11901 11902 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT && 11903 N0.getOperand(0).getValueType() == VT) 11904 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0)); 11905 11906 return SDValue(); 11907 } 11908 11909 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { 11910 SDValue N0 = N->getOperand(0); 11911 EVT VT = N->getValueType(0); 11912 EVT OpVT = N0.getValueType(); 11913 11914 // fold (sint_to_fp c1) -> c1fp 11915 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 11916 // ...but only if the target supports immediate floating-point values 11917 (!LegalOperations || 11918 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) 11919 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); 11920 11921 // If the input is a legal type, and SINT_TO_FP is not legal on this target, 11922 // but UINT_TO_FP is legal on this target, try to convert. 11923 if (!hasOperation(ISD::SINT_TO_FP, OpVT) && 11924 hasOperation(ISD::UINT_TO_FP, OpVT)) { 11925 // If the sign bit is known to be zero, we can change this to UINT_TO_FP. 11926 if (DAG.SignBitIsZero(N0)) 11927 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); 11928 } 11929 11930 // The next optimizations are desirable only if SELECT_CC can be lowered. 
11931 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { 11932 // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) 11933 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && 11934 !VT.isVector() && 11935 (!LegalOperations || 11936 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { 11937 SDLoc DL(N); 11938 SDValue Ops[] = 11939 { N0.getOperand(0), N0.getOperand(1), 11940 DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), 11941 N0.getOperand(2) }; 11942 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); 11943 } 11944 11945 // fold (sint_to_fp (zext (setcc x, y, cc))) -> 11946 // (select_cc x, y, 1.0, 0.0,, cc) 11947 if (N0.getOpcode() == ISD::ZERO_EXTEND && 11948 N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && 11949 (!LegalOperations || 11950 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { 11951 SDLoc DL(N); 11952 SDValue Ops[] = 11953 { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), 11954 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), 11955 N0.getOperand(0).getOperand(2) }; 11956 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); 11957 } 11958 } 11959 11960 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI)) 11961 return FTrunc; 11962 11963 return SDValue(); 11964 } 11965 11966 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { 11967 SDValue N0 = N->getOperand(0); 11968 EVT VT = N->getValueType(0); 11969 EVT OpVT = N0.getValueType(); 11970 11971 // fold (uint_to_fp c1) -> c1fp 11972 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 11973 // ...but only if the target supports immediate floating-point values 11974 (!LegalOperations || 11975 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) 11976 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); 11977 11978 // If the input is a legal type, and UINT_TO_FP is not legal on this target, 11979 // but SINT_TO_FP is legal on this target, try to convert. 
11980 if (!hasOperation(ISD::UINT_TO_FP, OpVT) && 11981 hasOperation(ISD::SINT_TO_FP, OpVT)) { 11982 // If the sign bit is known to be zero, we can change this to SINT_TO_FP. 11983 if (DAG.SignBitIsZero(N0)) 11984 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); 11985 } 11986 11987 // The next optimizations are desirable only if SELECT_CC can be lowered. 11988 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { 11989 // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) 11990 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && 11991 (!LegalOperations || 11992 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { 11993 SDLoc DL(N); 11994 SDValue Ops[] = 11995 { N0.getOperand(0), N0.getOperand(1), 11996 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), 11997 N0.getOperand(2) }; 11998 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); 11999 } 12000 } 12001 12002 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI)) 12003 return FTrunc; 12004 12005 return SDValue(); 12006 } 12007 12008 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x 12009 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { 12010 SDValue N0 = N->getOperand(0); 12011 EVT VT = N->getValueType(0); 12012 12013 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP) 12014 return SDValue(); 12015 12016 SDValue Src = N0.getOperand(0); 12017 EVT SrcVT = Src.getValueType(); 12018 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP; 12019 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT; 12020 12021 // We can safely assume the conversion won't overflow the output range, 12022 // because (for example) (uint8_t)18293.f is undefined behavior. 12023 12024 // Since we can assume the conversion won't overflow, our decision as to 12025 // whether the input will fit in the float should depend on the minimum 12026 // of the input range and output range. 
12027 12028 // This means this is also safe for a signed input and unsigned output, since 12029 // a negative input would lead to undefined behavior. 12030 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned; 12031 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned; 12032 unsigned ActualSize = std::min(InputSize, OutputSize); 12033 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType()); 12034 12035 // We can only fold away the float conversion if the input range can be 12036 // represented exactly in the float range. 12037 if (APFloat::semanticsPrecision(sem) >= ActualSize) { 12038 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) { 12039 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND 12040 : ISD::ZERO_EXTEND; 12041 return DAG.getNode(ExtOp, SDLoc(N), VT, Src); 12042 } 12043 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits()) 12044 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src); 12045 return DAG.getBitcast(VT, Src); 12046 } 12047 return SDValue(); 12048 } 12049 12050 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { 12051 SDValue N0 = N->getOperand(0); 12052 EVT VT = N->getValueType(0); 12053 12054 // fold (fp_to_sint c1fp) -> c1 12055 if (isConstantFPBuildVectorOrConstantFP(N0)) 12056 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); 12057 12058 return FoldIntToFPToInt(N, DAG); 12059 } 12060 12061 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { 12062 SDValue N0 = N->getOperand(0); 12063 EVT VT = N->getValueType(0); 12064 12065 // fold (fp_to_uint c1fp) -> c1 12066 if (isConstantFPBuildVectorOrConstantFP(N0)) 12067 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); 12068 12069 return FoldIntToFPToInt(N, DAG); 12070 } 12071 12072 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { 12073 SDValue N0 = N->getOperand(0); 12074 SDValue N1 = N->getOperand(1); 12075 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 12076 EVT VT = N->getValueType(0); 12077 12078 
// fold (fp_round c1fp) -> c1fp 12079 if (N0CFP) 12080 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1); 12081 12082 // fold (fp_round (fp_extend x)) -> x 12083 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) 12084 return N0.getOperand(0); 12085 12086 // fold (fp_round (fp_round x)) -> (fp_round x) 12087 if (N0.getOpcode() == ISD::FP_ROUND) { 12088 const bool NIsTrunc = N->getConstantOperandVal(1) == 1; 12089 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1; 12090 12091 // Skip this folding if it results in an fp_round from f80 to f16. 12092 // 12093 // f80 to f16 always generates an expensive (and as yet, unimplemented) 12094 // libcall to __truncxfhf2 instead of selecting native f16 conversion 12095 // instructions from f32 or f64. Moreover, the first (value-preserving) 12096 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like 12097 // x86. 12098 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16) 12099 return SDValue(); 12100 12101 // If the first fp_round isn't a value preserving truncation, it might 12102 // introduce a tie in the second fp_round, that wouldn't occur in the 12103 // single-step fp_round we want to fold to. 12104 // In other words, double rounding isn't the same as rounding. 12105 // Also, this is a value preserving truncation iff both fp_round's are. 
12106 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { 12107 SDLoc DL(N); 12108 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), 12109 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); 12110 } 12111 } 12112 12113 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) 12114 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { 12115 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, 12116 N0.getOperand(0), N1); 12117 AddToWorklist(Tmp.getNode()); 12118 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 12119 Tmp, N0.getOperand(1)); 12120 } 12121 12122 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) 12123 return NewVSel; 12124 12125 return SDValue(); 12126 } 12127 12128 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { 12129 SDValue N0 = N->getOperand(0); 12130 EVT VT = N->getValueType(0); 12131 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 12132 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 12133 12134 // fold (fp_round_inreg c1fp) -> c1fp 12135 if (N0CFP && isTypeLegal(EVT)) { 12136 SDLoc DL(N); 12137 SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); 12138 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); 12139 } 12140 12141 return SDValue(); 12142 } 12143 12144 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { 12145 SDValue N0 = N->getOperand(0); 12146 EVT VT = N->getValueType(0); 12147 12148 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. 
12149 if (N->hasOneUse() && 12150 N->use_begin()->getOpcode() == ISD::FP_ROUND) 12151 return SDValue(); 12152 12153 // fold (fp_extend c1fp) -> c1fp 12154 if (isConstantFPBuildVectorOrConstantFP(N0)) 12155 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); 12156 12157 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op) 12158 if (N0.getOpcode() == ISD::FP16_TO_FP && 12159 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal) 12160 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0)); 12161 12162 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the 12163 // value of X. 12164 if (N0.getOpcode() == ISD::FP_ROUND 12165 && N0.getConstantOperandVal(1) == 1) { 12166 SDValue In = N0.getOperand(0); 12167 if (In.getValueType() == VT) return In; 12168 if (VT.bitsLT(In.getValueType())) 12169 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, 12170 In, N0.getOperand(1)); 12171 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In); 12172 } 12173 12174 // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) 12175 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 12176 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { 12177 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 12178 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, 12179 LN0->getChain(), 12180 LN0->getBasePtr(), N0.getValueType(), 12181 LN0->getMemOperand()); 12182 CombineTo(N, ExtLoad); 12183 CombineTo(N0.getNode(), 12184 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), 12185 N0.getValueType(), ExtLoad, 12186 DAG.getIntPtrConstant(1, SDLoc(N0))), 12187 ExtLoad.getValue(1)); 12188 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
12189 } 12190 12191 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) 12192 return NewVSel; 12193 12194 return SDValue(); 12195 } 12196 12197 SDValue DAGCombiner::visitFCEIL(SDNode *N) { 12198 SDValue N0 = N->getOperand(0); 12199 EVT VT = N->getValueType(0); 12200 12201 // fold (fceil c1) -> fceil(c1) 12202 if (isConstantFPBuildVectorOrConstantFP(N0)) 12203 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); 12204 12205 return SDValue(); 12206 } 12207 12208 SDValue DAGCombiner::visitFTRUNC(SDNode *N) { 12209 SDValue N0 = N->getOperand(0); 12210 EVT VT = N->getValueType(0); 12211 12212 // fold (ftrunc c1) -> ftrunc(c1) 12213 if (isConstantFPBuildVectorOrConstantFP(N0)) 12214 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); 12215 12216 // fold ftrunc (known rounded int x) -> x 12217 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is 12218 // likely to be generated to extract integer from a rounded floating value. 12219 switch (N0.getOpcode()) { 12220 default: break; 12221 case ISD::FRINT: 12222 case ISD::FTRUNC: 12223 case ISD::FNEARBYINT: 12224 case ISD::FFLOOR: 12225 case ISD::FCEIL: 12226 return N0; 12227 } 12228 12229 return SDValue(); 12230 } 12231 12232 SDValue DAGCombiner::visitFFLOOR(SDNode *N) { 12233 SDValue N0 = N->getOperand(0); 12234 EVT VT = N->getValueType(0); 12235 12236 // fold (ffloor c1) -> ffloor(c1) 12237 if (isConstantFPBuildVectorOrConstantFP(N0)) 12238 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); 12239 12240 return SDValue(); 12241 } 12242 12243 // FIXME: FNEG and FABS have a lot in common; refactor. 12244 SDValue DAGCombiner::visitFNEG(SDNode *N) { 12245 SDValue N0 = N->getOperand(0); 12246 EVT VT = N->getValueType(0); 12247 12248 // Constant fold FNEG. 
12249 if (isConstantFPBuildVectorOrConstantFP(N0)) 12250 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); 12251 12252 if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), 12253 &DAG.getTarget().Options)) 12254 return GetNegatedExpression(N0, DAG, LegalOperations); 12255 12256 // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading 12257 // constant pool values. 12258 if (!TLI.isFNegFree(VT) && 12259 N0.getOpcode() == ISD::BITCAST && 12260 N0.getNode()->hasOneUse()) { 12261 SDValue Int = N0.getOperand(0); 12262 EVT IntVT = Int.getValueType(); 12263 if (IntVT.isInteger() && !IntVT.isVector()) { 12264 APInt SignMask; 12265 if (N0.getValueType().isVector()) { 12266 // For a vector, get a mask such as 0x80... per scalar element 12267 // and splat it. 12268 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits()); 12269 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); 12270 } else { 12271 // For a scalar, just generate 0x80... 12272 SignMask = APInt::getSignMask(IntVT.getSizeInBits()); 12273 } 12274 SDLoc DL0(N0); 12275 Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, 12276 DAG.getConstant(SignMask, DL0, IntVT)); 12277 AddToWorklist(Int.getNode()); 12278 return DAG.getBitcast(VT, Int); 12279 } 12280 } 12281 12282 // (fneg (fmul c, x)) -> (fmul -c, x) 12283 if (N0.getOpcode() == ISD::FMUL && 12284 (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) { 12285 ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); 12286 if (CFP1) { 12287 APFloat CVal = CFP1->getValueAPF(); 12288 CVal.changeSign(); 12289 if (Level >= AfterLegalizeDAG && 12290 (TLI.isFPImmLegal(CVal, VT) || 12291 TLI.isOperationLegal(ISD::ConstantFP, VT))) 12292 return DAG.getNode( 12293 ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), 12294 DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)), 12295 N0->getFlags()); 12296 } 12297 } 12298 12299 return SDValue(); 12300 } 12301 12302 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N, 12303 
APFloat (*Op)(const APFloat &, const APFloat &)) { 12304 SDValue N0 = N->getOperand(0); 12305 SDValue N1 = N->getOperand(1); 12306 EVT VT = N->getValueType(0); 12307 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); 12308 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); 12309 12310 if (N0CFP && N1CFP) { 12311 const APFloat &C0 = N0CFP->getValueAPF(); 12312 const APFloat &C1 = N1CFP->getValueAPF(); 12313 return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT); 12314 } 12315 12316 // Canonicalize to constant on RHS. 12317 if (isConstantFPBuildVectorOrConstantFP(N0) && 12318 !isConstantFPBuildVectorOrConstantFP(N1)) 12319 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); 12320 12321 return SDValue(); 12322 } 12323 12324 SDValue DAGCombiner::visitFMINNUM(SDNode *N) { 12325 return visitFMinMax(DAG, N, minnum); 12326 } 12327 12328 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { 12329 return visitFMinMax(DAG, N, maxnum); 12330 } 12331 12332 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) { 12333 return visitFMinMax(DAG, N, minimum); 12334 } 12335 12336 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) { 12337 return visitFMinMax(DAG, N, maximum); 12338 } 12339 12340 SDValue DAGCombiner::visitFABS(SDNode *N) { 12341 SDValue N0 = N->getOperand(0); 12342 EVT VT = N->getValueType(0); 12343 12344 // fold (fabs c1) -> fabs(c1) 12345 if (isConstantFPBuildVectorOrConstantFP(N0)) 12346 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 12347 12348 // fold (fabs (fabs x)) -> (fabs x) 12349 if (N0.getOpcode() == ISD::FABS) 12350 return N->getOperand(0); 12351 12352 // fold (fabs (fneg x)) -> (fabs x) 12353 // fold (fabs (fcopysign x, y)) -> (fabs x) 12354 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) 12355 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); 12356 12357 // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads. 
12358 if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) { 12359 SDValue Int = N0.getOperand(0); 12360 EVT IntVT = Int.getValueType(); 12361 if (IntVT.isInteger() && !IntVT.isVector()) { 12362 APInt SignMask; 12363 if (N0.getValueType().isVector()) { 12364 // For a vector, get a mask such as 0x7f... per scalar element 12365 // and splat it. 12366 SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits()); 12367 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); 12368 } else { 12369 // For a scalar, just generate 0x7f... 12370 SignMask = ~APInt::getSignMask(IntVT.getSizeInBits()); 12371 } 12372 SDLoc DL(N0); 12373 Int = DAG.getNode(ISD::AND, DL, IntVT, Int, 12374 DAG.getConstant(SignMask, DL, IntVT)); 12375 AddToWorklist(Int.getNode()); 12376 return DAG.getBitcast(N->getValueType(0), Int); 12377 } 12378 } 12379 12380 return SDValue(); 12381 } 12382 12383 SDValue DAGCombiner::visitBRCOND(SDNode *N) { 12384 SDValue Chain = N->getOperand(0); 12385 SDValue N1 = N->getOperand(1); 12386 SDValue N2 = N->getOperand(2); 12387 12388 // If N is a constant we could fold this into a fallthrough or unconditional 12389 // branch. However that doesn't happen very often in normal code, because 12390 // Instcombine/SimplifyCFG should have handled the available opportunities. 12391 // If we did this folding here, it would be necessary to update the 12392 // MachineBasicBlock CFG, which is awkward. 12393 12394 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal 12395 // on the target. 
12396 if (N1.getOpcode() == ISD::SETCC && 12397 TLI.isOperationLegalOrCustom(ISD::BR_CC, 12398 N1.getOperand(0).getValueType())) { 12399 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, 12400 Chain, N1.getOperand(2), 12401 N1.getOperand(0), N1.getOperand(1), N2); 12402 } 12403 12404 if (N1.hasOneUse()) { 12405 if (SDValue NewN1 = rebuildSetCC(N1)) 12406 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2); 12407 } 12408 12409 return SDValue(); 12410 } 12411 12412 SDValue DAGCombiner::rebuildSetCC(SDValue N) { 12413 if (N.getOpcode() == ISD::SRL || 12414 (N.getOpcode() == ISD::TRUNCATE && 12415 (N.getOperand(0).hasOneUse() && 12416 N.getOperand(0).getOpcode() == ISD::SRL))) { 12417 // Look pass the truncate. 12418 if (N.getOpcode() == ISD::TRUNCATE) 12419 N = N.getOperand(0); 12420 12421 // Match this pattern so that we can generate simpler code: 12422 // 12423 // %a = ... 12424 // %b = and i32 %a, 2 12425 // %c = srl i32 %b, 1 12426 // brcond i32 %c ... 12427 // 12428 // into 12429 // 12430 // %a = ... 12431 // %b = and i32 %a, 2 12432 // %c = setcc eq %b, 0 12433 // brcond %c ... 12434 // 12435 // This applies only when the AND constant value has one bit set and the 12436 // SRL constant is equal to the log2 of the AND constant. The back-end is 12437 // smart enough to convert the result into a TEST/JMP sequence. 
12438 SDValue Op0 = N.getOperand(0); 12439 SDValue Op1 = N.getOperand(1); 12440 12441 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) { 12442 SDValue AndOp1 = Op0.getOperand(1); 12443 12444 if (AndOp1.getOpcode() == ISD::Constant) { 12445 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); 12446 12447 if (AndConst.isPowerOf2() && 12448 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) { 12449 SDLoc DL(N); 12450 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()), 12451 Op0, DAG.getConstant(0, DL, Op0.getValueType()), 12452 ISD::SETNE); 12453 } 12454 } 12455 } 12456 } 12457 12458 // Transform br(xor(x, y)) -> br(x != y) 12459 // Transform br(xor(xor(x,y), 1)) -> br (x == y) 12460 if (N.getOpcode() == ISD::XOR) { 12461 // Because we may call this on a speculatively constructed 12462 // SimplifiedSetCC Node, we need to simplify this node first. 12463 // Ideally this should be folded into SimplifySetCC and not 12464 // here. For now, grab a handle to N so we don't lose it from 12465 // replacements interal to the visit. 12466 HandleSDNode XORHandle(N); 12467 while (N.getOpcode() == ISD::XOR) { 12468 SDValue Tmp = visitXOR(N.getNode()); 12469 // No simplification done. 12470 if (!Tmp.getNode()) 12471 break; 12472 // Returning N is form in-visit replacement that may invalidated 12473 // N. Grab value from Handle. 12474 if (Tmp.getNode() == N.getNode()) 12475 N = XORHandle.getValue(); 12476 else // Node simplified. Try simplifying again. 
12477 N = Tmp; 12478 } 12479 12480 if (N.getOpcode() != ISD::XOR) 12481 return N; 12482 12483 SDNode *TheXor = N.getNode(); 12484 12485 SDValue Op0 = TheXor->getOperand(0); 12486 SDValue Op1 = TheXor->getOperand(1); 12487 12488 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { 12489 bool Equal = false; 12490 if (isOneConstant(Op0) && Op0.hasOneUse() && 12491 Op0.getOpcode() == ISD::XOR) { 12492 TheXor = Op0.getNode(); 12493 Equal = true; 12494 } 12495 12496 EVT SetCCVT = N.getValueType(); 12497 if (LegalTypes) 12498 SetCCVT = getSetCCResultType(SetCCVT); 12499 // Replace the uses of XOR with SETCC 12500 return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, 12501 Equal ? ISD::SETEQ : ISD::SETNE); 12502 } 12503 } 12504 12505 return SDValue(); 12506 } 12507 12508 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. 12509 // 12510 SDValue DAGCombiner::visitBR_CC(SDNode *N) { 12511 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); 12512 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); 12513 12514 // If N is a constant we could fold this into a fallthrough or unconditional 12515 // branch. However that doesn't happen very often in normal code, because 12516 // Instcombine/SimplifyCFG should have handled the available opportunities. 12517 // If we did this folding here, it would be necessary to update the 12518 // MachineBasicBlock CFG, which is awkward. 12519 12520 // Use SimplifySetCC to simplify SETCC's. 
12521 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), 12522 CondLHS, CondRHS, CC->get(), SDLoc(N), 12523 false); 12524 if (Simp.getNode()) AddToWorklist(Simp.getNode()); 12525 12526 // fold to a simpler setcc 12527 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) 12528 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, 12529 N->getOperand(0), Simp.getOperand(2), 12530 Simp.getOperand(0), Simp.getOperand(1), 12531 N->getOperand(4)); 12532 12533 return SDValue(); 12534 } 12535 12536 /// Return true if 'Use' is a load or a store that uses N as its base pointer 12537 /// and that N may be folded in the load / store addressing mode. 12538 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, 12539 SelectionDAG &DAG, 12540 const TargetLowering &TLI) { 12541 EVT VT; 12542 unsigned AS; 12543 12544 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { 12545 if (LD->isIndexed() || LD->getBasePtr().getNode() != N) 12546 return false; 12547 VT = LD->getMemoryVT(); 12548 AS = LD->getAddressSpace(); 12549 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { 12550 if (ST->isIndexed() || ST->getBasePtr().getNode() != N) 12551 return false; 12552 VT = ST->getMemoryVT(); 12553 AS = ST->getAddressSpace(); 12554 } else 12555 return false; 12556 12557 TargetLowering::AddrMode AM; 12558 if (N->getOpcode() == ISD::ADD) { 12559 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 12560 if (Offset) 12561 // [reg +/- imm] 12562 AM.BaseOffs = Offset->getSExtValue(); 12563 else 12564 // [reg +/- reg] 12565 AM.Scale = 1; 12566 } else if (N->getOpcode() == ISD::SUB) { 12567 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 12568 if (Offset) 12569 // [reg +/- imm] 12570 AM.BaseOffs = -Offset->getSExtValue(); 12571 else 12572 // [reg +/- reg] 12573 AM.Scale = 1; 12574 } else 12575 return false; 12576 12577 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, 12578 VT.getTypeForEVT(*DAG.getContext()), AS); 12579 } 

/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
///
/// \param N the candidate node; anything other than an unindexed load or
///          store is rejected.
/// \returns true if N was replaced by an indexed load/store (N and the old
///          pointer computation are handed to deleteAndRecombine in that
///          case), false if nothing was changed.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // This combine only runs once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  // While Swapped is set, BasePtr/Offset are temporarily exchanged for the
  // checks below; they are swapped back before the indexed node is built.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create a indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  // Both are shared by every hasPredecessorHelper call below (the OtherUses
  // scan and the check #3 loop).
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  // OtherUses is all-or-nothing: any use of BasePtr that cannot be rewritten
  // clears the list and stops the scan.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // The user is a binary add/sub, so "(operand number + 1) & 1" selects
      // its other operand.
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Undo the temporary swap so BasePtr/Offset are as the target returned them.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Result numbering as used here: an indexed load's result 1 stands in for
  // the old pointer (see the replacement of Ptr below), so N's results 0 and 1
  // map to results 0 and 2; for a store, result 0 stands in for the old
  // pointer and N's result 0 maps to result 1.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Swap again so that, as during the OtherUses scan, Offset names the
  // constant operand read by the rewrite loop below.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    // Find which operand of this add/sub is the constant offset (OffsetIdx);
    // the other one must be BasePtr.
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    // CNV = x0 * offset0 - x1 * y0 * y1 * offset1, per the formula above.
    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}

/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
12811 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { 12812 if (Level < AfterLegalizeDAG) 12813 return false; 12814 12815 bool isLoad = true; 12816 SDValue Ptr; 12817 EVT VT; 12818 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 12819 if (LD->isIndexed()) 12820 return false; 12821 VT = LD->getMemoryVT(); 12822 if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) && 12823 !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT)) 12824 return false; 12825 Ptr = LD->getBasePtr(); 12826 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 12827 if (ST->isIndexed()) 12828 return false; 12829 VT = ST->getMemoryVT(); 12830 if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) && 12831 !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT)) 12832 return false; 12833 Ptr = ST->getBasePtr(); 12834 isLoad = false; 12835 } else { 12836 return false; 12837 } 12838 12839 if (Ptr.getNode()->hasOneUse()) 12840 return false; 12841 12842 for (SDNode *Op : Ptr.getNode()->uses()) { 12843 if (Op == N || 12844 (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)) 12845 continue; 12846 12847 SDValue BasePtr; 12848 SDValue Offset; 12849 ISD::MemIndexedMode AM = ISD::UNINDEXED; 12850 if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { 12851 // Don't create a indexed load / store with zero offset. 12852 if (isNullConstant(Offset)) 12853 continue; 12854 12855 // Try turning it into a post-indexed load / store except when 12856 // 1) All uses are load / store ops that use it as base ptr (and 12857 // it may be folded as addressing mmode). 12858 // 2) Op must be independent of N, i.e. Op is neither a predecessor 12859 // nor a successor of N. Otherwise, if Op is folded that would 12860 // create a cycle. 12861 12862 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) 12863 continue; 12864 12865 // Check for #1. 
12866 bool TryNext = false; 12867 for (SDNode *Use : BasePtr.getNode()->uses()) { 12868 if (Use == Ptr.getNode()) 12869 continue; 12870 12871 // If all the uses are load / store addresses, then don't do the 12872 // transformation. 12873 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ 12874 bool RealUse = false; 12875 for (SDNode *UseUse : Use->uses()) { 12876 if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) 12877 RealUse = true; 12878 } 12879 12880 if (!RealUse) { 12881 TryNext = true; 12882 break; 12883 } 12884 } 12885 } 12886 12887 if (TryNext) 12888 continue; 12889 12890 // Check for #2. 12891 SmallPtrSet<const SDNode *, 32> Visited; 12892 SmallVector<const SDNode *, 8> Worklist; 12893 // Ptr is predecessor to both N and Op. 12894 Visited.insert(Ptr.getNode()); 12895 Worklist.push_back(N); 12896 Worklist.push_back(Op); 12897 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) && 12898 !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) { 12899 SDValue Result = isLoad 12900 ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), 12901 BasePtr, Offset, AM) 12902 : DAG.getIndexedStore(SDValue(N,0), SDLoc(N), 12903 BasePtr, Offset, AM); 12904 ++PostIndexedNodes; 12905 ++NodesCombined; 12906 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); 12907 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); 12908 dbgs() << '\n'); 12909 WorklistRemover DeadNodes(*this); 12910 if (isLoad) { 12911 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); 12912 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); 12913 } else { 12914 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); 12915 } 12916 12917 // Finally, since the node is now dead, remove it from the graph. 12918 deleteAndRecombine(N); 12919 12920 // Replace the uses of Use with uses of the updated base value. 12921 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), 12922 Result.getValue(isLoad ? 
1 : 0)); 12923 deleteAndRecombine(Op); 12924 return true; 12925 } 12926 } 12927 } 12928 12929 return false; 12930 } 12931 12932 /// Return the base-pointer arithmetic from an indexed \p LD. 12933 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { 12934 ISD::MemIndexedMode AM = LD->getAddressingMode(); 12935 assert(AM != ISD::UNINDEXED); 12936 SDValue BP = LD->getOperand(1); 12937 SDValue Inc = LD->getOperand(2); 12938 12939 // Some backends use TargetConstants for load offsets, but don't expect 12940 // TargetConstants in general ADD nodes. We can convert these constants into 12941 // regular Constants (if the constant is not opaque). 12942 assert((Inc.getOpcode() != ISD::TargetConstant || 12943 !cast<ConstantSDNode>(Inc)->isOpaque()) && 12944 "Cannot split out indexing using opaque target constants"); 12945 if (Inc.getOpcode() == ISD::TargetConstant) { 12946 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc); 12947 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc), 12948 ConstInc->getValueType(0)); 12949 } 12950 12951 unsigned Opc = 12952 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB); 12953 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); 12954 } 12955 12956 static inline int numVectorEltsOrZero(EVT T) { 12957 return T.isVector() ? T.getVectorNumElements() : 0; 12958 } 12959 12960 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) { 12961 Val = ST->getValue(); 12962 EVT STType = Val.getValueType(); 12963 EVT STMemType = ST->getMemoryVT(); 12964 if (STType == STMemType) 12965 return true; 12966 if (isTypeLegal(STMemType)) 12967 return false; // fail. 
12968 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() && 12969 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) { 12970 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val); 12971 return true; 12972 } 12973 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) && 12974 STType.isInteger() && STMemType.isInteger()) { 12975 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val); 12976 return true; 12977 } 12978 if (STType.getSizeInBits() == STMemType.getSizeInBits()) { 12979 Val = DAG.getBitcast(STMemType, Val); 12980 return true; 12981 } 12982 return false; // fail. 12983 } 12984 12985 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) { 12986 EVT LDMemType = LD->getMemoryVT(); 12987 EVT LDType = LD->getValueType(0); 12988 assert(Val.getValueType() == LDMemType && 12989 "Attempting to extend value of non-matching type"); 12990 if (LDType == LDMemType) 12991 return true; 12992 if (LDMemType.isInteger() && LDType.isInteger()) { 12993 switch (LD->getExtensionType()) { 12994 case ISD::NON_EXTLOAD: 12995 Val = DAG.getBitcast(LDType, Val); 12996 return true; 12997 case ISD::EXTLOAD: 12998 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val); 12999 return true; 13000 case ISD::SEXTLOAD: 13001 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val); 13002 return true; 13003 case ISD::ZEXTLOAD: 13004 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val); 13005 return true; 13006 } 13007 } 13008 return false; 13009 } 13010 13011 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { 13012 if (OptLevel == CodeGenOpt::None || LD->isVolatile()) 13013 return SDValue(); 13014 SDValue Chain = LD->getOperand(0); 13015 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode()); 13016 if (!ST || ST->isVolatile()) 13017 return SDValue(); 13018 13019 EVT LDType = LD->getValueType(0); 13020 EVT LDMemType = LD->getMemoryVT(); 13021 EVT STMemType = ST->getMemoryVT(); 13022 EVT STType = 
ST->getValue().getValueType(); 13023 13024 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG); 13025 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG); 13026 int64_t Offset; 13027 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset)) 13028 return SDValue(); 13029 13030 // Normalize for Endianness. After this Offset=0 will denote that the least 13031 // significant bit in the loaded value maps to the least significant bit in 13032 // the stored value). With Offset=n (for n > 0) the loaded value starts at the 13033 // n:th least significant byte of the stored value. 13034 if (DAG.getDataLayout().isBigEndian()) 13035 Offset = (STMemType.getStoreSizeInBits() - 13036 LDMemType.getStoreSizeInBits()) / 8 - Offset; 13037 13038 // Check that the stored value cover all bits that are loaded. 13039 bool STCoversLD = 13040 (Offset >= 0) && 13041 (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits()); 13042 13043 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue { 13044 if (LD->isIndexed()) { 13045 bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC || 13046 LD->getAddressingMode() == ISD::POST_DEC); 13047 unsigned Opc = IsSub ? ISD::SUB : ISD::ADD; 13048 SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(), 13049 LD->getOperand(1), LD->getOperand(2)); 13050 SDValue Ops[] = {Val, Idx, Chain}; 13051 return CombineTo(LD, Ops, 3); 13052 } 13053 return CombineTo(LD, Val, Chain); 13054 }; 13055 13056 if (!STCoversLD) 13057 return SDValue(); 13058 13059 // Memory as copy space (potentially masked). 13060 if (Offset == 0 && LDType == STType && STMemType == LDMemType) { 13061 // Simple case: Direct non-truncating forwarding 13062 if (LDType.getSizeInBits() == LDMemType.getSizeInBits()) 13063 return ReplaceLd(LD, ST->getValue(), Chain); 13064 // Can we model the truncate and extension with an and mask? 
13065 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() && 13066 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) { 13067 // Mask to size of LDMemType 13068 auto Mask = 13069 DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(), 13070 STMemType.getSizeInBits()), 13071 SDLoc(ST), STType); 13072 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask); 13073 return ReplaceLd(LD, Val, Chain); 13074 } 13075 } 13076 13077 // TODO: Deal with nonzero offset. 13078 if (LD->getBasePtr().isUndef() || Offset != 0) 13079 return SDValue(); 13080 // Model necessary truncations / extenstions. 13081 SDValue Val; 13082 // Truncate Value To Stored Memory Size. 13083 do { 13084 if (!getTruncatedStoreValue(ST, Val)) 13085 continue; 13086 if (!isTypeLegal(LDMemType)) 13087 continue; 13088 if (STMemType != LDMemType) { 13089 // TODO: Support vectors? This requires extract_subvector/bitcast. 13090 if (!STMemType.isVector() && !LDMemType.isVector() && 13091 STMemType.isInteger() && LDMemType.isInteger()) 13092 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val); 13093 else 13094 continue; 13095 } 13096 if (!extendLoadedValueToExtension(LD, Val)) 13097 continue; 13098 return ReplaceLd(LD, Val, Chain); 13099 } while (false); 13100 13101 // On failure, cleanup dead nodes we may have created. 13102 if (Val->use_empty()) 13103 deleteAndRecombine(Val.getNode()); 13104 return SDValue(); 13105 } 13106 13107 SDValue DAGCombiner::visitLOAD(SDNode *N) { 13108 LoadSDNode *LD = cast<LoadSDNode>(N); 13109 SDValue Chain = LD->getChain(); 13110 SDValue Ptr = LD->getBasePtr(); 13111 13112 // If load is not volatile and there are no uses of the loaded value (and 13113 // the updated indexed value in case of indexed loads), change uses of the 13114 // chain value into uses of the chain input (i.e. delete the dead load). 13115 if (!LD->isVolatile()) { 13116 if (N->getValueType(1) == MVT::Other) { 13117 // Unindexed loads. 
if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3 = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        // Only delete N if the replacement above really left it without users
        // (see the example above for how it can become live again).
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // The loaded value must be unused, and the updated pointer must either
      // be unused as well or be recomputable with a separate add/sub.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        // Result 0 is the loaded value, result 1 the updated pointer and
        // result 2 the chain.
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  if (auto V = ForwardStoreValueToDirectLoad(LD))
    return V;

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        // Silence the unused-variable warning when asserts are compiled out.
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid the dependency, keeping the same kind of
      // load (plain or extending).
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  // No combine applied.
  return SDValue();
}

namespace {

/// Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;

    /// Various costs.
13252 unsigned Loads = 0; 13253 unsigned Truncates = 0; 13254 unsigned CrossRegisterBanksCopies = 0; 13255 unsigned ZExts = 0; 13256 unsigned Shift = 0; 13257 13258 Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {} 13259 13260 /// Get the cost of one isolated slice. 13261 Cost(const LoadedSlice &LS, bool ForCodeSize = false) 13262 : ForCodeSize(ForCodeSize), Loads(1) { 13263 EVT TruncType = LS.Inst->getValueType(0); 13264 EVT LoadedType = LS.getLoadedType(); 13265 if (TruncType != LoadedType && 13266 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType)) 13267 ZExts = 1; 13268 } 13269 13270 /// Account for slicing gain in the current cost. 13271 /// Slicing provide a few gains like removing a shift or a 13272 /// truncate. This method allows to grow the cost of the original 13273 /// load with the gain from this slice. 13274 void addSliceGain(const LoadedSlice &LS) { 13275 // Each slice saves a truncate. 13276 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); 13277 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(), 13278 LS.Inst->getValueType(0))) 13279 ++Truncates; 13280 // If there is a shift amount, this slice gets rid of it. 13281 if (LS.Shift) 13282 ++Shift; 13283 // If this slice can merge a cross register bank copy, account for it. 
13284 if (LS.canMergeExpensiveCrossRegisterBankCopy()) 13285 ++CrossRegisterBanksCopies; 13286 } 13287 13288 Cost &operator+=(const Cost &RHS) { 13289 Loads += RHS.Loads; 13290 Truncates += RHS.Truncates; 13291 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies; 13292 ZExts += RHS.ZExts; 13293 Shift += RHS.Shift; 13294 return *this; 13295 } 13296 13297 bool operator==(const Cost &RHS) const { 13298 return Loads == RHS.Loads && Truncates == RHS.Truncates && 13299 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies && 13300 ZExts == RHS.ZExts && Shift == RHS.Shift; 13301 } 13302 13303 bool operator!=(const Cost &RHS) const { return !(*this == RHS); } 13304 13305 bool operator<(const Cost &RHS) const { 13306 // Assume cross register banks copies are as expensive as loads. 13307 // FIXME: Do we want some more target hooks? 13308 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies; 13309 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies; 13310 // Unless we are optimizing for code size, consider the 13311 // expensive operation first. 13312 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS) 13313 return ExpensiveOpsLHS < ExpensiveOpsRHS; 13314 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) < 13315 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS); 13316 } 13317 13318 bool operator>(const Cost &RHS) const { return RHS < *this; } 13319 13320 bool operator<=(const Cost &RHS) const { return !(RHS < *this); } 13321 13322 bool operator>=(const Cost &RHS) const { return !(*this < RHS); } 13323 }; 13324 13325 // The last instruction that represent the slice. This should be a 13326 // truncate instruction. 13327 SDNode *Inst; 13328 13329 // The original load instruction. 13330 LoadSDNode *Origin; 13331 13332 // The right shift amount in bits from the original load. 13333 unsigned Shift; 13334 13335 // The DAG from which Origin came from. 13336 // This is used to get some contextual information about legal types, etc. 
13337 SelectionDAG *DAG; 13338 13339 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr, 13340 unsigned Shift = 0, SelectionDAG *DAG = nullptr) 13341 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} 13342 13343 /// Get the bits used in a chunk of bits \p BitWidth large. 13344 /// \return Result is \p BitWidth and has used bits set to 1 and 13345 /// not used bits set to 0. 13346 APInt getUsedBits() const { 13347 // Reproduce the trunc(lshr) sequence: 13348 // - Start from the truncated value. 13349 // - Zero extend to the desired bit width. 13350 // - Shift left. 13351 assert(Origin && "No original load to compare against."); 13352 unsigned BitWidth = Origin->getValueSizeInBits(0); 13353 assert(Inst && "This slice is not bound to an instruction"); 13354 assert(Inst->getValueSizeInBits(0) <= BitWidth && 13355 "Extracted slice is bigger than the whole type!"); 13356 APInt UsedBits(Inst->getValueSizeInBits(0), 0); 13357 UsedBits.setAllBits(); 13358 UsedBits = UsedBits.zext(BitWidth); 13359 UsedBits <<= Shift; 13360 return UsedBits; 13361 } 13362 13363 /// Get the size of the slice to be loaded in bytes. 13364 unsigned getLoadedSize() const { 13365 unsigned SliceSize = getUsedBits().countPopulation(); 13366 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); 13367 return SliceSize / 8; 13368 } 13369 13370 /// Get the type that will be loaded for this slice. 13371 /// Note: This may not be the final type for the slice. 13372 EVT getLoadedType() const { 13373 assert(DAG && "Missing context"); 13374 LLVMContext &Ctxt = *DAG->getContext(); 13375 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); 13376 } 13377 13378 /// Get the alignment of the load used for this slice. 
13379 unsigned getAlignment() const { 13380 unsigned Alignment = Origin->getAlignment(); 13381 unsigned Offset = getOffsetFromBase(); 13382 if (Offset != 0) 13383 Alignment = MinAlign(Alignment, Alignment + Offset); 13384 return Alignment; 13385 } 13386 13387 /// Check if this slice can be rewritten with legal operations. 13388 bool isLegal() const { 13389 // An invalid slice is not legal. 13390 if (!Origin || !Inst || !DAG) 13391 return false; 13392 13393 // Offsets are for indexed load only, we do not handle that. 13394 if (!Origin->getOffset().isUndef()) 13395 return false; 13396 13397 const TargetLowering &TLI = DAG->getTargetLoweringInfo(); 13398 13399 // Check that the type is legal. 13400 EVT SliceType = getLoadedType(); 13401 if (!TLI.isTypeLegal(SliceType)) 13402 return false; 13403 13404 // Check that the load is legal for this type. 13405 if (!TLI.isOperationLegal(ISD::LOAD, SliceType)) 13406 return false; 13407 13408 // Check that the offset can be computed. 13409 // 1. Check its type. 13410 EVT PtrType = Origin->getBasePtr().getValueType(); 13411 if (PtrType == MVT::Untyped || PtrType.isExtended()) 13412 return false; 13413 13414 // 2. Check that it fits in the immediate. 13415 if (!TLI.isLegalAddImmediate(getOffsetFromBase())) 13416 return false; 13417 13418 // 3. Check that the computation is legal. 13419 if (!TLI.isOperationLegal(ISD::ADD, PtrType)) 13420 return false; 13421 13422 // Check that the zext is legal if it needs one. 13423 EVT TruncateType = Inst->getValueType(0); 13424 if (TruncateType != SliceType && 13425 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType)) 13426 return false; 13427 13428 return true; 13429 } 13430 13431 /// Get the offset in bytes of this slice in the original chunk of 13432 /// bits. 13433 /// \pre DAG != nullptr. 
13434 uint64_t getOffsetFromBase() const { 13435 assert(DAG && "Missing context."); 13436 bool IsBigEndian = DAG->getDataLayout().isBigEndian(); 13437 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); 13438 uint64_t Offset = Shift / 8; 13439 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; 13440 assert(!(Origin->getValueSizeInBits(0) & 0x7) && 13441 "The size of the original loaded type is not a multiple of a" 13442 " byte."); 13443 // If Offset is bigger than TySizeInBytes, it means we are loading all 13444 // zeros. This should have been optimized before in the process. 13445 assert(TySizeInBytes > Offset && 13446 "Invalid shift amount for given loaded size"); 13447 if (IsBigEndian) 13448 Offset = TySizeInBytes - Offset - getLoadedSize(); 13449 return Offset; 13450 } 13451 13452 /// Generate the sequence of instructions to load the slice 13453 /// represented by this object and redirect the uses of this slice to 13454 /// this new sequence of instructions. 13455 /// \pre this->Inst && this->Origin are valid Instructions and this 13456 /// object passed the legal check: LoadedSlice::isLegal returned true. 13457 /// \return The last instruction of the sequence used to load the slice. 13458 SDValue loadSlice() const { 13459 assert(Inst && Origin && "Unable to replace a non-existing slice."); 13460 const SDValue &OldBaseAddr = Origin->getBasePtr(); 13461 SDValue BaseAddr = OldBaseAddr; 13462 // Get the offset in that chunk of bytes w.r.t. the endianness. 13463 int64_t Offset = static_cast<int64_t>(getOffsetFromBase()); 13464 assert(Offset >= 0 && "Offset too big to fit in int64_t!"); 13465 if (Offset) { 13466 // BaseAddr = BaseAddr + Offset. 13467 EVT ArithType = BaseAddr.getValueType(); 13468 SDLoc DL(Origin); 13469 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr, 13470 DAG->getConstant(Offset, DL, ArithType)); 13471 } 13472 13473 // Create the type of the loaded slice according to its size. 
13474 EVT SliceType = getLoadedType(); 13475 13476 // Create the load for the slice. 13477 SDValue LastInst = 13478 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, 13479 Origin->getPointerInfo().getWithOffset(Offset), 13480 getAlignment(), Origin->getMemOperand()->getFlags()); 13481 // If the final type is not the same as the loaded type, this means that 13482 // we have to pad with zero. Create a zero extend for that. 13483 EVT FinalType = Inst->getValueType(0); 13484 if (SliceType != FinalType) 13485 LastInst = 13486 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst); 13487 return LastInst; 13488 } 13489 13490 /// Check if this slice can be merged with an expensive cross register 13491 /// bank copy. E.g., 13492 /// i = load i32 13493 /// f = bitcast i32 i to float 13494 bool canMergeExpensiveCrossRegisterBankCopy() const { 13495 if (!Inst || !Inst->hasOneUse()) 13496 return false; 13497 SDNode *Use = *Inst->use_begin(); 13498 if (Use->getOpcode() != ISD::BITCAST) 13499 return false; 13500 assert(DAG && "Missing context"); 13501 const TargetLowering &TLI = DAG->getTargetLoweringInfo(); 13502 EVT ResVT = Use->getValueType(0); 13503 const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); 13504 const TargetRegisterClass *ArgRC = 13505 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); 13506 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) 13507 return false; 13508 13509 // At this point, we know that we perform a cross-register-bank copy. 13510 // Check if it is expensive. 13511 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo(); 13512 // Assume bitcasts are cheap, unless both register classes do not 13513 // explicitly share a common sub class. 13514 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) 13515 return false; 13516 13517 // Check if it will be merged with the load. 13518 // 1. Check the alignment constraint. 
13519 unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment( 13520 ResVT.getTypeForEVT(*DAG->getContext())); 13521 13522 if (RequiredAlignment > getAlignment()) 13523 return false; 13524 13525 // 2. Check that the load is a legal operation for that type. 13526 if (!TLI.isOperationLegal(ISD::LOAD, ResVT)) 13527 return false; 13528 13529 // 3. Check that we do not have a zext in the way. 13530 if (Inst->getValueType(0) != getLoadedType()) 13531 return false; 13532 13533 return true; 13534 } 13535 }; 13536 13537 } // end anonymous namespace 13538 13539 /// Check that all bits set in \p UsedBits form a dense region, i.e., 13540 /// \p UsedBits looks like 0..0 1..1 0..0. 13541 static bool areUsedBitsDense(const APInt &UsedBits) { 13542 // If all the bits are one, this is dense! 13543 if (UsedBits.isAllOnesValue()) 13544 return true; 13545 13546 // Get rid of the unused bits on the right. 13547 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros()); 13548 // Get rid of the unused bits on the left. 13549 if (NarrowedUsedBits.countLeadingZeros()) 13550 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); 13551 // Check that the chunk of bits is completely used. 13552 return NarrowedUsedBits.isAllOnesValue(); 13553 } 13554 13555 /// Check whether or not \p First and \p Second are next to each other 13556 /// in memory. This means that there is no hole between the bits loaded 13557 /// by \p First and the bits loaded by \p Second. 
static bool areSlicesNextToEachOther(const LoadedSlice &First,
                                     const LoadedSlice &Second) {
  assert(First.Origin == Second.Origin && First.Origin &&
         "Unable to match different memory origins.");
  APInt UsedBits = First.getUsedBits();
  assert((UsedBits & Second.getUsedBits()) == 0 &&
         "Slices are not supposed to overlap.");
  // The two slices are adjacent iff the union of their used bits has no hole.
  UsedBits |= Second.getUsedBits();
  return areUsedBitsDense(UsedBits);
}

/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// Each pair of slices that can be served by one paired load removes one
/// load from \p GlobalLSCost. \p LoadedSlices is sorted in place by offset
/// from the base.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) potential candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  // The loop increment sets the beginning of the next pair: First takes the
  // value of Second on every iteration, including those left via 'continue'.
  // Resetting Second to nullptr in the body therefore starts a fresh pair at
  // the following slice.
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices;
       ++CurrSlice, First = Second) {
    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement (only the alignment of the
    // first slice of the pair is compared).
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    // The pair can be served by one paired load: save one load.
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}

/// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
///
/// When StressLoadSlicing is set, any set of more than one slice is reported
/// as profitable and the three checks are skipped.
///
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
/// the elements themselves.
///
/// FIXME: When the cost model will be mature enough, we can relax
/// constraints (1) and (2).
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                const APInt &UsedBits, bool ForCodeSize) {
  unsigned NumberOfSlices = LoadedSlices.size();
  if (StressLoadSlicing)
    return NumberOfSlices > 1;

  // Check (1).
  if (NumberOfSlices != 2)
    return false;

  // Check (2).
  if (!areUsedBitsDense(UsedBits))
    return false;

  // Check (3).
LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
  // The original code has one big load.
  OrigCost.Loads = 1;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
    const LoadedSlice &LS = LoadedSlices[CurrSlice];
    // Accumulate the cost of all the slices.
    LoadedSlice::Cost SliceCost(LS, ForCodeSize);
    GlobalSlicingCost += SliceCost;

    // Account as cost in the original configuration the gain obtained
    // with the current slices.
    OrigCost.addSliceGain(LS);
  }

  // If the target supports paired load, adjust the cost accordingly.
  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
  // Slicing is profitable only if it is strictly cheaper than the original.
  return OrigCost > GlobalSlicingCost;
}

/// If the given load, \p N, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre \p N is a LoadSDNode (it is cast unconditionally). Volatile,
/// non-normal and non-integer loads are rejected.
/// \returns true if the load was replaced by independent slice loads,
/// false if nothing was changed.
bool DAGCombiner::SliceUpLoad(SDNode *N) {
  // Only run once the combiner level is at least AfterLegalizeDAG.
  if (Level < AfterLegalizeDAG)
    return false;

  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
      !LD->getValueType(0).isInteger())
    return false;

  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LD->getValueSizeInBits(0), 0);

  SmallVector<LoadedSlice, 4> LoadedSlices;

  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    SDNode *User = *UI;
    unsigned Shift = 0;

    // Check if this is a trunc(lshr): a single-use logical shift right by a
    // constant. If so, remember the shift amount and look at its user.
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = User->getConstantOperandVal(1);
      User = *User->use_begin();
    }

    // At this point, User is a Truncate iff we encountered trunc or
    // trunc(lshr). Any other kind of user aborts the whole transformation.
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;

    // The width of the type must be a power of 2 and at least 8 bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted with a non-8-bits multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;

  ++SlicedLoads;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.

  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    // loadSlice() may have wrapped the load in a single extra node (the
    // assert below names a zext); step through it to reach the load itself.
    if (SliceInst.getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    // Collect the chain result (value #1) of every slice load.
    ArgChains.push_back(SliceInst.getValue(1));
  }

  // Users of the original load's chain now depend on all the slice loads.
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  AddToWorklist(Chain.getNode());
  return true;
}

/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount.
///
/// \returns a pair (number of masked bytes, byte offset of the masked region
/// from the least significant byte). The pair is (0, 0) when \p V does not
/// match.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out
  // (cleared by the 'and') are 1 and the bits being kept are 0.  Use
  // getSExtValue so that leading bits follow the sign bit for uniformity.
uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  // Width, in bytes, of the run of bits that the 'and' clears.
  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  // For narrowing to be valid, it must be the case that the load is the
  // immediately preceding memory operation before the store.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() == ISD::TokenFactor &&
           SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use so there are no indirect dependencies.
    // Accept only if that single use is an operand of this TokenFactor.
    bool isOk = false;
    for (const SDValue &ChainOp : Chain->op_values())
      if (ChainOp.getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk)
      return Result;
  } else
    return Result; // Fail.

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}

/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
///
/// \param MaskInfo (number of bytes, byte shift) as produced by
///        CheckForMaskedLoad.
/// \param IVal the value or'ed into the masked load.
/// \param St the original (wide) store.
/// \param DC the combiner, used for the DAG, type legality and shift type.
/// \returns the node of the new narrow store, or nullptr if the
/// replacement is not possible.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this, i.e. IVal must be known zero outside of the bit range
  // [ByteShift*8, (ByteShift+NumBytes)*8).  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On little endian the byte shift is the memory offset; on big endian the
  // offset is counted from the other end of the stored value.
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign)
      .getNode();
}

/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
///
/// \pre \p N is a StoreSDNode (it is cast unconditionally).
/// \returns the replacement store, or an empty SDValue if nothing was done.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // From here on only "op (load), constant" with op in {or, xor, and} is
  // handled.
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // The load must be a normal, single-use load whose chain result is exactly
  // the chain of the store.
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For 'and', work on the complement of the constant: its set bits are the
    // ones the operation can change.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    // No strictly narrower type satisfied the constraints.
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only proceed if every changed bit falls inside the narrow window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      // Undo the complement taken above for 'and'.
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // Give up if the narrow access would be under-aligned for its type.
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // Redirect every user of the old load's chain to the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}

/// For a given floating point load / store pair, if the load value isn't used
/// by any other operations, then consider transforming the pair to integer
/// load / store operations if the target deems the transformation profitable.
///
/// \pre \p N is a StoreSDNode (it is cast unconditionally).
/// \returns the new integer store, or an empty SDValue if nothing was done.
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  // The stored value must be a single-use normal load whose chain result is
  // exactly the chain of the store.
  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
      Value.hasOneUse() &&
      Chain == SDValue(Value.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(Value);
    EVT VT = LD->getMemoryVT();
    // Only same-typed floating point accesses that are not non-temporal and
    // that live in address space 0 are handled.
    if (!VT.isFloatingPoint() ||
        VT != ST->getMemoryVT() ||
        LD->isNonTemporal() ||
        ST->isNonTemporal() ||
        LD->getPointerInfo().getAddrSpace() != 0 ||
        ST->getPointerInfo().getAddrSpace() != 0)
      return SDValue();

    // The integer type of the same width must be legal to load and store, and
    // the target must consider the transformation desirable.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
      return SDValue();

    // Both accesses must satisfy the ABI alignment of the integer type.
    unsigned LDAlign = LD->getAlignment();
    unsigned STAlign = ST->getAlignment();
    Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
    if (LDAlign < ABIAlign || STAlign < ABIAlign)
      return SDValue();

    SDValue NewLD =
        DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
                    LD->getPointerInfo(), LDAlign);

    SDValue NewST =
DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(), 14077 ST->getPointerInfo(), STAlign); 14078 14079 AddToWorklist(NewLD.getNode()); 14080 AddToWorklist(NewST.getNode()); 14081 WorklistRemover DeadNodes(*this); 14082 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); 14083 ++LdStFP2Int; 14084 return NewST; 14085 } 14086 14087 return SDValue(); 14088 } 14089 14090 // This is a helper function for visitMUL to check the profitability 14091 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). 14092 // MulNode is the original multiply, AddNode is (add x, c1), 14093 // and ConstNode is c2. 14094 // 14095 // If the (add x, c1) has multiple uses, we could increase 14096 // the number of adds if we make this transformation. 14097 // It would only be worth doing this if we can remove a 14098 // multiply in the process. Check for that here. 14099 // To illustrate: 14100 // (A + c1) * c3 14101 // (A + c2) * c3 14102 // We're checking for cases where we have common "c3 * A" expressions. 14103 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, 14104 SDValue &AddNode, 14105 SDValue &ConstNode) { 14106 APInt Val; 14107 14108 // If the add only has one use, this would be OK to do. 14109 if (AddNode.getNode()->hasOneUse()) 14110 return true; 14111 14112 // Walk all the users of the constant with which we're multiplying. 14113 for (SDNode *Use : ConstNode->uses()) { 14114 if (Use == MulNode) // This use is the one we're on right now. Skip it. 14115 continue; 14116 14117 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use. 14118 SDNode *OtherOp; 14119 SDNode *MulVar = AddNode.getOperand(0).getNode(); 14120 14121 // OtherOp is what we're multiplying against the constant. 14122 if (Use->getOperand(0) == ConstNode) 14123 OtherOp = Use->getOperand(1).getNode(); 14124 else 14125 OtherOp = Use->getOperand(0).getNode(); 14126 14127 // Check to see if multiply is with the same operand of our "add". 
14128 // 14129 // ConstNode = CONST 14130 // Use = ConstNode * A <-- visiting Use. OtherOp is A. 14131 // ... 14132 // AddNode = (A + c1) <-- MulVar is A. 14133 // = AddNode * ConstNode <-- current visiting instruction. 14134 // 14135 // If we make this transformation, we will have a common 14136 // multiply (ConstNode * A) that we can save. 14137 if (OtherOp == MulVar) 14138 return true; 14139 14140 // Now check to see if a future expansion will give us a common 14141 // multiply. 14142 // 14143 // ConstNode = CONST 14144 // AddNode = (A + c1) 14145 // ... = AddNode * ConstNode <-- current visiting instruction. 14146 // ... 14147 // OtherOp = (A + c2) 14148 // Use = OtherOp * ConstNode <-- visiting Use. 14149 // 14150 // If we make this transformation, we will have a common 14151 // multiply (CONST * A) after we also do the same transformation 14152 // to the "t2" instruction. 14153 if (OtherOp->getOpcode() == ISD::ADD && 14154 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && 14155 OtherOp->getOperand(0).getNode() == MulVar) 14156 return true; 14157 } 14158 } 14159 14160 // Didn't find a case where this would be profitable. 
14161 return false; 14162 } 14163 14164 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, 14165 unsigned NumStores) { 14166 SmallVector<SDValue, 8> Chains; 14167 SmallPtrSet<const SDNode *, 8> Visited; 14168 SDLoc StoreDL(StoreNodes[0].MemNode); 14169 14170 for (unsigned i = 0; i < NumStores; ++i) { 14171 Visited.insert(StoreNodes[i].MemNode); 14172 } 14173 14174 // don't include nodes that are children 14175 for (unsigned i = 0; i < NumStores; ++i) { 14176 if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0) 14177 Chains.push_back(StoreNodes[i].MemNode->getChain()); 14178 } 14179 14180 assert(Chains.size() > 0 && "Chain should have generated a chain"); 14181 return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains); 14182 } 14183 14184 bool DAGCombiner::MergeStoresOfConstantsOrVecElts( 14185 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, 14186 bool IsConstantSrc, bool UseVector, bool UseTrunc) { 14187 // Make sure we have something to merge. 14188 if (NumStores < 2) 14189 return false; 14190 14191 // The latest Node in the DAG. 14192 SDLoc DL(StoreNodes[0].MemNode); 14193 14194 int64_t ElementSizeBits = MemVT.getStoreSizeInBits(); 14195 unsigned SizeInBits = NumStores * ElementSizeBits; 14196 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; 14197 14198 EVT StoreTy; 14199 if (UseVector) { 14200 unsigned Elts = NumStores * NumMemElts; 14201 // Get the type for the merged vector store. 
StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  } else
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);

  SDValue StoredVal;
  if (UseVector) {
    if (IsConstantSrc) {
      // Gather the constants as the operands of one vector node.
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // If constant is of the wrong type, convert it now.
        if (MemVT != Val.getValueType()) {
          Val = peekThroughBitcasts(Val);
          // Deal with constants of wrong size.
          if (ElementSizeBits != Val.getValueSizeInBits()) {
            EVT IntMemVT =
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
            if (isa<ConstantFPSDNode>(Val)) {
              // Not clear how to truncate FP values.
              return false;
            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
              Val = DAG.getConstant(C->getAPIntValue()
                                        .zextOrTrunc(Val.getValueSizeInBits())
                                        .zextOrTrunc(ElementSizeBits),
                                    SDLoc(C), IntMemVT);
          }
          // Make sure the correctly sized value also has the correct type.
          Val = DAG.getBitcast(MemVT, Val);
        }
        BuildVector.push_back(Val);
      }
      // Vector elements are concatenated, scalar elements are built into a
      // vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = peekThroughBitcasts(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
        // type MemVT. If the underlying value is not the correct
        // type, but it is an extraction of an appropriate vector we
        // can recast Val to be of the correct type. This may require
        // converting between EXTRACT_VECTOR_ELT and
        // EXTRACT_SUBVECTOR.
        if ((MemVT != Val.getValueType()) &&
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
          EVT MemVTScalarTy = MemVT.getScalarType();
          // We may need to add a bitcast here to get types to line up.
          if (MemVTScalarTy != Val.getValueType().getScalarType()) {
            Val = DAG.getBitcast(MemVT, Val);
          } else {
            // Same scalar type: redo the extraction from the same vector and
            // index, with the opcode that yields MemVT.
            unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
                                            : ISD::EXTRACT_VECTOR_ELT;
            SDValue Vec = Val.getOperand(0);
            SDValue Idx = Val.getOperand(1);
            Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
          }
        }
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs. Each iteration shifts the accumulated value left by
    // one element and ors the next constant into the low bits; on little
    // endian targets the stores are visited in reverse order.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      Val = peekThroughBitcasts(Val);
      StoreInt <<= ElementSizeBits;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF()
                        .bitcastToAPInt()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary give up for now.
        if (MemVT.getSizeInBits() != ElementSizeBits)
          return false;
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Materialize the merged integer constant that will be stored.
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  // The merged store reuses the address, pointer info and alignment of the
  // first store in the list.
  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // Make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (!UseTrunc) {
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store
    // Widen the constant to the type the target transforms StoreTy to, then
    // store it truncated back to the original merged type.
    EVT LegalizedStoredValTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}

/// Collect in \p StoreNodes the stores that are candidates for being merged
/// with \p St, together with their offset relative to \p St's base pointer,
/// and set \p RootNode to the chain node from which the candidates were
/// searched. Nothing is collected, and \p RootNode is left untouched, if
/// \p St has no usable base pointer or its loaded source value does not
/// qualify.
void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SDNode *&RootNode) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  EVT MemVT = St->getMemoryVT();

  SDValue Val = peekThroughBitcasts(St->getValue());
  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return;

  // Do not handle stores to undef base pointers.
if (BasePtr.getBase().isUndef())
    return;

  // Classify the value stored by St; candidates must store the same kind of
  // value.
  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
  bool IsLoadSrc = isa<LoadSDNode>(Val);
  BaseIndexOffset LBasePtr;
  // Match on the load's base pointer if relevant.
  EVT LoadVT;
  if (IsLoadSrc) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld, DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
    // Loads must only have one use.
    if (!Ld->hasNUsesOfValue(1, 0))
      return;
    // The memory operands must not be volatile or indexed.
    if (Ld->isVolatile() || Ld->isIndexed())
      return;
  }
  // Returns true if Other can be merged with St. On success Ptr holds the
  // base/index/offset of Other and Offset its distance from St's base pointer.
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    if (Other->isVolatile() || Other->isIndexed())
      return false;
    SDValue Val = peekThroughBitcasts(Other->getValue());
    // Allow merging constants of different types as integers: integer memory
    // types only need to have the same bit width, other types must be equal.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    if (IsLoadSrc) {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
        auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
        if (LoadVT != OtherLd->getMemoryVT())
          return false;
        // Loads must only have one use.
        if (!OtherLd->hasNUsesOfValue(1, 0))
          return false;
        // The memory operands must not be volatile or indexed.
        if (OtherLd->isVolatile() || OtherLd->isIndexed())
          return false;
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
    if (IsConstantSrc) {
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
        return false;
    }
    if (IsExtractVecSrc) {
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(Val.getValueType()))
        return false;
      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
    }
    Ptr = BaseIndexOffset::match(Other, DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };

  // We are looking for a root node which is an ancestor to all mergeable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load,
  // which is always true for nonvolatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  // Root
  // |-------|-------|
  // Load    Load    Store3
  // |       |
  // Store1  Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  RootNode = St->getChain().getNode();

  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    // St is chained to a load: the root is that load's chain. Look at the
    // stores chained to any load that hangs off the root.
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          if (I2.getOperandNo() == 0)
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
              BaseIndexOffset Ptr;
              int64_t PtrDiff;
              if (CandidateMatch(OtherST, Ptr, PtrDiff))
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
            }
  } else
    // Otherwise look at the stores that use the root as their operand 0.
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0)
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
          BaseIndexOffset Ptr;
          int64_t PtrDiff;
          if (CandidateMatch(OtherST, Ptr, PtrDiff))
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
        }
}

// We need to check that merging these stores does not cause a loop in
// the DAG. Any store candidate may depend on another candidate
// indirectly through its operand (we already consider dependencies
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
    SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs the originating
  // stores from which worklist nodes come from in a similar way to
  // TokenFactor simplfication.

  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 8> Worklist;

  // RootNode is a predecessor to all candidates so we need not search
  // past it. Add RootNode (peeking through TokenFactors).
Do not count 14485 // these towards size check. 14486 14487 Worklist.push_back(RootNode); 14488 while (!Worklist.empty()) { 14489 auto N = Worklist.pop_back_val(); 14490 if (!Visited.insert(N).second) 14491 continue; // Already present in Visited. 14492 if (N->getOpcode() == ISD::TokenFactor) { 14493 for (SDValue Op : N->ops()) 14494 Worklist.push_back(Op.getNode()); 14495 } 14496 } 14497 14498 // Don't count pruning nodes towards max. 14499 unsigned int Max = 1024 + Visited.size(); 14500 // Search Ops of store candidates. 14501 for (unsigned i = 0; i < NumStores; ++i) { 14502 SDNode *N = StoreNodes[i].MemNode; 14503 // Of the 4 Store Operands: 14504 // * Chain (Op 0) -> We have already considered these 14505 // in candidate selection and can be 14506 // safely ignored 14507 // * Value (Op 1) -> Cycles may happen (e.g. through load chains) 14508 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant, 14509 // but aren't necessarily fromt the same base node, so 14510 // cycles possible (e.g. via indexed store). 14511 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for 14512 // non-indexed stores). Not constant on all targets (e.g. ARM) 14513 // and so can participate in a cycle. 14514 for (unsigned j = 1; j < N->getNumOperands(); ++j) 14515 Worklist.push_back(N->getOperand(j).getNode()); 14516 } 14517 // Search through DAG. We can stop early if we find a store node. 14518 for (unsigned i = 0; i < NumStores; ++i) 14519 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist, 14520 Max)) 14521 return false; 14522 return true; 14523 } 14524 14525 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { 14526 if (OptLevel == CodeGenOpt::None) 14527 return false; 14528 14529 EVT MemVT = St->getMemoryVT(); 14530 int64_t ElementSizeBytes = MemVT.getStoreSize(); 14531 unsigned NumMemElts = MemVT.isVector() ? 
MemVT.getVectorNumElements() : 1; 14532 14533 if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) 14534 return false; 14535 14536 bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute( 14537 Attribute::NoImplicitFloat); 14538 14539 // This function cannot currently deal with non-byte-sized memory sizes. 14540 if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) 14541 return false; 14542 14543 if (!MemVT.isSimple()) 14544 return false; 14545 14546 // Perform an early exit check. Do not bother looking at stored values that 14547 // are not constants, loads, or extracted vector elements. 14548 SDValue StoredVal = peekThroughBitcasts(St->getValue()); 14549 bool IsLoadSrc = isa<LoadSDNode>(StoredVal); 14550 bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || 14551 isa<ConstantFPSDNode>(StoredVal); 14552 bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 14553 StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR); 14554 14555 if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc) 14556 return false; 14557 14558 SmallVector<MemOpLink, 8> StoreNodes; 14559 SDNode *RootNode; 14560 // Find potential store merge candidates by searching through chain sub-DAG 14561 getStoreMergeCandidates(St, StoreNodes, RootNode); 14562 14563 // Check if there is anything to merge. 14564 if (StoreNodes.size() < 2) 14565 return false; 14566 14567 // Sort the memory operands according to their distance from the 14568 // base pointer. 14569 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) { 14570 return LHS.OffsetFromBase < RHS.OffsetFromBase; 14571 }); 14572 14573 // Store Merge attempts to merge the lowest stores. This generally 14574 // works out as if successful, as the remaining stores are checked 14575 // after the first collection of stores is merged. However, in the 14576 // case that a non-mergeable store is found first, e.g., {p[-2], 14577 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent 14578 // mergeable cases. 
To prevent this, we prune such stores from the 14579 // front of StoreNodes here. 14580 14581 bool RV = false; 14582 while (StoreNodes.size() > 1) { 14583 unsigned StartIdx = 0; 14584 while ((StartIdx + 1 < StoreNodes.size()) && 14585 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != 14586 StoreNodes[StartIdx + 1].OffsetFromBase) 14587 ++StartIdx; 14588 14589 // Bail if we don't have enough candidates to merge. 14590 if (StartIdx + 1 >= StoreNodes.size()) 14591 return RV; 14592 14593 if (StartIdx) 14594 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx); 14595 14596 // Scan the memory operations on the chain and find the first 14597 // non-consecutive store memory address. 14598 unsigned NumConsecutiveStores = 1; 14599 int64_t StartAddress = StoreNodes[0].OffsetFromBase; 14600 // Check that the addresses are consecutive starting from the second 14601 // element in the list of stores. 14602 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) { 14603 int64_t CurrAddress = StoreNodes[i].OffsetFromBase; 14604 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 14605 break; 14606 NumConsecutiveStores = i + 1; 14607 } 14608 14609 if (NumConsecutiveStores < 2) { 14610 StoreNodes.erase(StoreNodes.begin(), 14611 StoreNodes.begin() + NumConsecutiveStores); 14612 continue; 14613 } 14614 14615 // The node with the lowest store address. 14616 LLVMContext &Context = *DAG.getContext(); 14617 const DataLayout &DL = DAG.getDataLayout(); 14618 14619 // Store the constants into memory as one consecutive store. 
14620 if (IsConstantSrc) { 14621 while (NumConsecutiveStores >= 2) { 14622 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 14623 unsigned FirstStoreAS = FirstInChain->getAddressSpace(); 14624 unsigned FirstStoreAlign = FirstInChain->getAlignment(); 14625 unsigned LastLegalType = 1; 14626 unsigned LastLegalVectorType = 1; 14627 bool LastIntegerTrunc = false; 14628 bool NonZero = false; 14629 unsigned FirstZeroAfterNonZero = NumConsecutiveStores; 14630 for (unsigned i = 0; i < NumConsecutiveStores; ++i) { 14631 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); 14632 SDValue StoredVal = ST->getValue(); 14633 bool IsElementZero = false; 14634 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) 14635 IsElementZero = C->isNullValue(); 14636 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) 14637 IsElementZero = C->getConstantFPValue()->isNullValue(); 14638 if (IsElementZero) { 14639 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) 14640 FirstZeroAfterNonZero = i; 14641 } 14642 NonZero |= !IsElementZero; 14643 14644 // Find a legal type for the constant store. 14645 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; 14646 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); 14647 bool IsFast = false; 14648 14649 // Break early when size is too large to be legal. 14650 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) 14651 break; 14652 14653 if (TLI.isTypeLegal(StoreTy) && 14654 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && 14655 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, 14656 FirstStoreAlign, &IsFast) && 14657 IsFast) { 14658 LastIntegerTrunc = false; 14659 LastLegalType = i + 1; 14660 // Or check whether a truncstore is legal. 
14661 } else if (TLI.getTypeAction(Context, StoreTy) == 14662 TargetLowering::TypePromoteInteger) { 14663 EVT LegalizedStoredValTy = 14664 TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); 14665 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && 14666 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && 14667 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, 14668 FirstStoreAlign, &IsFast) && 14669 IsFast) { 14670 LastIntegerTrunc = true; 14671 LastLegalType = i + 1; 14672 } 14673 } 14674 14675 // We only use vectors if the constant is known to be zero or the 14676 // target allows it and the function is not marked with the 14677 // noimplicitfloat attribute. 14678 if ((!NonZero || 14679 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && 14680 !NoVectors) { 14681 // Find a legal type for the vector store. 14682 unsigned Elts = (i + 1) * NumMemElts; 14683 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); 14684 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && 14685 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && 14686 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, 14687 FirstStoreAlign, &IsFast) && 14688 IsFast) 14689 LastLegalVectorType = i + 1; 14690 } 14691 } 14692 14693 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; 14694 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; 14695 14696 // Check if we found a legal integer type that creates a meaningful 14697 // merge. 14698 if (NumElem < 2) { 14699 // We know that candidate stores are in order and of correct 14700 // shape. While there is no mergeable sequence from the 14701 // beginning one may start later in the sequence. The only 14702 // reason a merge of size N could have failed where another of 14703 // the same size would not have, is if the alignment has 14704 // improved or we've dropped a non-zero value. Drop as many 14705 // candidates as we can here. 
14706 unsigned NumSkip = 1; 14707 while ( 14708 (NumSkip < NumConsecutiveStores) && 14709 (NumSkip < FirstZeroAfterNonZero) && 14710 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) 14711 NumSkip++; 14712 14713 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); 14714 NumConsecutiveStores -= NumSkip; 14715 continue; 14716 } 14717 14718 // Check that we can merge these candidates without causing a cycle. 14719 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, 14720 RootNode)) { 14721 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); 14722 NumConsecutiveStores -= NumElem; 14723 continue; 14724 } 14725 14726 RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true, 14727 UseVector, LastIntegerTrunc); 14728 14729 // Remove merged stores for next iteration. 14730 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); 14731 NumConsecutiveStores -= NumElem; 14732 } 14733 continue; 14734 } 14735 14736 // When extracting multiple vector elements, try to store them 14737 // in one vector store rather than a sequence of scalar stores. 14738 if (IsExtractVecSrc) { 14739 // Loop on Consecutive Stores on success. 14740 while (NumConsecutiveStores >= 2) { 14741 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 14742 unsigned FirstStoreAS = FirstInChain->getAddressSpace(); 14743 unsigned FirstStoreAlign = FirstInChain->getAlignment(); 14744 unsigned NumStoresToMerge = 1; 14745 for (unsigned i = 0; i < NumConsecutiveStores; ++i) { 14746 // Find a legal type for the vector store. 14747 unsigned Elts = (i + 1) * NumMemElts; 14748 EVT Ty = 14749 EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); 14750 bool IsFast; 14751 14752 // Break early when size is too large to be legal. 
14753 if (Ty.getSizeInBits() > MaximumLegalStoreInBits) 14754 break; 14755 14756 if (TLI.isTypeLegal(Ty) && 14757 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && 14758 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, 14759 FirstStoreAlign, &IsFast) && 14760 IsFast) 14761 NumStoresToMerge = i + 1; 14762 } 14763 14764 // Check if we found a legal integer type creating a meaningful 14765 // merge. 14766 if (NumStoresToMerge < 2) { 14767 // We know that candidate stores are in order and of correct 14768 // shape. While there is no mergeable sequence from the 14769 // beginning one may start later in the sequence. The only 14770 // reason a merge of size N could have failed where another of 14771 // the same size would not have, is if the alignment has 14772 // improved. Drop as many candidates as we can here. 14773 unsigned NumSkip = 1; 14774 while ( 14775 (NumSkip < NumConsecutiveStores) && 14776 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) 14777 NumSkip++; 14778 14779 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); 14780 NumConsecutiveStores -= NumSkip; 14781 continue; 14782 } 14783 14784 // Check that we can merge these candidates without causing a cycle. 14785 if (!checkMergeStoreCandidatesForDependencies( 14786 StoreNodes, NumStoresToMerge, RootNode)) { 14787 StoreNodes.erase(StoreNodes.begin(), 14788 StoreNodes.begin() + NumStoresToMerge); 14789 NumConsecutiveStores -= NumStoresToMerge; 14790 continue; 14791 } 14792 14793 RV |= MergeStoresOfConstantsOrVecElts( 14794 StoreNodes, MemVT, NumStoresToMerge, false, true, false); 14795 14796 StoreNodes.erase(StoreNodes.begin(), 14797 StoreNodes.begin() + NumStoresToMerge); 14798 NumConsecutiveStores -= NumStoresToMerge; 14799 } 14800 continue; 14801 } 14802 14803 // Below we handle the case of multiple consecutive stores that 14804 // come from multiple consecutive loads. We merge them into a single 14805 // wide load and a single wide store. 
14806 14807 // Look for load nodes which are used by the stored values. 14808 SmallVector<MemOpLink, 8> LoadNodes; 14809 14810 // Find acceptable loads. Loads need to have the same chain (token factor), 14811 // must not be zext, volatile, indexed, and they must be consecutive. 14812 BaseIndexOffset LdBasePtr; 14813 14814 for (unsigned i = 0; i < NumConsecutiveStores; ++i) { 14815 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 14816 SDValue Val = peekThroughBitcasts(St->getValue()); 14817 LoadSDNode *Ld = cast<LoadSDNode>(Val); 14818 14819 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); 14820 // If this is not the first ptr that we check. 14821 int64_t LdOffset = 0; 14822 if (LdBasePtr.getBase().getNode()) { 14823 // The base ptr must be the same. 14824 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) 14825 break; 14826 } else { 14827 // Check that all other base pointers are the same as this one. 14828 LdBasePtr = LdPtr; 14829 } 14830 14831 // We found a potential memory operand to merge. 14832 LoadNodes.push_back(MemOpLink(Ld, LdOffset)); 14833 } 14834 14835 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { 14836 // If we have load/store pair instructions and we only have two values, 14837 // don't bother merging. 
14838 unsigned RequiredAlignment; 14839 if (LoadNodes.size() == 2 && 14840 TLI.hasPairedLoad(MemVT, RequiredAlignment) && 14841 StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { 14842 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); 14843 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); 14844 break; 14845 } 14846 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 14847 unsigned FirstStoreAS = FirstInChain->getAddressSpace(); 14848 unsigned FirstStoreAlign = FirstInChain->getAlignment(); 14849 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); 14850 unsigned FirstLoadAS = FirstLoad->getAddressSpace(); 14851 unsigned FirstLoadAlign = FirstLoad->getAlignment(); 14852 14853 // Scan the memory operations on the chain and find the first 14854 // non-consecutive load memory address. These variables hold the index in 14855 // the store node array. 14856 14857 unsigned LastConsecutiveLoad = 1; 14858 14859 // This variable refers to the size and not index in the array. 14860 unsigned LastLegalVectorType = 1; 14861 unsigned LastLegalIntegerType = 1; 14862 bool isDereferenceable = true; 14863 bool DoIntegerTruncate = false; 14864 StartAddress = LoadNodes[0].OffsetFromBase; 14865 SDValue FirstChain = FirstLoad->getChain(); 14866 for (unsigned i = 1; i < LoadNodes.size(); ++i) { 14867 // All loads must share the same chain. 14868 if (LoadNodes[i].MemNode->getChain() != FirstChain) 14869 break; 14870 14871 int64_t CurrAddress = LoadNodes[i].OffsetFromBase; 14872 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 14873 break; 14874 LastConsecutiveLoad = i; 14875 14876 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) 14877 isDereferenceable = false; 14878 14879 // Find a legal type for the vector store. 14880 unsigned Elts = (i + 1) * NumMemElts; 14881 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); 14882 14883 // Break early when size is too large to be legal. 
14884 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) 14885 break; 14886 14887 bool IsFastSt, IsFastLd; 14888 if (TLI.isTypeLegal(StoreTy) && 14889 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && 14890 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, 14891 FirstStoreAlign, &IsFastSt) && 14892 IsFastSt && 14893 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, 14894 FirstLoadAlign, &IsFastLd) && 14895 IsFastLd) { 14896 LastLegalVectorType = i + 1; 14897 } 14898 14899 // Find a legal type for the integer store. 14900 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; 14901 StoreTy = EVT::getIntegerVT(Context, SizeInBits); 14902 if (TLI.isTypeLegal(StoreTy) && 14903 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && 14904 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, 14905 FirstStoreAlign, &IsFastSt) && 14906 IsFastSt && 14907 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, 14908 FirstLoadAlign, &IsFastLd) && 14909 IsFastLd) { 14910 LastLegalIntegerType = i + 1; 14911 DoIntegerTruncate = false; 14912 // Or check whether a truncstore and extload is legal. 
14913 } else if (TLI.getTypeAction(Context, StoreTy) == 14914 TargetLowering::TypePromoteInteger) { 14915 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); 14916 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && 14917 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && 14918 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, 14919 StoreTy) && 14920 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, 14921 StoreTy) && 14922 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && 14923 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, 14924 FirstStoreAlign, &IsFastSt) && 14925 IsFastSt && 14926 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, 14927 FirstLoadAlign, &IsFastLd) && 14928 IsFastLd) { 14929 LastLegalIntegerType = i + 1; 14930 DoIntegerTruncate = true; 14931 } 14932 } 14933 } 14934 14935 // Only use vector types if the vector type is larger than the integer 14936 // type. If they are the same, use integers. 14937 bool UseVectorTy = 14938 LastLegalVectorType > LastLegalIntegerType && !NoVectors; 14939 unsigned LastLegalType = 14940 std::max(LastLegalVectorType, LastLegalIntegerType); 14941 14942 // We add +1 here because the LastXXX variables refer to location while 14943 // the NumElem refers to array/index size. 14944 unsigned NumElem = 14945 std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); 14946 NumElem = std::min(LastLegalType, NumElem); 14947 14948 if (NumElem < 2) { 14949 // We know that candidate stores are in order and of correct 14950 // shape. While there is no mergeable sequence from the 14951 // beginning one may start later in the sequence. The only 14952 // reason a merge of size N could have failed where another of 14953 // the same size would not have is if the alignment or either 14954 // the load or store has improved. Drop as many candidates as we 14955 // can here. 
14956 unsigned NumSkip = 1; 14957 while ((NumSkip < LoadNodes.size()) && 14958 (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && 14959 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) 14960 NumSkip++; 14961 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); 14962 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); 14963 NumConsecutiveStores -= NumSkip; 14964 continue; 14965 } 14966 14967 // Check that we can merge these candidates without causing a cycle. 14968 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, 14969 RootNode)) { 14970 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); 14971 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); 14972 NumConsecutiveStores -= NumElem; 14973 continue; 14974 } 14975 14976 // Find if it is better to use vectors or integers to load and store 14977 // to memory. 14978 EVT JointMemOpVT; 14979 if (UseVectorTy) { 14980 // Find a legal type for the vector store. 14981 unsigned Elts = NumElem * NumMemElts; 14982 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); 14983 } else { 14984 unsigned SizeInBits = NumElem * ElementSizeBytes * 8; 14985 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); 14986 } 14987 14988 SDLoc LoadDL(LoadNodes[0].MemNode); 14989 SDLoc StoreDL(StoreNodes[0].MemNode); 14990 14991 // The merged loads are required to have the same incoming chain, so 14992 // using the first's chain is acceptable. 14993 14994 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); 14995 AddToWorklist(NewStoreChain.getNode()); 14996 14997 MachineMemOperand::Flags MMOFlags = 14998 isDereferenceable ? 
MachineMemOperand::MODereferenceable 14999 : MachineMemOperand::MONone; 15000 15001 SDValue NewLoad, NewStore; 15002 if (UseVectorTy || !DoIntegerTruncate) { 15003 NewLoad = 15004 DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), 15005 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), 15006 FirstLoadAlign, MMOFlags); 15007 NewStore = DAG.getStore( 15008 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), 15009 FirstInChain->getPointerInfo(), FirstStoreAlign); 15010 } else { // This must be the truncstore/extload case 15011 EVT ExtendedTy = 15012 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); 15013 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, 15014 FirstLoad->getChain(), FirstLoad->getBasePtr(), 15015 FirstLoad->getPointerInfo(), JointMemOpVT, 15016 FirstLoadAlign, MMOFlags); 15017 NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, 15018 FirstInChain->getBasePtr(), 15019 FirstInChain->getPointerInfo(), 15020 JointMemOpVT, FirstInChain->getAlignment(), 15021 FirstInChain->getMemOperand()->getFlags()); 15022 } 15023 15024 // Transfer chain users from old loads to the new load. 15025 for (unsigned i = 0; i < NumElem; ++i) { 15026 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); 15027 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), 15028 SDValue(NewLoad.getNode(), 1)); 15029 } 15030 15031 // Replace the all stores with the new store. Recursively remove 15032 // corresponding value if its no longer used. 
15033 for (unsigned i = 0; i < NumElem; ++i) { 15034 SDValue Val = StoreNodes[i].MemNode->getOperand(1); 15035 CombineTo(StoreNodes[i].MemNode, NewStore); 15036 if (Val.getNode()->use_empty()) 15037 recursivelyDeleteUnusedNodes(Val.getNode()); 15038 } 15039 15040 RV = true; 15041 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); 15042 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); 15043 NumConsecutiveStores -= NumElem; 15044 } 15045 } 15046 return RV; 15047 } 15048 15049 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { 15050 SDLoc SL(ST); 15051 SDValue ReplStore; 15052 15053 // Replace the chain to avoid dependency. 15054 if (ST->isTruncatingStore()) { 15055 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(), 15056 ST->getBasePtr(), ST->getMemoryVT(), 15057 ST->getMemOperand()); 15058 } else { 15059 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), 15060 ST->getMemOperand()); 15061 } 15062 15063 // Create token to keep both nodes around. 15064 SDValue Token = DAG.getNode(ISD::TokenFactor, SL, 15065 MVT::Other, ST->getChain(), ReplStore); 15066 15067 // Make sure the new and old chains are cleaned up. 15068 AddToWorklist(Token.getNode()); 15069 15070 // Don't add users to work list. 15071 return CombineTo(ST, Token, false); 15072 } 15073 15074 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { 15075 SDValue Value = ST->getValue(); 15076 if (Value.getOpcode() == ISD::TargetConstantFP) 15077 return SDValue(); 15078 15079 SDLoc DL(ST); 15080 15081 SDValue Chain = ST->getChain(); 15082 SDValue Ptr = ST->getBasePtr(); 15083 15084 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value); 15085 15086 // NOTE: If the original store is volatile, this transform must not increase 15087 // the number of stores. For example, on x86-32 an f64 can be stored in one 15088 // processor operation but an i64 (which is not legal) requires two. 
So the 15089 // transform should not be done in this case. 15090 15091 SDValue Tmp; 15092 switch (CFP->getSimpleValueType(0).SimpleTy) { 15093 default: 15094 llvm_unreachable("Unknown FP type"); 15095 case MVT::f16: // We don't do this for these yet. 15096 case MVT::f80: 15097 case MVT::f128: 15098 case MVT::ppcf128: 15099 return SDValue(); 15100 case MVT::f32: 15101 if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || 15102 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 15103 ; 15104 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 15105 bitcastToAPInt().getZExtValue(), SDLoc(CFP), 15106 MVT::i32); 15107 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand()); 15108 } 15109 15110 return SDValue(); 15111 case MVT::f64: 15112 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && 15113 !ST->isVolatile()) || 15114 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { 15115 ; 15116 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 15117 getZExtValue(), SDLoc(CFP), MVT::i64); 15118 return DAG.getStore(Chain, DL, Tmp, 15119 Ptr, ST->getMemOperand()); 15120 } 15121 15122 if (!ST->isVolatile() && 15123 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 15124 // Many FP stores are not made apparent until after legalize, e.g. for 15125 // argument passing. Since this is so common, custom legalize the 15126 // 64-bit integer store into two 32-bit stores. 
15127 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); 15128 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); 15129 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); 15130 if (DAG.getDataLayout().isBigEndian()) 15131 std::swap(Lo, Hi); 15132 15133 unsigned Alignment = ST->getAlignment(); 15134 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); 15135 AAMDNodes AAInfo = ST->getAAInfo(); 15136 15137 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), 15138 ST->getAlignment(), MMOFlags, AAInfo); 15139 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 15140 DAG.getConstant(4, DL, Ptr.getValueType())); 15141 Alignment = MinAlign(Alignment, 4U); 15142 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, 15143 ST->getPointerInfo().getWithOffset(4), 15144 Alignment, MMOFlags, AAInfo); 15145 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 15146 St0, St1); 15147 } 15148 15149 return SDValue(); 15150 } 15151 } 15152 15153 SDValue DAGCombiner::visitSTORE(SDNode *N) { 15154 StoreSDNode *ST = cast<StoreSDNode>(N); 15155 SDValue Chain = ST->getChain(); 15156 SDValue Value = ST->getValue(); 15157 SDValue Ptr = ST->getBasePtr(); 15158 15159 // If this is a store of a bit convert, store the input value if the 15160 // resultant store does not need a higher alignment than the original. 15161 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && 15162 ST->isUnindexed()) { 15163 EVT SVT = Value.getOperand(0).getValueType(); 15164 // If the store is volatile, we only want to change the store type if the 15165 // resulting store is legal. Otherwise we might increase the number of 15166 // memory accesses. We don't care if the original type was legal or not 15167 // as we assume software couldn't rely on the number of accesses of an 15168 // illegal type. 
15169 if (((!LegalOperations && !ST->isVolatile()) || 15170 TLI.isOperationLegal(ISD::STORE, SVT)) && 15171 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) { 15172 unsigned OrigAlign = ST->getAlignment(); 15173 bool Fast = false; 15174 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT, 15175 ST->getAddressSpace(), OrigAlign, &Fast) && 15176 Fast) { 15177 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, 15178 ST->getPointerInfo(), OrigAlign, 15179 ST->getMemOperand()->getFlags(), ST->getAAInfo()); 15180 } 15181 } 15182 } 15183 15184 // Turn 'store undef, Ptr' -> nothing. 15185 if (Value.isUndef() && ST->isUnindexed()) 15186 return Chain; 15187 15188 // Try to infer better alignment information than the store already has. 15189 if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { 15190 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { 15191 if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) { 15192 SDValue NewStore = 15193 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), 15194 ST->getMemoryVT(), Align, 15195 ST->getMemOperand()->getFlags(), ST->getAAInfo()); 15196 // NewStore will always be N as we are only refining the alignment 15197 assert(NewStore.getNode() == N); 15198 (void)NewStore; 15199 } 15200 } 15201 } 15202 15203 // Try transforming a pair floating point load / store ops to integer 15204 // load / store ops. 15205 if (SDValue NewST = TransformFPLoadStorePair(N)) 15206 return NewST; 15207 15208 if (ST->isUnindexed()) { 15209 // Walk up chain skipping non-aliasing memory nodes, on this store and any 15210 // adjacent stores. 15211 if (findBetterNeighborChains(ST)) { 15212 // replaceStoreChain uses CombineTo, which handled all of the worklist 15213 // manipulation. Return the original node to not do anything else. 15214 return SDValue(ST, 0); 15215 } 15216 Chain = ST->getChain(); 15217 } 15218 15219 // FIXME: is there such a thing as a truncating indexed store? 
15220 if (ST->isTruncatingStore() && ST->isUnindexed() && 15221 Value.getValueType().isInteger() && 15222 (!isa<ConstantSDNode>(Value) || 15223 !cast<ConstantSDNode>(Value)->isOpaque())) { 15224 // See if we can simplify the input to this truncstore with knowledge that 15225 // only the low bits are being used. For example: 15226 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" 15227 SDValue Shorter = DAG.GetDemandedBits( 15228 Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), 15229 ST->getMemoryVT().getScalarSizeInBits())); 15230 AddToWorklist(Value.getNode()); 15231 if (Shorter.getNode()) 15232 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, 15233 Ptr, ST->getMemoryVT(), ST->getMemOperand()); 15234 15235 // Otherwise, see if we can simplify the operation with 15236 // SimplifyDemandedBits, which only works if the value has a single use. 15237 if (SimplifyDemandedBits( 15238 Value, 15239 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), 15240 ST->getMemoryVT().getScalarSizeInBits()))) { 15241 // Re-visit the store if anything changed and the store hasn't been merged 15242 // with another node (N is deleted) SimplifyDemandedBits will add Value's 15243 // node back to the worklist if necessary, but we also need to re-visit 15244 // the Store node itself. 15245 if (N->getOpcode() != ISD::DELETED_NODE) 15246 AddToWorklist(N); 15247 return SDValue(N, 0); 15248 } 15249 } 15250 15251 // If this is a load followed by a store to the same location, then the store 15252 // is dead/noop. 15253 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { 15254 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && 15255 ST->isUnindexed() && !ST->isVolatile() && 15256 // There can't be any side effects between the load and store, such as 15257 // a call or store. 15258 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { 15259 // The store is dead, remove it. 
15260 return Chain; 15261 } 15262 } 15263 15264 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) { 15265 if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() && 15266 !ST1->isVolatile() && ST1->getBasePtr() == Ptr && 15267 ST->getMemoryVT() == ST1->getMemoryVT()) { 15268 // If this is a store followed by a store with the same value to the same 15269 // location, then the store is dead/noop. 15270 if (ST1->getValue() == Value) { 15271 // The store is dead, remove it. 15272 return Chain; 15273 } 15274 15275 // If this is a store who's preceeding store to the same location 15276 // and no one other node is chained to that store we can effectively 15277 // drop the store. Do not remove stores to undef as they may be used as 15278 // data sinks. 15279 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() && 15280 !ST1->getBasePtr().isUndef()) { 15281 // ST1 is fully overwritten and can be elided. Combine with it's chain 15282 // value. 15283 CombineTo(ST1, ST1->getChain()); 15284 return SDValue(); 15285 } 15286 } 15287 } 15288 15289 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a 15290 // truncating store. We can do this even if this is already a truncstore. 15291 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) 15292 && Value.getNode()->hasOneUse() && ST->isUnindexed() && 15293 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), 15294 ST->getMemoryVT())) { 15295 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), 15296 Ptr, ST->getMemoryVT(), ST->getMemOperand()); 15297 } 15298 15299 // Always perform this optimization before types are legal. If the target 15300 // prefers, also try this after legalization to catch stores that were created 15301 // by intrinsics or other nodes. 15302 if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) { 15303 while (true) { 15304 // There can be multiple store sequences on the same chain. 
15305 // Keep trying to merge store sequences until we are unable to do so 15306 // or until we merge the last store on the chain. 15307 bool Changed = MergeConsecutiveStores(ST); 15308 if (!Changed) break; 15309 // Return N as merge only uses CombineTo and no worklist clean 15310 // up is necessary. 15311 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N)) 15312 return SDValue(N, 0); 15313 } 15314 } 15315 15316 // Try transforming N to an indexed store. 15317 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) 15318 return SDValue(N, 0); 15319 15320 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' 15321 // 15322 // Make sure to do this only after attempting to merge stores in order to 15323 // avoid changing the types of some subset of stores due to visit order, 15324 // preventing their merging. 15325 if (isa<ConstantFPSDNode>(ST->getValue())) { 15326 if (SDValue NewSt = replaceStoreOfFPConstant(ST)) 15327 return NewSt; 15328 } 15329 15330 if (SDValue NewSt = splitMergedValStore(ST)) 15331 return NewSt; 15332 15333 return ReduceLoadOpStoreWidth(N); 15334 } 15335 15336 /// For the instruction sequence of store below, F and I values 15337 /// are bundled together as an i64 value before being stored into memory. 15338 /// Sometimes it is more efficent to generate separate stores for F and I, 15339 /// which can remove the bitwise instructions or sink them to colder places. 15340 /// 15341 /// (store (or (zext (bitcast F to i32) to i64), 15342 /// (shl (zext I to i64), 32)), addr) --> 15343 /// (store F, addr) and (store I, addr+4) 15344 /// 15345 /// Similarly, splitting for other merged store can also be beneficial, like: 15346 /// For pair of {i32, i32}, i64 store --> two i32 stores. 15347 /// For pair of {i32, i16}, i64 store --> two i32 stores. 15348 /// For pair of {i16, i16}, i32 store --> two i16 stores. 15349 /// For pair of {i16, i8}, i32 store --> two i16 stores. 
15350 /// For pair of {i8, i8}, i16 store --> two i8 stores. 15351 /// 15352 /// We allow each target to determine specifically which kind of splitting is 15353 /// supported. 15354 /// 15355 /// The store patterns are commonly seen from the simple code snippet below 15356 /// if only std::make_pair(...) is sroa transformed before inlined into hoo. 15357 /// void goo(const std::pair<int, float> &); 15358 /// hoo() { 15359 /// ... 15360 /// goo(std::make_pair(tmp, ftmp)); 15361 /// ... 15362 /// } 15363 /// 15364 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { 15365 if (OptLevel == CodeGenOpt::None) 15366 return SDValue(); 15367 15368 SDValue Val = ST->getValue(); 15369 SDLoc DL(ST); 15370 15371 // Match OR operand. 15372 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR) 15373 return SDValue(); 15374 15375 // Match SHL operand and get Lower and Higher parts of Val. 15376 SDValue Op1 = Val.getOperand(0); 15377 SDValue Op2 = Val.getOperand(1); 15378 SDValue Lo, Hi; 15379 if (Op1.getOpcode() != ISD::SHL) { 15380 std::swap(Op1, Op2); 15381 if (Op1.getOpcode() != ISD::SHL) 15382 return SDValue(); 15383 } 15384 Lo = Op2; 15385 Hi = Op1.getOperand(0); 15386 if (!Op1.hasOneUse()) 15387 return SDValue(); 15388 15389 // Match shift amount to HalfValBitSize. 15390 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2; 15391 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1)); 15392 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize) 15393 return SDValue(); 15394 15395 // Lo and Hi are zero-extended from int with size less equal than 32 15396 // to i64. 
15397 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() || 15398 !Lo.getOperand(0).getValueType().isScalarInteger() || 15399 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize || 15400 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() || 15401 !Hi.getOperand(0).getValueType().isScalarInteger() || 15402 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize) 15403 return SDValue(); 15404 15405 // Use the EVT of low and high parts before bitcast as the input 15406 // of target query. 15407 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST) 15408 ? Lo.getOperand(0).getValueType() 15409 : Lo.getValueType(); 15410 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST) 15411 ? Hi.getOperand(0).getValueType() 15412 : Hi.getValueType(); 15413 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) 15414 return SDValue(); 15415 15416 // Start to split store. 15417 unsigned Alignment = ST->getAlignment(); 15418 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); 15419 AAMDNodes AAInfo = ST->getAAInfo(); 15420 15421 // Change the sizes of Lo and Hi's value types to HalfValBitSize. 15422 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize); 15423 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0)); 15424 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0)); 15425 15426 SDValue Chain = ST->getChain(); 15427 SDValue Ptr = ST->getBasePtr(); 15428 // Lower value store. 15429 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), 15430 ST->getAlignment(), MMOFlags, AAInfo); 15431 Ptr = 15432 DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 15433 DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType())); 15434 // Higher value store. 
15435 SDValue St1 = 15436 DAG.getStore(St0, DL, Hi, Ptr, 15437 ST->getPointerInfo().getWithOffset(HalfValBitSize / 8), 15438 Alignment / 2, MMOFlags, AAInfo); 15439 return St1; 15440 } 15441 15442 /// Convert a disguised subvector insertion into a shuffle: 15443 /// insert_vector_elt V, (bitcast X from vector type), IdxC --> 15444 /// bitcast(shuffle (bitcast V), (extended X), Mask) 15445 /// Note: We do not use an insert_subvector node because that requires a legal 15446 /// subvector type. 15447 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { 15448 SDValue InsertVal = N->getOperand(1); 15449 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || 15450 !InsertVal.getOperand(0).getValueType().isVector()) 15451 return SDValue(); 15452 15453 SDValue SubVec = InsertVal.getOperand(0); 15454 SDValue DestVec = N->getOperand(0); 15455 EVT SubVecVT = SubVec.getValueType(); 15456 EVT VT = DestVec.getValueType(); 15457 unsigned NumSrcElts = SubVecVT.getVectorNumElements(); 15458 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits(); 15459 unsigned NumMaskVals = ExtendRatio * NumSrcElts; 15460 15461 // Step 1: Create a shuffle mask that implements this insert operation. The 15462 // vector that we are inserting into will be operand 0 of the shuffle, so 15463 // those elements are just 'i'. The inserted subvector is in the first 15464 // positions of operand 1 of the shuffle. Example: 15465 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7} 15466 SmallVector<int, 16> Mask(NumMaskVals); 15467 for (unsigned i = 0; i != NumMaskVals; ++i) { 15468 if (i / NumSrcElts == InsIndex) 15469 Mask[i] = (i % NumSrcElts) + NumMaskVals; 15470 else 15471 Mask[i] = i; 15472 } 15473 15474 // Bail out if the target can not handle the shuffle we want to create. 
15475 EVT SubVecEltVT = SubVecVT.getVectorElementType(); 15476 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals); 15477 if (!TLI.isShuffleMaskLegal(Mask, ShufVT)) 15478 return SDValue(); 15479 15480 // Step 2: Create a wide vector from the inserted source vector by appending 15481 // undefined elements. This is the same size as our destination vector. 15482 SDLoc DL(N); 15483 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT)); 15484 ConcatOps[0] = SubVec; 15485 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps); 15486 15487 // Step 3: Shuffle in the padded subvector. 15488 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec); 15489 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask); 15490 AddToWorklist(PaddedSubV.getNode()); 15491 AddToWorklist(DestVecBC.getNode()); 15492 AddToWorklist(Shuf.getNode()); 15493 return DAG.getBitcast(VT, Shuf); 15494 } 15495 15496 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { 15497 SDValue InVec = N->getOperand(0); 15498 SDValue InVal = N->getOperand(1); 15499 SDValue EltNo = N->getOperand(2); 15500 SDLoc DL(N); 15501 15502 // If the inserted element is an UNDEF, just use the input vector. 15503 if (InVal.isUndef()) 15504 return InVec; 15505 15506 EVT VT = InVec.getValueType(); 15507 unsigned NumElts = VT.getVectorNumElements(); 15508 15509 // Remove redundant insertions: 15510 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x 15511 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 15512 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1)) 15513 return InVec; 15514 15515 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); 15516 if (!IndexC) { 15517 // If this is variable insert to undef vector, it might be better to splat: 15518 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... 
> 15519 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) { 15520 SmallVector<SDValue, 8> Ops(NumElts, InVal); 15521 return DAG.getBuildVector(VT, DL, Ops); 15522 } 15523 return SDValue(); 15524 } 15525 15526 // We must know which element is being inserted for folds below here. 15527 unsigned Elt = IndexC->getZExtValue(); 15528 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) 15529 return Shuf; 15530 15531 // Canonicalize insert_vector_elt dag nodes. 15532 // Example: 15533 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1) 15534 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0) 15535 // 15536 // Do this only if the child insert_vector node has one use; also 15537 // do this only if indices are both constants and Idx1 < Idx0. 15538 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse() 15539 && isa<ConstantSDNode>(InVec.getOperand(2))) { 15540 unsigned OtherElt = InVec.getConstantOperandVal(2); 15541 if (Elt < OtherElt) { 15542 // Swap nodes. 15543 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, 15544 InVec.getOperand(0), InVal, EltNo); 15545 AddToWorklist(NewOp.getNode()); 15546 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), 15547 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2)); 15548 } 15549 } 15550 15551 // If we can't generate a legal BUILD_VECTOR, exit 15552 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) 15553 return SDValue(); 15554 15555 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially 15556 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the 15557 // vector elements. 15558 SmallVector<SDValue, 8> Ops; 15559 // Do not combine these two vectors if the output vector will not replace 15560 // the input vector. 
15561 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { 15562 Ops.append(InVec.getNode()->op_begin(), 15563 InVec.getNode()->op_end()); 15564 } else if (InVec.isUndef()) { 15565 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType())); 15566 } else { 15567 return SDValue(); 15568 } 15569 assert(Ops.size() == NumElts && "Unexpected vector size"); 15570 15571 // Insert the element 15572 if (Elt < Ops.size()) { 15573 // All the operands of BUILD_VECTOR must have the same type; 15574 // we enforce that here. 15575 EVT OpVT = Ops[0].getValueType(); 15576 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal; 15577 } 15578 15579 // Return the new vector 15580 return DAG.getBuildVector(VT, DL, Ops); 15581 } 15582 15583 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, 15584 SDValue EltNo, 15585 LoadSDNode *OriginalLoad) { 15586 assert(!OriginalLoad->isVolatile()); 15587 15588 EVT ResultVT = EVE->getValueType(0); 15589 EVT VecEltVT = InVecVT.getVectorElementType(); 15590 unsigned Align = OriginalLoad->getAlignment(); 15591 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( 15592 VecEltVT.getTypeForEVT(*DAG.getContext())); 15593 15594 if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) 15595 return SDValue(); 15596 15597 ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ? 
15598 ISD::NON_EXTLOAD : ISD::EXTLOAD; 15599 if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) 15600 return SDValue(); 15601 15602 Align = NewAlign; 15603 15604 SDValue NewPtr = OriginalLoad->getBasePtr(); 15605 SDValue Offset; 15606 EVT PtrType = NewPtr.getValueType(); 15607 MachinePointerInfo MPI; 15608 SDLoc DL(EVE); 15609 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { 15610 int Elt = ConstEltNo->getZExtValue(); 15611 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; 15612 Offset = DAG.getConstant(PtrOff, DL, PtrType); 15613 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); 15614 } else { 15615 Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType); 15616 Offset = DAG.getNode( 15617 ISD::MUL, DL, PtrType, Offset, 15618 DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType)); 15619 MPI = OriginalLoad->getPointerInfo(); 15620 } 15621 NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); 15622 15623 // The replacement we need to do here is a little tricky: we need to 15624 // replace an extractelement of a load with a load. 15625 // Use ReplaceAllUsesOfValuesWith to do the replacement. 15626 // Note that this replacement assumes that the extractvalue is the only 15627 // use of the load; that's okay because we don't want to perform this 15628 // transformation in other cases anyway. 15629 SDValue Load; 15630 SDValue Chain; 15631 if (ResultVT.bitsGT(VecEltVT)) { 15632 // If the result type of vextract is wider than the load, then issue an 15633 // extending load instead. 15634 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, 15635 VecEltVT) 15636 ? 
ISD::ZEXTLOAD 15637 : ISD::EXTLOAD; 15638 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, 15639 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT, 15640 Align, OriginalLoad->getMemOperand()->getFlags(), 15641 OriginalLoad->getAAInfo()); 15642 Chain = Load.getValue(1); 15643 } else { 15644 Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, 15645 MPI, Align, OriginalLoad->getMemOperand()->getFlags(), 15646 OriginalLoad->getAAInfo()); 15647 Chain = Load.getValue(1); 15648 if (ResultVT.bitsLT(VecEltVT)) 15649 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); 15650 else 15651 Load = DAG.getBitcast(ResultVT, Load); 15652 } 15653 WorklistRemover DeadNodes(*this); 15654 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; 15655 SDValue To[] = { Load, Chain }; 15656 DAG.ReplaceAllUsesOfValuesWith(From, To, 2); 15657 // Since we're explicitly calling ReplaceAllUses, add the new node to the 15658 // worklist explicitly as well. 15659 AddToWorklist(Load.getNode()); 15660 AddUsersToWorklist(Load.getNode()); // Add users too 15661 // Make sure to revisit this node to clean it up; it will usually be dead. 15662 AddToWorklist(EVE); 15663 ++OpsNarrowed; 15664 return SDValue(EVE, 0); 15665 } 15666 15667 /// Transform a vector binary operation into a scalar binary operation by moving 15668 /// the math/logic after an extract element of a vector. 15669 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, 15670 bool LegalOperations) { 15671 SDValue Vec = ExtElt->getOperand(0); 15672 SDValue Index = ExtElt->getOperand(1); 15673 auto *IndexC = dyn_cast<ConstantSDNode>(Index); 15674 if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse()) 15675 return SDValue(); 15676 15677 // Targets may want to avoid this to prevent an expensive register transfer. 
15678 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 15679 if (!TLI.shouldScalarizeBinop(Vec)) 15680 return SDValue(); 15681 15682 // Extracting an element of a vector constant is constant-folded, so this 15683 // transform is just replacing a vector op with a scalar op while moving the 15684 // extract. 15685 SDValue Op0 = Vec.getOperand(0); 15686 SDValue Op1 = Vec.getOperand(1); 15687 if (isAnyConstantBuildVector(Op0, true) || 15688 isAnyConstantBuildVector(Op1, true)) { 15689 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C' 15690 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC) 15691 SDLoc DL(ExtElt); 15692 EVT VT = ExtElt->getValueType(0); 15693 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index); 15694 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index); 15695 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1); 15696 } 15697 15698 return SDValue(); 15699 } 15700 15701 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { 15702 SDValue VecOp = N->getOperand(0); 15703 SDValue Index = N->getOperand(1); 15704 EVT ScalarVT = N->getValueType(0); 15705 EVT VecVT = VecOp.getValueType(); 15706 if (VecOp.isUndef()) 15707 return DAG.getUNDEF(ScalarVT); 15708 15709 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val 15710 // 15711 // This only really matters if the index is non-constant since other combines 15712 // on the constant elements already work. 15713 SDLoc DL(N); 15714 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT && 15715 Index == VecOp.getOperand(2)) { 15716 SDValue Elt = VecOp.getOperand(1); 15717 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt; 15718 } 15719 15720 // (vextract (scalar_to_vector val, 0) -> val 15721 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) { 15722 // Check if the result type doesn't match the inserted element type. 
A 15723 // SCALAR_TO_VECTOR may truncate the inserted element and the 15724 // EXTRACT_VECTOR_ELT may widen the extracted vector. 15725 SDValue InOp = VecOp.getOperand(0); 15726 if (InOp.getValueType() != ScalarVT) { 15727 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger()); 15728 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT); 15729 } 15730 return InOp; 15731 } 15732 15733 // extract_vector_elt of out-of-bounds element -> UNDEF 15734 auto *IndexC = dyn_cast<ConstantSDNode>(Index); 15735 unsigned NumElts = VecVT.getVectorNumElements(); 15736 if (IndexC && IndexC->getAPIntValue().uge(NumElts)) 15737 return DAG.getUNDEF(ScalarVT); 15738 15739 // extract_vector_elt (build_vector x, y), 1 -> y 15740 if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR && 15741 TLI.isTypeLegal(VecVT) && 15742 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) { 15743 SDValue Elt = VecOp.getOperand(IndexC->getZExtValue()); 15744 EVT InEltVT = Elt.getValueType(); 15745 15746 // Sometimes build_vector's scalar input types do not match result type. 15747 if (ScalarVT == InEltVT) 15748 return Elt; 15749 15750 // TODO: It may be useful to truncate if free if the build_vector implicitly 15751 // converts. 15752 } 15753 15754 // TODO: These transforms should not require the 'hasOneUse' restriction, but 15755 // there are regressions on multiple targets without it. We can end up with a 15756 // mess of scalar and vector code if we reduce only part of the DAG to scalar. 15757 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() && 15758 VecOp.hasOneUse()) { 15759 // The vector index of the LSBs of the source depend on the endian-ness. 15760 bool IsLE = DAG.getDataLayout().isLittleEndian(); 15761 unsigned ExtractIndex = IndexC->getZExtValue(); 15762 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x) 15763 unsigned BCTruncElt = IsLE ? 
0 : NumElts - 1; 15764 SDValue BCSrc = VecOp.getOperand(0); 15765 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger()) 15766 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc); 15767 15768 if (LegalTypes && BCSrc.getValueType().isInteger() && 15769 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) { 15770 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt --> 15771 // trunc i64 X to i32 15772 SDValue X = BCSrc.getOperand(0); 15773 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() && 15774 "Extract element and scalar to vector can't change element type " 15775 "from FP to integer."); 15776 unsigned XBitWidth = X.getValueSizeInBits(); 15777 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits(); 15778 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1; 15779 15780 // An extract element return value type can be wider than its vector 15781 // operand element type. In that case, the high bits are undefined, so 15782 // it's possible that we may need to extend rather than truncate. 15783 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) { 15784 assert(XBitWidth % VecEltBitWidth == 0 && 15785 "Scalar bitwidth must be a multiple of vector element bitwidth"); 15786 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT); 15787 } 15788 } 15789 } 15790 15791 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations)) 15792 return BO; 15793 15794 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. 15795 // We only perform this optimization before the op legalization phase because 15796 // we may introduce new vector instructions which are not backed by TD 15797 // patterns. For example on AVX, extracting elements from a wide vector 15798 // without using extract_subvector. However, if we can find an underlying 15799 // scalar value, then we can always use that. 
15800 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) { 15801 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp); 15802 // Find the new index to extract from. 15803 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue()); 15804 15805 // Extracting an undef index is undef. 15806 if (OrigElt == -1) 15807 return DAG.getUNDEF(ScalarVT); 15808 15809 // Select the right vector half to extract from. 15810 SDValue SVInVec; 15811 if (OrigElt < (int)NumElts) { 15812 SVInVec = VecOp.getOperand(0); 15813 } else { 15814 SVInVec = VecOp.getOperand(1); 15815 OrigElt -= NumElts; 15816 } 15817 15818 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { 15819 SDValue InOp = SVInVec.getOperand(OrigElt); 15820 if (InOp.getValueType() != ScalarVT) { 15821 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger()); 15822 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT); 15823 } 15824 15825 return InOp; 15826 } 15827 15828 // FIXME: We should handle recursing on other vector shuffles and 15829 // scalar_to_vector here as well. 15830 15831 if (!LegalOperations || 15832 // FIXME: Should really be just isOperationLegalOrCustom. 15833 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) || 15834 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) { 15835 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 15836 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec, 15837 DAG.getConstant(OrigElt, DL, IndexTy)); 15838 } 15839 } 15840 15841 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can 15842 // simplify it based on the (valid) extraction indices. 
15843 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) { 15844 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 15845 Use->getOperand(0) == VecOp && 15846 isa<ConstantSDNode>(Use->getOperand(1)); 15847 })) { 15848 APInt DemandedElts = APInt::getNullValue(NumElts); 15849 for (SDNode *Use : VecOp->uses()) { 15850 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1)); 15851 if (CstElt->getAPIntValue().ult(NumElts)) 15852 DemandedElts.setBit(CstElt->getZExtValue()); 15853 } 15854 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) { 15855 // We simplified the vector operand of this extract element. If this 15856 // extract is not dead, visit it again so it is folded properly. 15857 if (N->getOpcode() != ISD::DELETED_NODE) 15858 AddToWorklist(N); 15859 return SDValue(N, 0); 15860 } 15861 } 15862 15863 // Everything under here is trying to match an extract of a loaded value. 15864 // If the result of load has to be truncated, then it's not necessarily 15865 // profitable. 15866 bool BCNumEltsChanged = false; 15867 EVT ExtVT = VecVT.getVectorElementType(); 15868 EVT LVT = ExtVT; 15869 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT)) 15870 return SDValue(); 15871 15872 if (VecOp.getOpcode() == ISD::BITCAST) { 15873 // Don't duplicate a load with other uses. 
15874 if (!VecOp.hasOneUse()) 15875 return SDValue(); 15876 15877 EVT BCVT = VecOp.getOperand(0).getValueType(); 15878 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) 15879 return SDValue(); 15880 if (NumElts != BCVT.getVectorNumElements()) 15881 BCNumEltsChanged = true; 15882 VecOp = VecOp.getOperand(0); 15883 ExtVT = BCVT.getVectorElementType(); 15884 } 15885 15886 // extract (vector load $addr), i --> load $addr + i * size 15887 if (!LegalOperations && !IndexC && VecOp.hasOneUse() && 15888 ISD::isNormalLoad(VecOp.getNode()) && 15889 !Index->hasPredecessor(VecOp.getNode())) { 15890 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp); 15891 if (VecLoad && !VecLoad->isVolatile()) 15892 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad); 15893 } 15894 15895 // Perform only after legalization to ensure build_vector / vector_shuffle 15896 // optimizations have already been done. 15897 if (!LegalOperations || !IndexC) 15898 return SDValue(); 15899 15900 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) 15901 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) 15902 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) 15903 int Elt = IndexC->getZExtValue(); 15904 LoadSDNode *LN0 = nullptr; 15905 if (ISD::isNormalLoad(VecOp.getNode())) { 15906 LN0 = cast<LoadSDNode>(VecOp); 15907 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR && 15908 VecOp.getOperand(0).getValueType() == ExtVT && 15909 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) { 15910 // Don't duplicate a load with other uses. 15911 if (!VecOp.hasOneUse()) 15912 return SDValue(); 15913 15914 LN0 = cast<LoadSDNode>(VecOp.getOperand(0)); 15915 } 15916 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) { 15917 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) 15918 // => 15919 // (load $addr+1*size) 15920 15921 // Don't duplicate a load with other uses. 
15922 if (!VecOp.hasOneUse()) 15923 return SDValue(); 15924 15925 // If the bit convert changed the number of elements, it is unsafe 15926 // to examine the mask. 15927 if (BCNumEltsChanged) 15928 return SDValue(); 15929 15930 // Select the input vector, guarding against out of range extract vector. 15931 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt); 15932 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1); 15933 15934 if (VecOp.getOpcode() == ISD::BITCAST) { 15935 // Don't duplicate a load with other uses. 15936 if (!VecOp.hasOneUse()) 15937 return SDValue(); 15938 15939 VecOp = VecOp.getOperand(0); 15940 } 15941 if (ISD::isNormalLoad(VecOp.getNode())) { 15942 LN0 = cast<LoadSDNode>(VecOp); 15943 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts; 15944 Index = DAG.getConstant(Elt, DL, Index.getValueType()); 15945 } 15946 } 15947 15948 // Make sure we found a non-volatile load and the extractelement is 15949 // the only use. 15950 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) 15951 return SDValue(); 15952 15953 // If Idx was -1 above, Elt is going to be -1, so just return undef. 15954 if (Elt == -1) 15955 return DAG.getUNDEF(LVT); 15956 15957 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0); 15958 } 15959 15960 // Simplify (build_vec (ext )) to (bitcast (build_vec )) 15961 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { 15962 // We perform this optimization post type-legalization because 15963 // the type-legalizer often scalarizes integer-promoted vectors. 15964 // Performing this optimization before may create bit-casts which 15965 // will be type-legalized to complex code sequences. 15966 // We perform this optimization only before the operation legalizer because we 15967 // may introduce illegal operations. 
15968 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes) 15969 return SDValue(); 15970 15971 unsigned NumInScalars = N->getNumOperands(); 15972 SDLoc DL(N); 15973 EVT VT = N->getValueType(0); 15974 15975 // Check to see if this is a BUILD_VECTOR of a bunch of values 15976 // which come from any_extend or zero_extend nodes. If so, we can create 15977 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR 15978 // optimizations. We do not handle sign-extend because we can't fill the sign 15979 // using shuffles. 15980 EVT SourceType = MVT::Other; 15981 bool AllAnyExt = true; 15982 15983 for (unsigned i = 0; i != NumInScalars; ++i) { 15984 SDValue In = N->getOperand(i); 15985 // Ignore undef inputs. 15986 if (In.isUndef()) continue; 15987 15988 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; 15989 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; 15990 15991 // Abort if the element is not an extension. 15992 if (!ZeroExt && !AnyExt) { 15993 SourceType = MVT::Other; 15994 break; 15995 } 15996 15997 // The input is a ZeroExt or AnyExt. Check the original type. 15998 EVT InTy = In.getOperand(0).getValueType(); 15999 16000 // Check that all of the widened source types are the same. 16001 if (SourceType == MVT::Other) 16002 // First time. 16003 SourceType = InTy; 16004 else if (InTy != SourceType) { 16005 // Multiple income types. Abort. 16006 SourceType = MVT::Other; 16007 break; 16008 } 16009 16010 // Check if all of the extends are ANY_EXTENDs. 16011 AllAnyExt &= AnyExt; 16012 } 16013 16014 // In order to have valid types, all of the inputs must be extended from the 16015 // same source type and all of the inputs must be any or zero extend. 16016 // Scalar sizes must be a power of two. 
16017 EVT OutScalarTy = VT.getScalarType(); 16018 bool ValidTypes = SourceType != MVT::Other && 16019 isPowerOf2_32(OutScalarTy.getSizeInBits()) && 16020 isPowerOf2_32(SourceType.getSizeInBits()); 16021 16022 // Create a new simpler BUILD_VECTOR sequence which other optimizations can 16023 // turn into a single shuffle instruction. 16024 if (!ValidTypes) 16025 return SDValue(); 16026 16027 bool isLE = DAG.getDataLayout().isLittleEndian(); 16028 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); 16029 assert(ElemRatio > 1 && "Invalid element size ratio"); 16030 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): 16031 DAG.getConstant(0, DL, SourceType); 16032 16033 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); 16034 SmallVector<SDValue, 8> Ops(NewBVElems, Filler); 16035 16036 // Populate the new build_vector 16037 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 16038 SDValue Cast = N->getOperand(i); 16039 assert((Cast.getOpcode() == ISD::ANY_EXTEND || 16040 Cast.getOpcode() == ISD::ZERO_EXTEND || 16041 Cast.isUndef()) && "Invalid cast opcode"); 16042 SDValue In; 16043 if (Cast.isUndef()) 16044 In = DAG.getUNDEF(SourceType); 16045 else 16046 In = Cast->getOperand(0); 16047 unsigned Index = isLE ? (i * ElemRatio) : 16048 (i * ElemRatio + (ElemRatio - 1)); 16049 16050 assert(Index < Ops.size() && "Invalid index"); 16051 Ops[Index] = In; 16052 } 16053 16054 // The type of the new BUILD_VECTOR node. 16055 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); 16056 assert(VecVT.getSizeInBits() == VT.getSizeInBits() && 16057 "Invalid vector size"); 16058 // Check if the new vector type is legal. 16059 if (!isTypeLegal(VecVT) || 16060 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) && 16061 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))) 16062 return SDValue(); 16063 16064 // Make the new BUILD_VECTOR. 
16065 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops); 16066 16067 // The new BUILD_VECTOR node has the potential to be further optimized. 16068 AddToWorklist(BV.getNode()); 16069 // Bitcast to the desired type. 16070 return DAG.getBitcast(VT, BV); 16071 } 16072 16073 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, 16074 ArrayRef<int> VectorMask, 16075 SDValue VecIn1, SDValue VecIn2, 16076 unsigned LeftIdx) { 16077 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 16078 SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy); 16079 16080 EVT VT = N->getValueType(0); 16081 EVT InVT1 = VecIn1.getValueType(); 16082 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1; 16083 16084 unsigned Vec2Offset = 0; 16085 unsigned NumElems = VT.getVectorNumElements(); 16086 unsigned ShuffleNumElems = NumElems; 16087 16088 // In case both the input vectors are extracted from same base 16089 // vector we do not need extra addend (Vec2Offset) while 16090 // computing shuffle mask. 16091 if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) || 16092 !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) || 16093 !(VecIn1.getOperand(0) == VecIn2.getOperand(0))) 16094 Vec2Offset = InVT1.getVectorNumElements(); 16095 16096 // We can't generate a shuffle node with mismatched input and output types. 16097 // Try to make the types match the type of the output. 16098 if (InVT1 != VT || InVT2 != VT) { 16099 if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) { 16100 // If the output vector length is a multiple of both input lengths, 16101 // we can concatenate them and pad the rest with undefs. 16102 unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits(); 16103 assert(NumConcats >= 2 && "Concat needs at least two inputs!"); 16104 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1)); 16105 ConcatOps[0] = VecIn1; 16106 ConcatOps[1] = VecIn2 ? 
VecIn2 : DAG.getUNDEF(InVT1); 16107 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); 16108 VecIn2 = SDValue(); 16109 } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) { 16110 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems)) 16111 return SDValue(); 16112 16113 if (!VecIn2.getNode()) { 16114 // If we only have one input vector, and it's twice the size of the 16115 // output, split it in two. 16116 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, 16117 DAG.getConstant(NumElems, DL, IdxTy)); 16118 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx); 16119 // Since we now have shorter input vectors, adjust the offset of the 16120 // second vector's start. 16121 Vec2Offset = NumElems; 16122 } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) { 16123 // VecIn1 is wider than the output, and we have another, possibly 16124 // smaller input. Pad the smaller input with undefs, shuffle at the 16125 // input vector width, and extract the output. 16126 // The shuffle type is different than VT, so check legality again. 16127 if (LegalOperations && 16128 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1)) 16129 return SDValue(); 16130 16131 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to 16132 // lower it back into a BUILD_VECTOR. So if the inserted type is 16133 // illegal, don't even try. 16134 if (InVT1 != InVT2) { 16135 if (!TLI.isTypeLegal(InVT2)) 16136 return SDValue(); 16137 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1, 16138 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx); 16139 } 16140 ShuffleNumElems = NumElems * 2; 16141 } else { 16142 // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider 16143 // than VecIn1. We can't handle this for now - this case will disappear 16144 // when we start sorting the vectors by type. 
16145 return SDValue(); 16146 } 16147 } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() && 16148 InVT1.getSizeInBits() == VT.getSizeInBits()) { 16149 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2)); 16150 ConcatOps[0] = VecIn2; 16151 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); 16152 } else { 16153 // TODO: Support cases where the length mismatch isn't exactly by a 16154 // factor of 2. 16155 // TODO: Move this check upwards, so that if we have bad type 16156 // mismatches, we don't create any DAG nodes. 16157 return SDValue(); 16158 } 16159 } 16160 16161 // Initialize mask to undef. 16162 SmallVector<int, 8> Mask(ShuffleNumElems, -1); 16163 16164 // Only need to run up to the number of elements actually used, not the 16165 // total number of elements in the shuffle - if we are shuffling a wider 16166 // vector, the high lanes should be set to undef. 16167 for (unsigned i = 0; i != NumElems; ++i) { 16168 if (VectorMask[i] <= 0) 16169 continue; 16170 16171 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1); 16172 if (VectorMask[i] == (int)LeftIdx) { 16173 Mask[i] = ExtIndex; 16174 } else if (VectorMask[i] == (int)LeftIdx + 1) { 16175 Mask[i] = Vec2Offset + ExtIndex; 16176 } 16177 } 16178 16179 // The type the input vectors may have changed above. 16180 InVT1 = VecIn1.getValueType(); 16181 16182 // If we already have a VecIn2, it should have the same type as VecIn1. 16183 // If we don't, get an undef/zero vector of the appropriate type. 16184 VecIn2 = VecIn2.getNode() ? 
VecIn2 : DAG.getUNDEF(InVT1); 16185 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type."); 16186 16187 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask); 16188 if (ShuffleNumElems > NumElems) 16189 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx); 16190 16191 return Shuffle; 16192 } 16193 16194 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { 16195 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector"); 16196 16197 // First, determine where the build vector is not undef. 16198 // TODO: We could extend this to handle zero elements as well as undefs. 16199 int NumBVOps = BV->getNumOperands(); 16200 int ZextElt = -1; 16201 for (int i = 0; i != NumBVOps; ++i) { 16202 SDValue Op = BV->getOperand(i); 16203 if (Op.isUndef()) 16204 continue; 16205 if (ZextElt == -1) 16206 ZextElt = i; 16207 else 16208 return SDValue(); 16209 } 16210 // Bail out if there's no non-undef element. 16211 if (ZextElt == -1) 16212 return SDValue(); 16213 16214 // The build vector contains some number of undef elements and exactly 16215 // one other element. That other element must be a zero-extended scalar 16216 // extracted from a vector at a constant index to turn this into a shuffle. 16217 // Also, require that the build vector does not implicitly truncate/extend 16218 // its elements. 16219 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND. 
16220 EVT VT = BV->getValueType(0); 16221 SDValue Zext = BV->getOperand(ZextElt); 16222 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() || 16223 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT || 16224 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) || 16225 Zext.getValueSizeInBits() != VT.getScalarSizeInBits()) 16226 return SDValue(); 16227 16228 // The zero-extend must be a multiple of the source size, and we must be 16229 // building a vector of the same size as the source of the extract element. 16230 SDValue Extract = Zext.getOperand(0); 16231 unsigned DestSize = Zext.getValueSizeInBits(); 16232 unsigned SrcSize = Extract.getValueSizeInBits(); 16233 if (DestSize % SrcSize != 0 || 16234 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits()) 16235 return SDValue(); 16236 16237 // Create a shuffle mask that will combine the extracted element with zeros 16238 // and undefs. 16239 int ZextRatio = DestSize / SrcSize; 16240 int NumMaskElts = NumBVOps * ZextRatio; 16241 SmallVector<int, 32> ShufMask(NumMaskElts, -1); 16242 for (int i = 0; i != NumMaskElts; ++i) { 16243 if (i / ZextRatio == ZextElt) { 16244 // The low bits of the (potentially translated) extracted element map to 16245 // the source vector. The high bits map to zero. We will use a zero vector 16246 // as the 2nd source operand of the shuffle, so use the 1st element of 16247 // that vector (mask value is number-of-elements) for the high bits. 16248 if (i % ZextRatio == 0) 16249 ShufMask[i] = Extract.getConstantOperandVal(1); 16250 else 16251 ShufMask[i] = NumMaskElts; 16252 } 16253 16254 // Undef elements of the build vector remain undef because we initialize 16255 // the shuffle mask with -1. 16256 } 16257 16258 // Turn this into a shuffle with zero if that's legal. 
16259 EVT VecVT = Extract.getOperand(0).getValueType(); 16260 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT)) 16261 return SDValue(); 16262 16263 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... --> 16264 // bitcast (shuffle V, ZeroVec, VectorMask) 16265 SDLoc DL(BV); 16266 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT); 16267 SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec, 16268 ShufMask); 16269 return DAG.getBitcast(VT, Shuf); 16270 } 16271 16272 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT 16273 // operations. If the types of the vectors we're extracting from allow it, 16274 // turn this into a vector_shuffle node. 16275 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { 16276 SDLoc DL(N); 16277 EVT VT = N->getValueType(0); 16278 16279 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. 16280 if (!isTypeLegal(VT)) 16281 return SDValue(); 16282 16283 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG)) 16284 return V; 16285 16286 // May only combine to shuffle after legalize if shuffle is legal. 16287 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) 16288 return SDValue(); 16289 16290 bool UsesZeroVector = false; 16291 unsigned NumElems = N->getNumOperands(); 16292 16293 // Record, for each element of the newly built vector, which input vector 16294 // that element comes from. -1 stands for undef, 0 for the zero vector, 16295 // and positive values for the input vectors. 16296 // VectorMask maps each element to its vector number, and VecIn maps vector 16297 // numbers to their initial SDValues. 16298 16299 SmallVector<int, 8> VectorMask(NumElems, -1); 16300 SmallVector<SDValue, 8> VecIn; 16301 VecIn.push_back(SDValue()); 16302 16303 for (unsigned i = 0; i != NumElems; ++i) { 16304 SDValue Op = N->getOperand(i); 16305 16306 if (Op.isUndef()) 16307 continue; 16308 16309 // See if we can use a blend with a zero vector. 
16310 // TODO: Should we generalize this to a blend with an arbitrary constant 16311 // vector? 16312 if (isNullConstant(Op) || isNullFPConstant(Op)) { 16313 UsesZeroVector = true; 16314 VectorMask[i] = 0; 16315 continue; 16316 } 16317 16318 // Not an undef or zero. If the input is something other than an 16319 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out. 16320 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 16321 !isa<ConstantSDNode>(Op.getOperand(1))) 16322 return SDValue(); 16323 SDValue ExtractedFromVec = Op.getOperand(0); 16324 16325 APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue(); 16326 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements())) 16327 return SDValue(); 16328 16329 // All inputs must have the same element type as the output. 16330 if (VT.getVectorElementType() != 16331 ExtractedFromVec.getValueType().getVectorElementType()) 16332 return SDValue(); 16333 16334 // Have we seen this input vector before? 16335 // The vectors are expected to be tiny (usually 1 or 2 elements), so using 16336 // a map back from SDValues to numbers isn't worth it. 16337 unsigned Idx = std::distance( 16338 VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec)); 16339 if (Idx == VecIn.size()) 16340 VecIn.push_back(ExtractedFromVec); 16341 16342 VectorMask[i] = Idx; 16343 } 16344 16345 // If we didn't find at least one input vector, bail out. 16346 if (VecIn.size() < 2) 16347 return SDValue(); 16348 16349 // If all the Operands of BUILD_VECTOR extract from same 16350 // vector, then split the vector efficiently based on the maximum 16351 // vector access index and adjust the VectorMask and 16352 // VecIn accordingly. 
16353 if (VecIn.size() == 2) { 16354 unsigned MaxIndex = 0; 16355 unsigned NearestPow2 = 0; 16356 SDValue Vec = VecIn.back(); 16357 EVT InVT = Vec.getValueType(); 16358 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 16359 SmallVector<unsigned, 8> IndexVec(NumElems, 0); 16360 16361 for (unsigned i = 0; i < NumElems; i++) { 16362 if (VectorMask[i] <= 0) 16363 continue; 16364 unsigned Index = N->getOperand(i).getConstantOperandVal(1); 16365 IndexVec[i] = Index; 16366 MaxIndex = std::max(MaxIndex, Index); 16367 } 16368 16369 NearestPow2 = PowerOf2Ceil(MaxIndex); 16370 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 && 16371 NumElems * 2 < NearestPow2) { 16372 unsigned SplitSize = NearestPow2 / 2; 16373 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), 16374 InVT.getVectorElementType(), SplitSize); 16375 if (TLI.isTypeLegal(SplitVT)) { 16376 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, 16377 DAG.getConstant(SplitSize, DL, IdxTy)); 16378 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, 16379 DAG.getConstant(0, DL, IdxTy)); 16380 VecIn.pop_back(); 16381 VecIn.push_back(VecIn1); 16382 VecIn.push_back(VecIn2); 16383 16384 for (unsigned i = 0; i < NumElems; i++) { 16385 if (VectorMask[i] <= 0) 16386 continue; 16387 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2; 16388 } 16389 } 16390 } 16391 } 16392 16393 // TODO: We want to sort the vectors by descending length, so that adjacent 16394 // pairs have similar length, and the longer vector is always first in the 16395 // pair. 16396 16397 // TODO: Should this fire if some of the input vectors has illegal type (like 16398 // it does now), or should we let legalization run its course first? 16399 16400 // Shuffle phase: 16401 // Take pairs of vectors, and shuffle them so that the result has elements 16402 // from these vectors in the correct places. 
16403 // For example, given: 16404 // t10: i32 = extract_vector_elt t1, Constant:i64<0> 16405 // t11: i32 = extract_vector_elt t2, Constant:i64<0> 16406 // t12: i32 = extract_vector_elt t3, Constant:i64<0> 16407 // t13: i32 = extract_vector_elt t1, Constant:i64<1> 16408 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13 16409 // We will generate: 16410 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2 16411 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef 16412 SmallVector<SDValue, 4> Shuffles; 16413 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) { 16414 unsigned LeftIdx = 2 * In + 1; 16415 SDValue VecLeft = VecIn[LeftIdx]; 16416 SDValue VecRight = 16417 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue(); 16418 16419 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft, 16420 VecRight, LeftIdx)) 16421 Shuffles.push_back(Shuffle); 16422 else 16423 return SDValue(); 16424 } 16425 16426 // If we need the zero vector as an "ingredient" in the blend tree, add it 16427 // to the list of shuffles. 16428 if (UsesZeroVector) 16429 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT) 16430 : DAG.getConstantFP(0.0, DL, VT)); 16431 16432 // If we only have one shuffle, we're done. 16433 if (Shuffles.size() == 1) 16434 return Shuffles[0]; 16435 16436 // Update the vector mask to point to the post-shuffle vectors. 16437 for (int &Vec : VectorMask) 16438 if (Vec == 0) 16439 Vec = Shuffles.size() - 1; 16440 else 16441 Vec = (Vec - 1) / 2; 16442 16443 // More than one shuffle. Generate a binary tree of blends, e.g. 
if from 16444 // the previous step we got the set of shuffles t10, t11, t12, t13, we will 16445 // generate: 16446 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2 16447 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4 16448 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6 16449 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8 16450 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11 16451 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13 16452 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21 16453 16454 // Make sure the initial size of the shuffle list is even. 16455 if (Shuffles.size() % 2) 16456 Shuffles.push_back(DAG.getUNDEF(VT)); 16457 16458 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) { 16459 if (CurSize % 2) { 16460 Shuffles[CurSize] = DAG.getUNDEF(VT); 16461 CurSize++; 16462 } 16463 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) { 16464 int Left = 2 * In; 16465 int Right = 2 * In + 1; 16466 SmallVector<int, 8> Mask(NumElems, -1); 16467 for (unsigned i = 0; i != NumElems; ++i) { 16468 if (VectorMask[i] == Left) { 16469 Mask[i] = i; 16470 VectorMask[i] = In; 16471 } else if (VectorMask[i] == Right) { 16472 Mask[i] = i + NumElems; 16473 VectorMask[i] = In; 16474 } 16475 } 16476 16477 Shuffles[In] = 16478 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask); 16479 } 16480 } 16481 return Shuffles[0]; 16482 } 16483 16484 // Try to turn a build vector of zero extends of extract vector elts into a 16485 // a vector zero extend and possibly an extract subvector. 16486 // TODO: Support sign extend or any extend? 16487 // TODO: Allow undef elements? 16488 // TODO: Don't require the extracts to start at element 0. 
16489 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) { 16490 if (LegalOperations) 16491 return SDValue(); 16492 16493 EVT VT = N->getValueType(0); 16494 16495 SDValue Op0 = N->getOperand(0); 16496 auto checkElem = [&](SDValue Op) -> int64_t { 16497 if (Op.getOpcode() == ISD::ZERO_EXTEND && 16498 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && 16499 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0)) 16500 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1))) 16501 return C->getZExtValue(); 16502 return -1; 16503 }; 16504 16505 // Make sure the first element matches 16506 // (zext (extract_vector_elt X, C)) 16507 int64_t Offset = checkElem(Op0); 16508 if (Offset < 0) 16509 return SDValue(); 16510 16511 unsigned NumElems = N->getNumOperands(); 16512 SDValue In = Op0.getOperand(0).getOperand(0); 16513 EVT InSVT = In.getValueType().getScalarType(); 16514 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems); 16515 16516 // Don't create an illegal input type after type legalization. 16517 if (LegalTypes && !TLI.isTypeLegal(InVT)) 16518 return SDValue(); 16519 16520 // Ensure all the elements come from the same vector and are adjacent. 16521 for (unsigned i = 1; i != NumElems; ++i) { 16522 if ((Offset + i) != checkElem(N->getOperand(i))) 16523 return SDValue(); 16524 } 16525 16526 SDLoc DL(N); 16527 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In, 16528 Op0.getOperand(0).getOperand(1)); 16529 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In); 16530 } 16531 16532 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { 16533 EVT VT = N->getValueType(0); 16534 16535 // A vector built entirely of undefs is undef. 16536 if (ISD::allOperandsUndef(N)) 16537 return DAG.getUNDEF(VT); 16538 16539 // If this is a splat of a bitcast from another vector, change to a 16540 // concat_vector. 
16541 // For example: 16542 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) -> 16543 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X)))) 16544 // 16545 // If X is a build_vector itself, the concat can become a larger build_vector. 16546 // TODO: Maybe this is useful for non-splat too? 16547 if (!LegalOperations) { 16548 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) { 16549 Splat = peekThroughBitcasts(Splat); 16550 EVT SrcVT = Splat.getValueType(); 16551 if (SrcVT.isVector()) { 16552 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements(); 16553 EVT NewVT = EVT::getVectorVT(*DAG.getContext(), 16554 SrcVT.getVectorElementType(), NumElts); 16555 if (!LegalTypes || TLI.isTypeLegal(NewVT)) { 16556 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat); 16557 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), 16558 NewVT, Ops); 16559 return DAG.getBitcast(VT, Concat); 16560 } 16561 } 16562 } 16563 } 16564 16565 // Check if we can express BUILD VECTOR via subvector extract. 16566 if (!LegalTypes && (N->getNumOperands() > 1)) { 16567 SDValue Op0 = N->getOperand(0); 16568 auto checkElem = [&](SDValue Op) -> uint64_t { 16569 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) && 16570 (Op0.getOperand(0) == Op.getOperand(0))) 16571 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1))) 16572 return CNode->getZExtValue(); 16573 return -1; 16574 }; 16575 16576 int Offset = checkElem(Op0); 16577 for (unsigned i = 0; i < N->getNumOperands(); ++i) { 16578 if (Offset + i != checkElem(N->getOperand(i))) { 16579 Offset = -1; 16580 break; 16581 } 16582 } 16583 16584 if ((Offset == 0) && 16585 (Op0.getOperand(0).getValueType() == N->getValueType(0))) 16586 return Op0.getOperand(0); 16587 if ((Offset != -1) && 16588 ((Offset % N->getValueType(0).getVectorNumElements()) == 16589 0)) // IDX must be multiple of output size. 
16590 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0), 16591 Op0.getOperand(0), Op0.getOperand(1)); 16592 } 16593 16594 if (SDValue V = convertBuildVecZextToZext(N)) 16595 return V; 16596 16597 if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) 16598 return V; 16599 16600 if (SDValue V = reduceBuildVecToShuffle(N)) 16601 return V; 16602 16603 return SDValue(); 16604 } 16605 16606 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { 16607 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 16608 EVT OpVT = N->getOperand(0).getValueType(); 16609 16610 // If the operands are legal vectors, leave them alone. 16611 if (TLI.isTypeLegal(OpVT)) 16612 return SDValue(); 16613 16614 SDLoc DL(N); 16615 EVT VT = N->getValueType(0); 16616 SmallVector<SDValue, 8> Ops; 16617 16618 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); 16619 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); 16620 16621 // Keep track of what we encounter. 16622 bool AnyInteger = false; 16623 bool AnyFP = false; 16624 for (const SDValue &Op : N->ops()) { 16625 if (ISD::BITCAST == Op.getOpcode() && 16626 !Op.getOperand(0).getValueType().isVector()) 16627 Ops.push_back(Op.getOperand(0)); 16628 else if (ISD::UNDEF == Op.getOpcode()) 16629 Ops.push_back(ScalarUndef); 16630 else 16631 return SDValue(); 16632 16633 // Note whether we encounter an integer or floating point scalar. 16634 // If it's neither, bail out, it could be something weird like x86mmx. 16635 EVT LastOpVT = Ops.back().getValueType(); 16636 if (LastOpVT.isFloatingPoint()) 16637 AnyFP = true; 16638 else if (LastOpVT.isInteger()) 16639 AnyInteger = true; 16640 else 16641 return SDValue(); 16642 } 16643 16644 // If any of the operands is a floating point scalar bitcast to a vector, 16645 // use floating point types throughout, and bitcast everything. 16646 // Replace UNDEFs by another scalar UNDEF node, of the final desired type. 
16647 if (AnyFP) { 16648 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); 16649 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); 16650 if (AnyInteger) { 16651 for (SDValue &Op : Ops) { 16652 if (Op.getValueType() == SVT) 16653 continue; 16654 if (Op.isUndef()) 16655 Op = ScalarUndef; 16656 else 16657 Op = DAG.getBitcast(SVT, Op); 16658 } 16659 } 16660 } 16661 16662 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, 16663 VT.getSizeInBits() / SVT.getSizeInBits()); 16664 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops)); 16665 } 16666 16667 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR 16668 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at 16669 // most two distinct vectors the same size as the result, attempt to turn this 16670 // into a legal shuffle. 16671 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { 16672 EVT VT = N->getValueType(0); 16673 EVT OpVT = N->getOperand(0).getValueType(); 16674 int NumElts = VT.getVectorNumElements(); 16675 int NumOpElts = OpVT.getVectorNumElements(); 16676 16677 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT); 16678 SmallVector<int, 8> Mask; 16679 16680 for (SDValue Op : N->ops()) { 16681 Op = peekThroughBitcasts(Op); 16682 16683 // UNDEF nodes convert to UNDEF shuffle mask values. 16684 if (Op.isUndef()) { 16685 Mask.append((unsigned)NumOpElts, -1); 16686 continue; 16687 } 16688 16689 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) 16690 return SDValue(); 16691 16692 // What vector are we extracting the subvector from and at what index? 16693 SDValue ExtVec = Op.getOperand(0); 16694 16695 // We want the EVT of the original extraction to correctly scale the 16696 // extraction index. 16697 EVT ExtVT = ExtVec.getValueType(); 16698 ExtVec = peekThroughBitcasts(ExtVec); 16699 16700 // UNDEF nodes convert to UNDEF shuffle mask values. 
16701 if (ExtVec.isUndef()) { 16702 Mask.append((unsigned)NumOpElts, -1); 16703 continue; 16704 } 16705 16706 if (!isa<ConstantSDNode>(Op.getOperand(1))) 16707 return SDValue(); 16708 int ExtIdx = Op.getConstantOperandVal(1); 16709 16710 // Ensure that we are extracting a subvector from a vector the same 16711 // size as the result. 16712 if (ExtVT.getSizeInBits() != VT.getSizeInBits()) 16713 return SDValue(); 16714 16715 // Scale the subvector index to account for any bitcast. 16716 int NumExtElts = ExtVT.getVectorNumElements(); 16717 if (0 == (NumExtElts % NumElts)) 16718 ExtIdx /= (NumExtElts / NumElts); 16719 else if (0 == (NumElts % NumExtElts)) 16720 ExtIdx *= (NumElts / NumExtElts); 16721 else 16722 return SDValue(); 16723 16724 // At most we can reference 2 inputs in the final shuffle. 16725 if (SV0.isUndef() || SV0 == ExtVec) { 16726 SV0 = ExtVec; 16727 for (int i = 0; i != NumOpElts; ++i) 16728 Mask.push_back(i + ExtIdx); 16729 } else if (SV1.isUndef() || SV1 == ExtVec) { 16730 SV1 = ExtVec; 16731 for (int i = 0; i != NumOpElts; ++i) 16732 Mask.push_back(i + ExtIdx + NumElts); 16733 } else { 16734 return SDValue(); 16735 } 16736 } 16737 16738 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) 16739 return SDValue(); 16740 16741 return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), 16742 DAG.getBitcast(VT, SV1), Mask); 16743 } 16744 16745 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { 16746 // If we only have one input vector, we don't need to do any concatenation. 16747 if (N->getNumOperands() == 1) 16748 return N->getOperand(0); 16749 16750 // Check if all of the operands are undefs. 16751 EVT VT = N->getValueType(0); 16752 if (ISD::allOperandsUndef(N)) 16753 return DAG.getUNDEF(VT); 16754 16755 // Optimize concat_vectors where all but the first of the vectors are undef. 
16756 if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { 16757 return Op.isUndef(); 16758 })) { 16759 SDValue In = N->getOperand(0); 16760 assert(In.getValueType().isVector() && "Must concat vectors"); 16761 16762 SDValue Scalar = peekThroughOneUseBitcasts(In); 16763 16764 // concat_vectors(scalar_to_vector(scalar), undef) -> 16765 // scalar_to_vector(scalar) 16766 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR && 16767 Scalar.hasOneUse()) { 16768 EVT SVT = Scalar.getValueType().getVectorElementType(); 16769 if (SVT == Scalar.getOperand(0).getValueType()) 16770 Scalar = Scalar.getOperand(0); 16771 } 16772 16773 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar) 16774 if (!Scalar.getValueType().isVector()) { 16775 // If the bitcast type isn't legal, it might be a trunc of a legal type; 16776 // look through the trunc so we can still do the transform: 16777 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) 16778 if (Scalar->getOpcode() == ISD::TRUNCATE && 16779 !TLI.isTypeLegal(Scalar.getValueType()) && 16780 TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) 16781 Scalar = Scalar->getOperand(0); 16782 16783 EVT SclTy = Scalar.getValueType(); 16784 16785 if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) 16786 return SDValue(); 16787 16788 // Bail out if the vector size is not a multiple of the scalar size. 
16789 if (VT.getSizeInBits() % SclTy.getSizeInBits()) 16790 return SDValue(); 16791 16792 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits(); 16793 if (VNTNumElms < 2) 16794 return SDValue(); 16795 16796 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms); 16797 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) 16798 return SDValue(); 16799 16800 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar); 16801 return DAG.getBitcast(VT, Res); 16802 } 16803 } 16804 16805 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. 16806 // We have already tested above for an UNDEF only concatenation. 16807 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) 16808 // -> (BUILD_VECTOR A, B, ..., C, D, ...) 16809 auto IsBuildVectorOrUndef = [](const SDValue &Op) { 16810 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); 16811 }; 16812 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) { 16813 SmallVector<SDValue, 8> Opnds; 16814 EVT SVT = VT.getScalarType(); 16815 16816 EVT MinVT = SVT; 16817 if (!SVT.isFloatingPoint()) { 16818 // If BUILD_VECTOR are from built from integer, they may have different 16819 // operand types. Get the smallest type and truncate all operands to it. 16820 bool FoundMinVT = false; 16821 for (const SDValue &Op : N->ops()) 16822 if (ISD::BUILD_VECTOR == Op.getOpcode()) { 16823 EVT OpSVT = Op.getOperand(0).getValueType(); 16824 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? 
OpSVT : MinVT; 16825 FoundMinVT = true; 16826 } 16827 assert(FoundMinVT && "Concat vector type mismatch"); 16828 } 16829 16830 for (const SDValue &Op : N->ops()) { 16831 EVT OpVT = Op.getValueType(); 16832 unsigned NumElts = OpVT.getVectorNumElements(); 16833 16834 if (ISD::UNDEF == Op.getOpcode()) 16835 Opnds.append(NumElts, DAG.getUNDEF(MinVT)); 16836 16837 if (ISD::BUILD_VECTOR == Op.getOpcode()) { 16838 if (SVT.isFloatingPoint()) { 16839 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); 16840 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); 16841 } else { 16842 for (unsigned i = 0; i != NumElts; ++i) 16843 Opnds.push_back( 16844 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); 16845 } 16846 } 16847 } 16848 16849 assert(VT.getVectorNumElements() == Opnds.size() && 16850 "Concat vector type mismatch"); 16851 return DAG.getBuildVector(VT, SDLoc(N), Opnds); 16852 } 16853 16854 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. 16855 if (SDValue V = combineConcatVectorOfScalars(N, DAG)) 16856 return V; 16857 16858 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. 16859 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) 16860 if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) 16861 return V; 16862 16863 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR 16864 // nodes often generate nop CONCAT_VECTOR nodes. 16865 // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that 16866 // place the incoming vectors at the exact same location. 
16867 SDValue SingleSource = SDValue(); 16868 unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); 16869 16870 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 16871 SDValue Op = N->getOperand(i); 16872 16873 if (Op.isUndef()) 16874 continue; 16875 16876 // Check if this is the identity extract: 16877 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) 16878 return SDValue(); 16879 16880 // Find the single incoming vector for the extract_subvector. 16881 if (SingleSource.getNode()) { 16882 if (Op.getOperand(0) != SingleSource) 16883 return SDValue(); 16884 } else { 16885 SingleSource = Op.getOperand(0); 16886 16887 // Check the source type is the same as the type of the result. 16888 // If not, this concat may extend the vector, so we can not 16889 // optimize it away. 16890 if (SingleSource.getValueType() != N->getValueType(0)) 16891 return SDValue(); 16892 } 16893 16894 unsigned IdentityIndex = i * PartNumElem; 16895 ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 16896 // The extract index must be constant. 16897 if (!CS) 16898 return SDValue(); 16899 16900 // Check that we are reading from the identity index. 16901 if (CS->getZExtValue() != IdentityIndex) 16902 return SDValue(); 16903 } 16904 16905 if (SingleSource.getNode()) 16906 return SingleSource; 16907 16908 return SDValue(); 16909 } 16910 16911 /// If we are extracting a subvector produced by a wide binary operator try 16912 /// to use a narrow binary operator and/or avoid concatenation and extraction. 16913 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { 16914 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share 16915 // some of these bailouts with other transforms. 16916 16917 // The extract index must be a constant, so we can map it to a concat operand. 
16918 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); 16919 if (!ExtractIndexC) 16920 return SDValue(); 16921 16922 // We are looking for an optionally bitcasted wide vector binary operator 16923 // feeding an extract subvector. 16924 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0)); 16925 if (!ISD::isBinaryOp(BinOp.getNode())) 16926 return SDValue(); 16927 16928 // The binop must be a vector type, so we can extract some fraction of it. 16929 EVT WideBVT = BinOp.getValueType(); 16930 if (!WideBVT.isVector()) 16931 return SDValue(); 16932 16933 EVT VT = Extract->getValueType(0); 16934 unsigned ExtractIndex = ExtractIndexC->getZExtValue(); 16935 assert(ExtractIndex % VT.getVectorNumElements() == 0 && 16936 "Extract index is not a multiple of the vector length."); 16937 16938 // Bail out if this is not a proper multiple width extraction. 16939 unsigned WideWidth = WideBVT.getSizeInBits(); 16940 unsigned NarrowWidth = VT.getSizeInBits(); 16941 if (WideWidth % NarrowWidth != 0) 16942 return SDValue(); 16943 16944 // Bail out if we are extracting a fraction of a single operation. This can 16945 // occur because we potentially looked through a bitcast of the binop. 16946 unsigned NarrowingRatio = WideWidth / NarrowWidth; 16947 unsigned WideNumElts = WideBVT.getVectorNumElements(); 16948 if (WideNumElts % NarrowingRatio != 0) 16949 return SDValue(); 16950 16951 // Bail out if the target does not support a narrower version of the binop. 16952 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(), 16953 WideNumElts / NarrowingRatio); 16954 unsigned BOpcode = BinOp.getOpcode(); 16955 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 16956 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT)) 16957 return SDValue(); 16958 16959 // If extraction is cheap, we don't need to look at the binop operands 16960 // for concat ops. The narrow binop alone makes this transform profitable. 
16961 // We can't just reuse the original extract index operand because we may have 16962 // bitcasted. 16963 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements(); 16964 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements(); 16965 EVT ExtBOIdxVT = Extract->getOperand(1).getValueType(); 16966 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) && 16967 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) { 16968 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N) 16969 SDLoc DL(Extract); 16970 SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT); 16971 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, 16972 BinOp.getOperand(0), NewExtIndex); 16973 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, 16974 BinOp.getOperand(1), NewExtIndex); 16975 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, 16976 BinOp.getNode()->getFlags()); 16977 return DAG.getBitcast(VT, NarrowBinOp); 16978 } 16979 16980 // Only handle the case where we are doubling and then halving. A larger ratio 16981 // may require more than two narrow binops to replace the wide binop. 16982 if (NarrowingRatio != 2) 16983 return SDValue(); 16984 16985 // TODO: The motivating case for this transform is an x86 AVX1 target. That 16986 // target has temptingly almost legal versions of bitwise logic ops in 256-bit 16987 // flavors, but no other 256-bit integer support. This could be extended to 16988 // handle any binop, but that may require fixing/adding other folds to avoid 16989 // codegen regressions. 16990 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR) 16991 return SDValue(); 16992 16993 // We need at least one concatenation operation of a binop operand to make 16994 // this transform worthwhile. The concat must double the input vector sizes. 16995 // TODO: Should we also handle INSERT_SUBVECTOR patterns? 
16996 SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0)); 16997 SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1)); 16998 bool ConcatL = 16999 LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2; 17000 bool ConcatR = 17001 RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2; 17002 if (!ConcatL && !ConcatR) 17003 return SDValue(); 17004 17005 // If one of the binop operands was not the result of a concat, we must 17006 // extract a half-sized operand for our new narrow binop. 17007 SDLoc DL(Extract); 17008 17009 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN 17010 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N) 17011 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN 17012 SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum)) 17013 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, 17014 BinOp.getOperand(0), 17015 DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT)); 17016 17017 SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum)) 17018 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, 17019 BinOp.getOperand(1), 17020 DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT)); 17021 17022 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y); 17023 return DAG.getBitcast(VT, NarrowBinOp); 17024 } 17025 17026 /// If we are extracting a subvector from a wide vector load, convert to a 17027 /// narrow load to eliminate the extraction: 17028 /// (extract_subvector (load wide vector)) --> (load narrow vector) 17029 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { 17030 // TODO: Add support for big-endian. The offset calculation must be adjusted. 
17031 if (DAG.getDataLayout().isBigEndian()) 17032 return SDValue(); 17033 17034 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); 17035 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); 17036 if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx) 17037 return SDValue(); 17038 17039 // Allow targets to opt-out. 17040 EVT VT = Extract->getValueType(0); 17041 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 17042 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT)) 17043 return SDValue(); 17044 17045 // The narrow load will be offset from the base address of the old load if 17046 // we are extracting from something besides index 0 (little-endian). 17047 SDLoc DL(Extract); 17048 SDValue BaseAddr = Ld->getOperand(1); 17049 unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); 17050 17051 // TODO: Use "BaseIndexOffset" to make this more effective. 17052 SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); 17053 MachineFunction &MF = DAG.getMachineFunction(); 17054 MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset, 17055 VT.getStoreSize()); 17056 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); 17057 DAG.makeEquivalentMemoryOrdering(Ld, NewLd); 17058 return NewLd; 17059 } 17060 17061 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { 17062 EVT NVT = N->getValueType(0); 17063 SDValue V = N->getOperand(0); 17064 17065 // Extract from UNDEF is UNDEF. 17066 if (V.isUndef()) 17067 return DAG.getUNDEF(NVT); 17068 17069 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT)) 17070 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG)) 17071 return NarrowLoad; 17072 17073 // Combine: 17074 // (extract_subvec (concat V1, V2, ...), i) 17075 // Into: 17076 // Vi if possible 17077 // Only operand 0 is checked as 'concat' assumes all inputs of the same 17078 // type. 
17079 if (V.getOpcode() == ISD::CONCAT_VECTORS && 17080 isa<ConstantSDNode>(N->getOperand(1)) && 17081 V.getOperand(0).getValueType() == NVT) { 17082 unsigned Idx = N->getConstantOperandVal(1); 17083 unsigned NumElems = NVT.getVectorNumElements(); 17084 assert((Idx % NumElems) == 0 && 17085 "IDX in concat is not a multiple of the result vector length."); 17086 return V->getOperand(Idx / NumElems); 17087 } 17088 17089 V = peekThroughBitcasts(V); 17090 17091 // If the input is a build vector. Try to make a smaller build vector. 17092 if (V.getOpcode() == ISD::BUILD_VECTOR) { 17093 if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 17094 EVT InVT = V.getValueType(); 17095 unsigned ExtractSize = NVT.getSizeInBits(); 17096 unsigned EltSize = InVT.getScalarSizeInBits(); 17097 // Only do this if we won't split any elements. 17098 if (ExtractSize % EltSize == 0) { 17099 unsigned NumElems = ExtractSize / EltSize; 17100 EVT EltVT = InVT.getVectorElementType(); 17101 EVT ExtractVT = NumElems == 1 ? EltVT : 17102 EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems); 17103 if ((Level < AfterLegalizeDAG || 17104 (NumElems == 1 || 17105 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) && 17106 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) { 17107 unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) / 17108 EltSize; 17109 if (NumElems == 1) { 17110 SDValue Src = V->getOperand(IdxVal); 17111 if (EltVT != Src.getValueType()) 17112 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src); 17113 17114 return DAG.getBitcast(NVT, Src); 17115 } 17116 17117 // Extract the pieces from the original build_vector. 
17118 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), 17119 makeArrayRef(V->op_begin() + IdxVal, 17120 NumElems)); 17121 return DAG.getBitcast(NVT, BuildVec); 17122 } 17123 } 17124 } 17125 } 17126 17127 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) { 17128 // Handle only simple case where vector being inserted and vector 17129 // being extracted are of same size. 17130 EVT SmallVT = V.getOperand(1).getValueType(); 17131 if (!NVT.bitsEq(SmallVT)) 17132 return SDValue(); 17133 17134 // Only handle cases where both indexes are constants. 17135 auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); 17136 auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2)); 17137 17138 if (InsIdx && ExtIdx) { 17139 // Combine: 17140 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) 17141 // Into: 17142 // indices are equal or bit offsets are equal => V1 17143 // otherwise => (extract_subvec V1, ExtIdx) 17144 if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() == 17145 ExtIdx->getZExtValue() * NVT.getScalarSizeInBits()) 17146 return DAG.getBitcast(NVT, V.getOperand(1)); 17147 return DAG.getNode( 17148 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, 17149 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), 17150 N->getOperand(1)); 17151 } 17152 } 17153 17154 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG)) 17155 return NarrowBOp; 17156 17157 if (SimplifyDemandedVectorElts(SDValue(N, 0))) 17158 return SDValue(N, 0); 17159 17160 return SDValue(); 17161 } 17162 17163 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, 17164 // or turn a shuffle of a single concat into simpler shuffle then concat. 
17165 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { 17166 EVT VT = N->getValueType(0); 17167 unsigned NumElts = VT.getVectorNumElements(); 17168 17169 SDValue N0 = N->getOperand(0); 17170 SDValue N1 = N->getOperand(1); 17171 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 17172 17173 SmallVector<SDValue, 4> Ops; 17174 EVT ConcatVT = N0.getOperand(0).getValueType(); 17175 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); 17176 unsigned NumConcats = NumElts / NumElemsPerConcat; 17177 17178 // Special case: shuffle(concat(A,B)) can be more efficiently represented 17179 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high 17180 // half vector elements. 17181 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() && 17182 std::all_of(SVN->getMask().begin() + NumElemsPerConcat, 17183 SVN->getMask().end(), [](int i) { return i == -1; })) { 17184 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), 17185 makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat)); 17186 N1 = DAG.getUNDEF(ConcatVT); 17187 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); 17188 } 17189 17190 // Look at every vector that's inserted. We're looking for exact 17191 // subvector-sized copies from a concatenated vector 17192 for (unsigned I = 0; I != NumConcats; ++I) { 17193 // Make sure we're dealing with a copy. 
17194 unsigned Begin = I * NumElemsPerConcat; 17195 bool AllUndef = true, NoUndef = true; 17196 for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) { 17197 if (SVN->getMaskElt(J) >= 0) 17198 AllUndef = false; 17199 else 17200 NoUndef = false; 17201 } 17202 17203 if (NoUndef) { 17204 if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) 17205 return SDValue(); 17206 17207 for (unsigned J = 1; J != NumElemsPerConcat; ++J) 17208 if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) 17209 return SDValue(); 17210 17211 unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; 17212 if (FirstElt < N0.getNumOperands()) 17213 Ops.push_back(N0.getOperand(FirstElt)); 17214 else 17215 Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); 17216 17217 } else if (AllUndef) { 17218 Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType())); 17219 } else { // Mixed with general masks and undefs, can't do optimization. 17220 return SDValue(); 17221 } 17222 } 17223 17224 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); 17225 } 17226 17227 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - 17228 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. 17229 // 17230 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always 17231 // a simplification in some sense, but it isn't appropriate in general: some 17232 // BUILD_VECTORs are substantially cheaper than others. The general case 17233 // of a BUILD_VECTOR requires inserting each element individually (or 17234 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of 17235 // all constants is a single constant pool load. A BUILD_VECTOR where each 17236 // element is identical is a splat. A BUILD_VECTOR where most of the operands 17237 // are undef lowers to a small number of element insertions. 17238 // 17239 // To deal with this, we currently use a bunch of mostly arbitrary heuristics. 
17240 // We don't fold shuffles where one side is a non-zero constant, and we don't 17241 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate 17242 // non-constant operands. This seems to work out reasonably well in practice. 17243 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, 17244 SelectionDAG &DAG, 17245 const TargetLowering &TLI) { 17246 EVT VT = SVN->getValueType(0); 17247 unsigned NumElts = VT.getVectorNumElements(); 17248 SDValue N0 = SVN->getOperand(0); 17249 SDValue N1 = SVN->getOperand(1); 17250 17251 if (!N0->hasOneUse()) 17252 return SDValue(); 17253 17254 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as 17255 // discussed above. 17256 if (!N1.isUndef()) { 17257 if (!N1->hasOneUse()) 17258 return SDValue(); 17259 17260 bool N0AnyConst = isAnyConstantBuildVector(N0); 17261 bool N1AnyConst = isAnyConstantBuildVector(N1); 17262 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode())) 17263 return SDValue(); 17264 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode())) 17265 return SDValue(); 17266 } 17267 17268 // If both inputs are splats of the same value then we can safely merge this 17269 // to a single BUILD_VECTOR with undef elements based on the shuffle mask. 17270 bool IsSplat = false; 17271 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0); 17272 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); 17273 if (BV0 && BV1) 17274 if (SDValue Splat0 = BV0->getSplatValue()) 17275 IsSplat = (Splat0 == BV1->getSplatValue()); 17276 17277 SmallVector<SDValue, 8> Ops; 17278 SmallSet<SDValue, 16> DuplicateOps; 17279 for (int M : SVN->getMask()) { 17280 SDValue Op = DAG.getUNDEF(VT.getScalarType()); 17281 if (M >= 0) { 17282 int Idx = M < (int)NumElts ? M : M - NumElts; 17283 SDValue &S = (M < (int)NumElts ? 
N0 : N1); 17284 if (S.getOpcode() == ISD::BUILD_VECTOR) { 17285 Op = S.getOperand(Idx); 17286 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) { 17287 assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index."); 17288 Op = S.getOperand(0); 17289 } else { 17290 // Operand can't be combined - bail out. 17291 return SDValue(); 17292 } 17293 } 17294 17295 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're 17296 // generating a splat; semantically, this is fine, but it's likely to 17297 // generate low-quality code if the target can't reconstruct an appropriate 17298 // shuffle. 17299 if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op)) 17300 if (!IsSplat && !DuplicateOps.insert(Op).second) 17301 return SDValue(); 17302 17303 Ops.push_back(Op); 17304 } 17305 17306 // BUILD_VECTOR requires all inputs to be of the same type, find the 17307 // maximum type and extend them all. 17308 EVT SVT = VT.getScalarType(); 17309 if (SVT.isInteger()) 17310 for (SDValue &Op : Ops) 17311 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); 17312 if (SVT != VT.getScalarType()) 17313 for (SDValue &Op : Ops) 17314 Op = TLI.isZExtFree(Op.getValueType(), SVT) 17315 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT) 17316 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT); 17317 return DAG.getBuildVector(VT, SDLoc(SVN), Ops); 17318 } 17319 17320 // Match shuffles that can be converted to any_vector_extend_in_reg. 17321 // This is often generated during legalization. 17322 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)) 17323 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case. 17324 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, 17325 SelectionDAG &DAG, 17326 const TargetLowering &TLI, 17327 bool LegalOperations) { 17328 EVT VT = SVN->getValueType(0); 17329 bool IsBigEndian = DAG.getDataLayout().isBigEndian(); 17330 17331 // TODO Add support for big-endian when we have a test case. 
17332 if (!VT.isInteger() || IsBigEndian) 17333 return SDValue(); 17334 17335 unsigned NumElts = VT.getVectorNumElements(); 17336 unsigned EltSizeInBits = VT.getScalarSizeInBits(); 17337 ArrayRef<int> Mask = SVN->getMask(); 17338 SDValue N0 = SVN->getOperand(0); 17339 17340 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32)) 17341 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) { 17342 for (unsigned i = 0; i != NumElts; ++i) { 17343 if (Mask[i] < 0) 17344 continue; 17345 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale)) 17346 continue; 17347 return false; 17348 } 17349 return true; 17350 }; 17351 17352 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for 17353 // power-of-2 extensions as they are the most likely. 17354 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) { 17355 // Check for non power of 2 vector sizes 17356 if (NumElts % Scale != 0) 17357 continue; 17358 if (!isAnyExtend(Scale)) 17359 continue; 17360 17361 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); 17362 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); 17363 // Never create an illegal type. Only create unsupported operations if we 17364 // are pre-legalization. 17365 if (TLI.isTypeLegal(OutVT)) 17366 if (!LegalOperations || 17367 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) 17368 return DAG.getBitcast(VT, 17369 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, 17370 SDLoc(SVN), OutVT, N0)); 17371 } 17372 17373 return SDValue(); 17374 } 17375 17376 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of 17377 // each source element of a large type into the lowest elements of a smaller 17378 // destination type. This is often generated during legalization. 17379 // If the source node itself was a '*_extend_vector_inreg' node then we should 17380 // then be able to remove it. 
17381 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, 17382 SelectionDAG &DAG) { 17383 EVT VT = SVN->getValueType(0); 17384 bool IsBigEndian = DAG.getDataLayout().isBigEndian(); 17385 17386 // TODO Add support for big-endian when we have a test case. 17387 if (!VT.isInteger() || IsBigEndian) 17388 return SDValue(); 17389 17390 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0)); 17391 17392 unsigned Opcode = N0.getOpcode(); 17393 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG && 17394 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG && 17395 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG) 17396 return SDValue(); 17397 17398 SDValue N00 = N0.getOperand(0); 17399 ArrayRef<int> Mask = SVN->getMask(); 17400 unsigned NumElts = VT.getVectorNumElements(); 17401 unsigned EltSizeInBits = VT.getScalarSizeInBits(); 17402 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits(); 17403 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits(); 17404 17405 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0) 17406 return SDValue(); 17407 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits; 17408 17409 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1> 17410 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1> 17411 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1> 17412 auto isTruncate = [&Mask, &NumElts](unsigned Scale) { 17413 for (unsigned i = 0; i != NumElts; ++i) { 17414 if (Mask[i] < 0) 17415 continue; 17416 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale)) 17417 continue; 17418 return false; 17419 } 17420 return true; 17421 }; 17422 17423 // At the moment we just handle the case where we've truncated back to the 17424 // same size as before the extension. 17425 // TODO: handle more extension/truncation cases as cases arise. 
17426 if (EltSizeInBits != ExtSrcSizeInBits) 17427 return SDValue(); 17428 17429 // We can remove *extend_vector_inreg only if the truncation happens at 17430 // the same scale as the extension. 17431 if (isTruncate(ExtScale)) 17432 return DAG.getBitcast(VT, N00); 17433 17434 return SDValue(); 17435 } 17436 17437 // Combine shuffles of splat-shuffles of the form: 17438 // shuffle (shuffle V, undef, splat-mask), undef, M 17439 // If splat-mask contains undef elements, we need to be careful about 17440 // introducing undef's in the folded mask which are not the result of composing 17441 // the masks of the shuffles. 17442 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask, 17443 ShuffleVectorSDNode *Splat, 17444 SelectionDAG &DAG) { 17445 ArrayRef<int> SplatMask = Splat->getMask(); 17446 assert(UserMask.size() == SplatMask.size() && "Mask length mismatch"); 17447 17448 // Prefer simplifying to the splat-shuffle, if possible. This is legal if 17449 // every undef mask element in the splat-shuffle has a corresponding undef 17450 // element in the user-shuffle's mask or if the composition of mask elements 17451 // would result in undef. 17452 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask): 17453 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u] 17454 // In this case it is not legal to simplify to the splat-shuffle because we 17455 // may be exposing the users of the shuffle an undef element at index 1 17456 // which was not there before the combine. 17457 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u] 17458 // In this case the composition of masks yields SplatMask, so it's ok to 17459 // simplify to the splat-shuffle. 17460 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u] 17461 // In this case the composed mask includes all undef elements of SplatMask 17462 // and in addition sets element zero to undef. It is safe to simplify to 17463 // the splat-shuffle. 
17464 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask, 17465 ArrayRef<int> SplatMask) { 17466 for (unsigned i = 0, e = UserMask.size(); i != e; ++i) 17467 if (UserMask[i] != -1 && SplatMask[i] == -1 && 17468 SplatMask[UserMask[i]] != -1) 17469 return false; 17470 return true; 17471 }; 17472 if (CanSimplifyToExistingSplat(UserMask, SplatMask)) 17473 return SDValue(Splat, 0); 17474 17475 // Create a new shuffle with a mask that is composed of the two shuffles' 17476 // masks. 17477 SmallVector<int, 32> NewMask; 17478 for (int Idx : UserMask) 17479 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]); 17480 17481 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat), 17482 Splat->getOperand(0), Splat->getOperand(1), 17483 NewMask); 17484 } 17485 17486 /// If the shuffle mask is taking exactly one element from the first vector 17487 /// operand and passing through all other elements from the second vector 17488 /// operand, return the index of the mask element that is choosing an element 17489 /// from the first operand. Otherwise, return -1. 17490 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) { 17491 int MaskSize = Mask.size(); 17492 int EltFromOp0 = -1; 17493 // TODO: This does not match if there are undef elements in the shuffle mask. 17494 // Should we ignore undefs in the shuffle mask instead? The trade-off is 17495 // removing an instruction (a shuffle), but losing the knowledge that some 17496 // vector lanes are not needed. 17497 for (int i = 0; i != MaskSize; ++i) { 17498 if (Mask[i] >= 0 && Mask[i] < MaskSize) { 17499 // We're looking for a shuffle of exactly one element from operand 0. 17500 if (EltFromOp0 != -1) 17501 return -1; 17502 EltFromOp0 = i; 17503 } else if (Mask[i] != i + MaskSize) { 17504 // Nothing from operand 1 can change lanes. 
17505 return -1; 17506 } 17507 } 17508 return EltFromOp0; 17509 } 17510 17511 /// If a shuffle inserts exactly one element from a source vector operand into 17512 /// another vector operand and we can access the specified element as a scalar, 17513 /// then we can eliminate the shuffle. 17514 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, 17515 SelectionDAG &DAG) { 17516 // First, check if we are taking one element of a vector and shuffling that 17517 // element into another vector. 17518 ArrayRef<int> Mask = Shuf->getMask(); 17519 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end()); 17520 SDValue Op0 = Shuf->getOperand(0); 17521 SDValue Op1 = Shuf->getOperand(1); 17522 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask); 17523 if (ShufOp0Index == -1) { 17524 // Commute mask and check again. 17525 ShuffleVectorSDNode::commuteMask(CommutedMask); 17526 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask); 17527 if (ShufOp0Index == -1) 17528 return SDValue(); 17529 // Commute operands to match the commuted shuffle mask. 17530 std::swap(Op0, Op1); 17531 Mask = CommutedMask; 17532 } 17533 17534 // The shuffle inserts exactly one element from operand 0 into operand 1. 17535 // Now see if we can access that element as a scalar via a real insert element 17536 // instruction. 17537 // TODO: We can try harder to locate the element as a scalar. Examples: it 17538 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant. 
17539 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() && 17540 "Shuffle mask value must be from operand 0"); 17541 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT) 17542 return SDValue(); 17543 17544 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2)); 17545 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index]) 17546 return SDValue(); 17547 17548 // There's an existing insertelement with constant insertion index, so we 17549 // don't need to check the legality/profitability of a replacement operation 17550 // that differs at most in the constant value. The target should be able to 17551 // lower any of those in a similar way. If not, legalization will expand this 17552 // to a scalar-to-vector plus shuffle. 17553 // 17554 // Note that the shuffle may move the scalar from the position that the insert 17555 // element used. Therefore, our new insert element occurs at the shuffle's 17556 // mask index value, not the insert's index value. 17557 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C' 17558 SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf), 17559 Op0.getOperand(2).getValueType()); 17560 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(), 17561 Op1, Op0.getOperand(1), NewInsIndex); 17562 } 17563 17564 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { 17565 EVT VT = N->getValueType(0); 17566 unsigned NumElts = VT.getVectorNumElements(); 17567 17568 SDValue N0 = N->getOperand(0); 17569 SDValue N1 = N->getOperand(1); 17570 17571 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); 17572 17573 // Canonicalize shuffle undef, undef -> undef 17574 if (N0.isUndef() && N1.isUndef()) 17575 return DAG.getUNDEF(VT); 17576 17577 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 17578 17579 // Canonicalize shuffle v, v -> v, undef 17580 if (N0 == N1) { 17581 SmallVector<int, 8> NewMask; 17582 for (unsigned i = 0; i != NumElts; ++i) { 17583 
int Idx = SVN->getMaskElt(i); 17584 if (Idx >= (int)NumElts) Idx -= NumElts; 17585 NewMask.push_back(Idx); 17586 } 17587 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask); 17588 } 17589 17590 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. 17591 if (N0.isUndef()) 17592 return DAG.getCommutedVectorShuffle(*SVN); 17593 17594 // Remove references to rhs if it is undef 17595 if (N1.isUndef()) { 17596 bool Changed = false; 17597 SmallVector<int, 8> NewMask; 17598 for (unsigned i = 0; i != NumElts; ++i) { 17599 int Idx = SVN->getMaskElt(i); 17600 if (Idx >= (int)NumElts) { 17601 Idx = -1; 17602 Changed = true; 17603 } 17604 NewMask.push_back(Idx); 17605 } 17606 if (Changed) 17607 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask); 17608 } 17609 17610 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG)) 17611 return InsElt; 17612 17613 // A shuffle of a single vector that is a splat can always be folded. 17614 if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0)) 17615 if (N1->isUndef() && N0Shuf->isSplat()) 17616 return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG); 17617 17618 // If it is a splat, check if the argument vector is another splat or a 17619 // build_vector. 17620 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { 17621 SDNode *V = N0.getNode(); 17622 17623 // If this is a bit convert that changes the element type of the vector but 17624 // not the number of vector elements, look through it. Be careful not to 17625 // look though conversions that change things like v4f32 to v2f64. 
17626 if (V->getOpcode() == ISD::BITCAST) { 17627 SDValue ConvInput = V->getOperand(0); 17628 if (ConvInput.getValueType().isVector() && 17629 ConvInput.getValueType().getVectorNumElements() == NumElts) 17630 V = ConvInput.getNode(); 17631 } 17632 17633 if (V->getOpcode() == ISD::BUILD_VECTOR) { 17634 assert(V->getNumOperands() == NumElts && 17635 "BUILD_VECTOR has wrong number of operands"); 17636 SDValue Base; 17637 bool AllSame = true; 17638 for (unsigned i = 0; i != NumElts; ++i) { 17639 if (!V->getOperand(i).isUndef()) { 17640 Base = V->getOperand(i); 17641 break; 17642 } 17643 } 17644 // Splat of <u, u, u, u>, return <u, u, u, u> 17645 if (!Base.getNode()) 17646 return N0; 17647 for (unsigned i = 0; i != NumElts; ++i) { 17648 if (V->getOperand(i) != Base) { 17649 AllSame = false; 17650 break; 17651 } 17652 } 17653 // Splat of <x, x, x, x>, return <x, x, x, x> 17654 if (AllSame) 17655 return N0; 17656 17657 // Canonicalize any other splat as a build_vector. 17658 const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); 17659 SmallVector<SDValue, 8> Ops(NumElts, Splatted); 17660 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops); 17661 17662 // We may have jumped through bitcasts, so the type of the 17663 // BUILD_VECTOR may not match the type of the shuffle. 17664 if (V->getValueType(0) != VT) 17665 NewBV = DAG.getBitcast(VT, NewBV); 17666 return NewBV; 17667 } 17668 } 17669 17670 // Simplify source operands based on shuffle mask. 17671 if (SimplifyDemandedVectorElts(SDValue(N, 0))) 17672 return SDValue(N, 0); 17673 17674 // Match shuffles that can be converted to any_vector_extend_in_reg. 17675 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations)) 17676 return V; 17677 17678 // Combine "truncate_vector_in_reg" style shuffles. 
17679 if (SDValue V = combineTruncationShuffle(SVN, DAG)) 17680 return V; 17681 17682 if (N0.getOpcode() == ISD::CONCAT_VECTORS && 17683 Level < AfterLegalizeVectorOps && 17684 (N1.isUndef() || 17685 (N1.getOpcode() == ISD::CONCAT_VECTORS && 17686 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) { 17687 if (SDValue V = partitionShuffleOfConcats(N, DAG)) 17688 return V; 17689 } 17690 17691 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - 17692 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. 17693 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) 17694 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI)) 17695 return Res; 17696 17697 // If this shuffle only has a single input that is a bitcasted shuffle, 17698 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output 17699 // back to their original types. 17700 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && 17701 N1.isUndef() && Level < AfterLegalizeVectorOps && 17702 TLI.isTypeLegal(VT)) { 17703 auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) { 17704 if (Scale == 1) 17705 return SmallVector<int, 8>(Mask.begin(), Mask.end()); 17706 17707 SmallVector<int, 8> NewMask; 17708 for (int M : Mask) 17709 for (int s = 0; s != Scale; ++s) 17710 NewMask.push_back(M < 0 ? -1 : Scale * M + s); 17711 return NewMask; 17712 }; 17713 17714 SDValue BC0 = peekThroughOneUseBitcasts(N0); 17715 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { 17716 EVT SVT = VT.getScalarType(); 17717 EVT InnerVT = BC0->getValueType(0); 17718 EVT InnerSVT = InnerVT.getScalarType(); 17719 17720 // Determine which shuffle works with the smaller scalar type. 17721 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? 
VT : InnerVT; 17722 EVT ScaleSVT = ScaleVT.getScalarType(); 17723 17724 if (TLI.isTypeLegal(ScaleVT) && 17725 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && 17726 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) { 17727 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); 17728 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); 17729 17730 // Scale the shuffle masks to the smaller scalar type. 17731 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0); 17732 SmallVector<int, 8> InnerMask = 17733 ScaleShuffleMask(InnerSVN->getMask(), InnerScale); 17734 SmallVector<int, 8> OuterMask = 17735 ScaleShuffleMask(SVN->getMask(), OuterScale); 17736 17737 // Merge the shuffle masks. 17738 SmallVector<int, 8> NewMask; 17739 for (int M : OuterMask) 17740 NewMask.push_back(M < 0 ? -1 : InnerMask[M]); 17741 17742 // Test for shuffle mask legality over both commutations. 17743 SDValue SV0 = BC0->getOperand(0); 17744 SDValue SV1 = BC0->getOperand(1); 17745 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); 17746 if (!LegalMask) { 17747 std::swap(SV0, SV1); 17748 ShuffleVectorSDNode::commuteMask(NewMask); 17749 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); 17750 } 17751 17752 if (LegalMask) { 17753 SV0 = DAG.getBitcast(ScaleVT, SV0); 17754 SV1 = DAG.getBitcast(ScaleVT, SV1); 17755 return DAG.getBitcast( 17756 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); 17757 } 17758 } 17759 } 17760 } 17761 17762 // Canonicalize shuffles according to rules: 17763 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) 17764 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) 17765 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) 17766 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && 17767 N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && 17768 TLI.isTypeLegal(VT)) { 17769 // The incoming shuffle must be of the same type as the result of the 17770 // current 
shuffle. 17771 assert(N1->getOperand(0).getValueType() == VT && 17772 "Shuffle types don't match"); 17773 17774 SDValue SV0 = N1->getOperand(0); 17775 SDValue SV1 = N1->getOperand(1); 17776 bool HasSameOp0 = N0 == SV0; 17777 bool IsSV1Undef = SV1.isUndef(); 17778 if (HasSameOp0 || IsSV1Undef || N0 == SV1) 17779 // Commute the operands of this shuffle so that next rule 17780 // will trigger. 17781 return DAG.getCommutedVectorShuffle(*SVN); 17782 } 17783 17784 // Try to fold according to rules: 17785 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) 17786 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) 17787 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) 17788 // Don't try to fold shuffles with illegal type. 17789 // Only fold if this shuffle is the only user of the other shuffle. 17790 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) && 17791 Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { 17792 ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); 17793 17794 // Don't try to fold splats; they're likely to simplify somehow, or they 17795 // might be free. 17796 if (OtherSV->isSplat()) 17797 return SDValue(); 17798 17799 // The incoming shuffle must be of the same type as the result of the 17800 // current shuffle. 17801 assert(OtherSV->getOperand(0).getValueType() == VT && 17802 "Shuffle types don't match"); 17803 17804 SDValue SV0, SV1; 17805 SmallVector<int, 4> Mask; 17806 // Compute the combined shuffle mask for a shuffle with SV0 as the first 17807 // operand, and SV1 as the second operand. 17808 for (unsigned i = 0; i != NumElts; ++i) { 17809 int Idx = SVN->getMaskElt(i); 17810 if (Idx < 0) { 17811 // Propagate Undef. 17812 Mask.push_back(Idx); 17813 continue; 17814 } 17815 17816 SDValue CurrentVec; 17817 if (Idx < (int)NumElts) { 17818 // This shuffle index refers to the inner shuffle N0. Lookup the inner 17819 // shuffle mask to identify which vector is actually referenced. 
17820 Idx = OtherSV->getMaskElt(Idx); 17821 if (Idx < 0) { 17822 // Propagate Undef. 17823 Mask.push_back(Idx); 17824 continue; 17825 } 17826 17827 CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0) 17828 : OtherSV->getOperand(1); 17829 } else { 17830 // This shuffle index references an element within N1. 17831 CurrentVec = N1; 17832 } 17833 17834 // Simple case where 'CurrentVec' is UNDEF. 17835 if (CurrentVec.isUndef()) { 17836 Mask.push_back(-1); 17837 continue; 17838 } 17839 17840 // Canonicalize the shuffle index. We don't know yet if CurrentVec 17841 // will be the first or second operand of the combined shuffle. 17842 Idx = Idx % NumElts; 17843 if (!SV0.getNode() || SV0 == CurrentVec) { 17844 // Ok. CurrentVec is the left hand side. 17845 // Update the mask accordingly. 17846 SV0 = CurrentVec; 17847 Mask.push_back(Idx); 17848 continue; 17849 } 17850 17851 // Bail out if we cannot convert the shuffle pair into a single shuffle. 17852 if (SV1.getNode() && SV1 != CurrentVec) 17853 return SDValue(); 17854 17855 // Ok. CurrentVec is the right hand side. 17856 // Update the mask accordingly. 17857 SV1 = CurrentVec; 17858 Mask.push_back(Idx + NumElts); 17859 } 17860 17861 // Check if all indices in Mask are Undef. In case, propagate Undef. 17862 bool isUndefMask = true; 17863 for (unsigned i = 0; i != NumElts && isUndefMask; ++i) 17864 isUndefMask &= Mask[i] < 0; 17865 17866 if (isUndefMask) 17867 return DAG.getUNDEF(VT); 17868 17869 if (!SV0.getNode()) 17870 SV0 = DAG.getUNDEF(VT); 17871 if (!SV1.getNode()) 17872 SV1 = DAG.getUNDEF(VT); 17873 17874 // Avoid introducing shuffles with illegal mask. 
17875 if (!TLI.isShuffleMaskLegal(Mask, VT)) { 17876 ShuffleVectorSDNode::commuteMask(Mask); 17877 17878 if (!TLI.isShuffleMaskLegal(Mask, VT)) 17879 return SDValue(); 17880 17881 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) 17882 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) 17883 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) 17884 std::swap(SV0, SV1); 17885 } 17886 17887 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) 17888 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) 17889 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) 17890 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask); 17891 } 17892 17893 return SDValue(); 17894 } 17895 17896 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { 17897 SDValue InVal = N->getOperand(0); 17898 EVT VT = N->getValueType(0); 17899 17900 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern 17901 // with a VECTOR_SHUFFLE and possible truncate. 17902 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 17903 SDValue InVec = InVal->getOperand(0); 17904 SDValue EltNo = InVal->getOperand(1); 17905 auto InVecT = InVec.getValueType(); 17906 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) { 17907 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1); 17908 int Elt = C0->getZExtValue(); 17909 NewMask[0] = Elt; 17910 SDValue Val; 17911 // If we have an implict truncate do truncate here as long as it's legal. 
17912 // if it's not legal, this should 17913 if (VT.getScalarType() != InVal.getValueType() && 17914 InVal.getValueType().isScalarInteger() && 17915 isTypeLegal(VT.getScalarType())) { 17916 Val = 17917 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal); 17918 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); 17919 } 17920 if (VT.getScalarType() == InVecT.getScalarType() && 17921 VT.getVectorNumElements() <= InVecT.getVectorNumElements() && 17922 TLI.isShuffleMaskLegal(NewMask, VT)) { 17923 Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec, 17924 DAG.getUNDEF(InVecT), NewMask); 17925 // If the initial vector is the correct size this shuffle is a 17926 // valid result. 17927 if (VT == InVecT) 17928 return Val; 17929 // If not we must truncate the vector. 17930 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { 17931 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 17932 SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); 17933 EVT SubVT = 17934 EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), 17935 VT.getVectorNumElements()); 17936 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val, 17937 ZeroIdx); 17938 return Val; 17939 } 17940 } 17941 } 17942 } 17943 17944 return SDValue(); 17945 } 17946 17947 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { 17948 EVT VT = N->getValueType(0); 17949 SDValue N0 = N->getOperand(0); 17950 SDValue N1 = N->getOperand(1); 17951 SDValue N2 = N->getOperand(2); 17952 17953 // If inserting an UNDEF, just return the original vector. 17954 if (N1.isUndef()) 17955 return N0; 17956 17957 // If this is an insert of an extracted vector into an undef vector, we can 17958 // just use the input to the extract. 
17959 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && 17960 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT) 17961 return N1.getOperand(0); 17962 17963 // If we are inserting a bitcast value into an undef, with the same 17964 // number of elements, just use the bitcast input of the extract. 17965 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 -> 17966 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2) 17967 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST && 17968 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR && 17969 N1.getOperand(0).getOperand(1) == N2 && 17970 N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() == 17971 VT.getVectorNumElements() && 17972 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() == 17973 VT.getSizeInBits()) { 17974 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0)); 17975 } 17976 17977 // If both N1 and N2 are bitcast values on which insert_subvector 17978 // would makes sense, pull the bitcast through. 17979 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 -> 17980 // BITCAST (INSERT_SUBVECTOR N0 N1 N2) 17981 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) { 17982 SDValue CN0 = N0.getOperand(0); 17983 SDValue CN1 = N1.getOperand(0); 17984 EVT CN0VT = CN0.getValueType(); 17985 EVT CN1VT = CN1.getValueType(); 17986 if (CN0VT.isVector() && CN1VT.isVector() && 17987 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() && 17988 CN0VT.getVectorNumElements() == VT.getVectorNumElements()) { 17989 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), 17990 CN0.getValueType(), CN0, CN1, N2); 17991 return DAG.getBitcast(VT, NewINSERT); 17992 } 17993 } 17994 17995 // Combine INSERT_SUBVECTORs where we are inserting to the same index. 
17996 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx ) 17997 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ) 17998 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && 17999 N0.getOperand(1).getValueType() == N1.getValueType() && 18000 N0.getOperand(2) == N2) 18001 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), 18002 N1, N2); 18003 18004 // Eliminate an intermediate insert into an undef vector: 18005 // insert_subvector undef, (insert_subvector undef, X, 0), N2 --> 18006 // insert_subvector undef, X, N2 18007 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR && 18008 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2))) 18009 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0, 18010 N1.getOperand(1), N2); 18011 18012 if (!isa<ConstantSDNode>(N2)) 18013 return SDValue(); 18014 18015 unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue(); 18016 18017 // Canonicalize insert_subvector dag nodes. 18018 // Example: 18019 // (insert_subvector (insert_subvector A, Idx0), Idx1) 18020 // -> (insert_subvector (insert_subvector A, Idx1), Idx0) 18021 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() && 18022 N1.getValueType() == N0.getOperand(1).getValueType() && 18023 isa<ConstantSDNode>(N0.getOperand(2))) { 18024 unsigned OtherIdx = N0.getConstantOperandVal(2); 18025 if (InsIdx < OtherIdx) { 18026 // Swap nodes. 18027 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, 18028 N0.getOperand(0), N1, N2); 18029 AddToWorklist(NewOp.getNode()); 18030 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()), 18031 VT, NewOp, N0.getOperand(1), N0.getOperand(2)); 18032 } 18033 } 18034 18035 // If the input vector is a concatenation, and the insert replaces 18036 // one of the pieces, we can optimize into a single concat_vectors. 
18037 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() && 18038 N0.getOperand(0).getValueType() == N1.getValueType()) { 18039 unsigned Factor = N1.getValueType().getVectorNumElements(); 18040 18041 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end()); 18042 Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1; 18043 18044 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); 18045 } 18046 18047 // Simplify source operands based on insertion. 18048 if (SimplifyDemandedVectorElts(SDValue(N, 0))) 18049 return SDValue(N, 0); 18050 18051 return SDValue(); 18052 } 18053 18054 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { 18055 SDValue N0 = N->getOperand(0); 18056 18057 // fold (fp_to_fp16 (fp16_to_fp op)) -> op 18058 if (N0->getOpcode() == ISD::FP16_TO_FP) 18059 return N0->getOperand(0); 18060 18061 return SDValue(); 18062 } 18063 18064 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { 18065 SDValue N0 = N->getOperand(0); 18066 18067 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) 18068 if (N0->getOpcode() == ISD::AND) { 18069 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); 18070 if (AndConst && AndConst->getAPIntValue() == 0xffff) { 18071 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), 18072 N0.getOperand(0)); 18073 } 18074 } 18075 18076 return SDValue(); 18077 } 18078 18079 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle 18080 /// with the destination vector and a zero vector. 18081 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. 
==> 18082 /// vector_shuffle V, Zero, <0, 4, 2, 4> 18083 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { 18084 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!"); 18085 18086 EVT VT = N->getValueType(0); 18087 SDValue LHS = N->getOperand(0); 18088 SDValue RHS = peekThroughBitcasts(N->getOperand(1)); 18089 SDLoc DL(N); 18090 18091 // Make sure we're not running after operation legalization where it 18092 // may have custom lowered the vector shuffles. 18093 if (LegalOperations) 18094 return SDValue(); 18095 18096 if (RHS.getOpcode() != ISD::BUILD_VECTOR) 18097 return SDValue(); 18098 18099 EVT RVT = RHS.getValueType(); 18100 unsigned NumElts = RHS.getNumOperands(); 18101 18102 // Attempt to create a valid clear mask, splitting the mask into 18103 // sub elements and checking to see if each is 18104 // all zeros or all ones - suitable for shuffle masking. 18105 auto BuildClearMask = [&](int Split) { 18106 int NumSubElts = NumElts * Split; 18107 int NumSubBits = RVT.getScalarSizeInBits() / Split; 18108 18109 SmallVector<int, 8> Indices; 18110 for (int i = 0; i != NumSubElts; ++i) { 18111 int EltIdx = i / Split; 18112 int SubIdx = i % Split; 18113 SDValue Elt = RHS.getOperand(EltIdx); 18114 if (Elt.isUndef()) { 18115 Indices.push_back(-1); 18116 continue; 18117 } 18118 18119 APInt Bits; 18120 if (isa<ConstantSDNode>(Elt)) 18121 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue(); 18122 else if (isa<ConstantFPSDNode>(Elt)) 18123 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt(); 18124 else 18125 return SDValue(); 18126 18127 // Extract the sub element from the constant bit mask. 
18128 if (DAG.getDataLayout().isBigEndian()) { 18129 Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits); 18130 } else { 18131 Bits.lshrInPlace(SubIdx * NumSubBits); 18132 } 18133 18134 if (Split > 1) 18135 Bits = Bits.trunc(NumSubBits); 18136 18137 if (Bits.isAllOnesValue()) 18138 Indices.push_back(i); 18139 else if (Bits == 0) 18140 Indices.push_back(i + NumSubElts); 18141 else 18142 return SDValue(); 18143 } 18144 18145 // Let's see if the target supports this vector_shuffle. 18146 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits); 18147 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts); 18148 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) 18149 return SDValue(); 18150 18151 SDValue Zero = DAG.getConstant(0, DL, ClearVT); 18152 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL, 18153 DAG.getBitcast(ClearVT, LHS), 18154 Zero, Indices)); 18155 }; 18156 18157 // Determine maximum split level (byte level masking). 18158 int MaxSplit = 1; 18159 if (RVT.getScalarSizeInBits() % 8 == 0) 18160 MaxSplit = RVT.getScalarSizeInBits() / 8; 18161 18162 for (int Split = 1; Split <= MaxSplit; ++Split) 18163 if (RVT.getScalarSizeInBits() % Split == 0) 18164 if (SDValue S = BuildClearMask(Split)) 18165 return S; 18166 18167 return SDValue(); 18168 } 18169 18170 /// Visit a binary vector operation, like ADD. 18171 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { 18172 assert(N->getValueType(0).isVector() && 18173 "SimplifyVBinOp only works on vectors!"); 18174 18175 SDValue LHS = N->getOperand(0); 18176 SDValue RHS = N->getOperand(1); 18177 SDValue Ops[] = {LHS, RHS}; 18178 18179 // See if we can constant fold the vector operation. 18180 if (SDValue Fold = DAG.FoldConstantVectorArithmetic( 18181 N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) 18182 return Fold; 18183 18184 // Type legalization might introduce new shuffles in the DAG. 
18185 // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) 18186 // -> (shuffle (VBinOp (A, B)), Undef, Mask). 18187 if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) && 18188 isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() && 18189 LHS.getOperand(1).isUndef() && 18190 RHS.getOperand(1).isUndef()) { 18191 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); 18192 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); 18193 18194 if (SVN0->getMask().equals(SVN1->getMask())) { 18195 EVT VT = N->getValueType(0); 18196 SDValue UndefVector = LHS.getOperand(1); 18197 SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 18198 LHS.getOperand(0), RHS.getOperand(0), 18199 N->getFlags()); 18200 AddUsersToWorklist(N); 18201 return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, 18202 SVN0->getMask()); 18203 } 18204 } 18205 18206 return SDValue(); 18207 } 18208 18209 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, 18210 SDValue N2) { 18211 assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); 18212 18213 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, 18214 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 18215 18216 // If we got a simplified select_cc node back from SimplifySelectCC, then 18217 // break it down into a new SETCC node, and a new SELECT node, and then return 18218 // the SELECT node, since we were called with a SELECT node. 18219 if (SCC.getNode()) { 18220 // Check to see if we got a select_cc back (to turn into setcc/select). 18221 // Otherwise, just return whatever node we got back, like fabs. 
18222 if (SCC.getOpcode() == ISD::SELECT_CC) { 18223 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), 18224 N0.getValueType(), 18225 SCC.getOperand(0), SCC.getOperand(1), 18226 SCC.getOperand(4)); 18227 AddToWorklist(SETCC.getNode()); 18228 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, 18229 SCC.getOperand(2), SCC.getOperand(3)); 18230 } 18231 18232 return SCC; 18233 } 18234 return SDValue(); 18235 } 18236 18237 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values 18238 /// being selected between, see if we can simplify the select. Callers of this 18239 /// should assume that TheSelect is deleted if this returns true. As such, they 18240 /// should return the appropriate thing (e.g. the node) back to the top-level of 18241 /// the DAG combiner loop to avoid it being looked at. 18242 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, 18243 SDValue RHS) { 18244 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) 18245 // The select + setcc is redundant, because fsqrt returns NaN for X < 0. 
18246 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { 18247 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) { 18248 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) 18249 SDValue Sqrt = RHS; 18250 ISD::CondCode CC; 18251 SDValue CmpLHS; 18252 const ConstantFPSDNode *Zero = nullptr; 18253 18254 if (TheSelect->getOpcode() == ISD::SELECT_CC) { 18255 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); 18256 CmpLHS = TheSelect->getOperand(0); 18257 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1)); 18258 } else { 18259 // SELECT or VSELECT 18260 SDValue Cmp = TheSelect->getOperand(0); 18261 if (Cmp.getOpcode() == ISD::SETCC) { 18262 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); 18263 CmpLHS = Cmp.getOperand(0); 18264 Zero = isConstOrConstSplatFP(Cmp.getOperand(1)); 18265 } 18266 } 18267 if (Zero && Zero->isZero() && 18268 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT || 18269 CC == ISD::SETULT || CC == ISD::SETLT)) { 18270 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) 18271 CombineTo(TheSelect, Sqrt); 18272 return true; 18273 } 18274 } 18275 } 18276 // Cannot simplify select with vector condition 18277 if (TheSelect->getOperand(0).getValueType().isVector()) return false; 18278 18279 // If this is a select from two identical things, try to pull the operation 18280 // through the select. 18281 if (LHS.getOpcode() != RHS.getOpcode() || 18282 !LHS.hasOneUse() || !RHS.hasOneUse()) 18283 return false; 18284 18285 // If this is a load and the token chain is identical, replace the select 18286 // of two loads with a load through a select of the address to load from. 18287 // This triggers in things like "select bool X, 10.0, 123.0" after the FP 18288 // constants have been dropped into the constant pool. 18289 if (LHS.getOpcode() == ISD::LOAD) { 18290 LoadSDNode *LLD = cast<LoadSDNode>(LHS); 18291 LoadSDNode *RLD = cast<LoadSDNode>(RHS); 18292 18293 // Token chains must be identical. 
18294 if (LHS.getOperand(0) != RHS.getOperand(0) || 18295 // Do not let this transformation reduce the number of volatile loads. 18296 LLD->isVolatile() || RLD->isVolatile() || 18297 // FIXME: If either is a pre/post inc/dec load, 18298 // we'd need to split out the address adjustment. 18299 LLD->isIndexed() || RLD->isIndexed() || 18300 // If this is an EXTLOAD, the VT's must match. 18301 LLD->getMemoryVT() != RLD->getMemoryVT() || 18302 // If this is an EXTLOAD, the kind of extension must match. 18303 (LLD->getExtensionType() != RLD->getExtensionType() && 18304 // The only exception is if one of the extensions is anyext. 18305 LLD->getExtensionType() != ISD::EXTLOAD && 18306 RLD->getExtensionType() != ISD::EXTLOAD) || 18307 // FIXME: this discards src value information. This is 18308 // over-conservative. It would be beneficial to be able to remember 18309 // both potential memory locations. Since we are discarding 18310 // src value info, don't do the transformation if the memory 18311 // locations are not in the default address space. 18312 LLD->getPointerInfo().getAddrSpace() != 0 || 18313 RLD->getPointerInfo().getAddrSpace() != 0 || 18314 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), 18315 LLD->getBasePtr().getValueType())) 18316 return false; 18317 18318 // The loads must not depend on one another. 18319 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD)) 18320 return false; 18321 18322 // Check that the select condition doesn't reach either load. If so, 18323 // folding this will induce a cycle into the DAG. If not, this is safe to 18324 // xform, so create a select of the addresses. 18325 18326 SmallPtrSet<const SDNode *, 32> Visited; 18327 SmallVector<const SDNode *, 16> Worklist; 18328 18329 // Always fail if LLD and RLD are not independent. TheSelect is a 18330 // predecessor to all Nodes in question so we need not search past it. 
18331 18332 Visited.insert(TheSelect); 18333 Worklist.push_back(LLD); 18334 Worklist.push_back(RLD); 18335 18336 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) || 18337 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)) 18338 return false; 18339 18340 SDValue Addr; 18341 if (TheSelect->getOpcode() == ISD::SELECT) { 18342 // We cannot do this optimization if any pair of {RLD, LLD} is a 18343 // predecessor to {RLD, LLD, CondNode}. As we've already compared the 18344 // Loads, we only need to check if CondNode is a successor to one of the 18345 // loads. We can further avoid this if there's no use of their chain 18346 // value. 18347 SDNode *CondNode = TheSelect->getOperand(0).getNode(); 18348 Worklist.push_back(CondNode); 18349 18350 if ((LLD->hasAnyUseOfValue(1) && 18351 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) || 18352 (RLD->hasAnyUseOfValue(1) && 18353 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))) 18354 return false; 18355 18356 Addr = DAG.getSelect(SDLoc(TheSelect), 18357 LLD->getBasePtr().getValueType(), 18358 TheSelect->getOperand(0), LLD->getBasePtr(), 18359 RLD->getBasePtr()); 18360 } else { // Otherwise SELECT_CC 18361 // We cannot do this optimization if any pair of {RLD, LLD} is a 18362 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared 18363 // the Loads, we only need to check if CondLHS/CondRHS is a successor to 18364 // one of the loads. We can further avoid this if there's no use of their 18365 // chain value. 
18366 18367 SDNode *CondLHS = TheSelect->getOperand(0).getNode(); 18368 SDNode *CondRHS = TheSelect->getOperand(1).getNode(); 18369 Worklist.push_back(CondLHS); 18370 Worklist.push_back(CondRHS); 18371 18372 if ((LLD->hasAnyUseOfValue(1) && 18373 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) || 18374 (RLD->hasAnyUseOfValue(1) && 18375 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))) 18376 return false; 18377 18378 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), 18379 LLD->getBasePtr().getValueType(), 18380 TheSelect->getOperand(0), 18381 TheSelect->getOperand(1), 18382 LLD->getBasePtr(), RLD->getBasePtr(), 18383 TheSelect->getOperand(4)); 18384 } 18385 18386 SDValue Load; 18387 // It is safe to replace the two loads if they have different alignments, 18388 // but the new load must be the minimum (most restrictive) alignment of the 18389 // inputs. 18390 unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); 18391 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); 18392 if (!RLD->isInvariant()) 18393 MMOFlags &= ~MachineMemOperand::MOInvariant; 18394 if (!RLD->isDereferenceable()) 18395 MMOFlags &= ~MachineMemOperand::MODereferenceable; 18396 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { 18397 // FIXME: Discards pointer and AA info. 18398 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), 18399 LLD->getChain(), Addr, MachinePointerInfo(), Alignment, 18400 MMOFlags); 18401 } else { 18402 // FIXME: Discards pointer and AA info. 18403 Load = DAG.getExtLoad( 18404 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() 18405 : LLD->getExtensionType(), 18406 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr, 18407 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags); 18408 } 18409 18410 // Users of the select now use the result of the load. 18411 CombineTo(TheSelect, Load); 18412 18413 // Users of the old loads now use the new load's chain. 
We know the 18414 // old-load value is dead now. 18415 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); 18416 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); 18417 return true; 18418 } 18419 18420 return false; 18421 } 18422 18423 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and 18424 /// bitwise 'and'. 18425 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, 18426 SDValue N1, SDValue N2, SDValue N3, 18427 ISD::CondCode CC) { 18428 // If this is a select where the false operand is zero and the compare is a 18429 // check of the sign bit, see if we can perform the "gzip trick": 18430 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A 18431 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A 18432 EVT XType = N0.getValueType(); 18433 EVT AType = N2.getValueType(); 18434 if (!isNullConstant(N3) || !XType.bitsGE(AType)) 18435 return SDValue(); 18436 18437 // If the comparison is testing for a positive value, we have to invert 18438 // the sign bit mask, so only do that transform if the target has a bitwise 18439 // 'and not' instruction (the invert is free). 18440 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) { 18441 // (X > -1) ? A : 0 18442 // (X > 0) ? X : 0 <-- This is canonical signed max. 18443 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2))) 18444 return SDValue(); 18445 } else if (CC == ISD::SETLT) { 18446 // (X < 0) ? A : 0 18447 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min. 18448 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2))) 18449 return SDValue(); 18450 } else { 18451 return SDValue(); 18452 } 18453 18454 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit 18455 // constant. 
18456 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); 18457 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); 18458 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { 18459 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1; 18460 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); 18461 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); 18462 AddToWorklist(Shift.getNode()); 18463 18464 if (XType.bitsGT(AType)) { 18465 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 18466 AddToWorklist(Shift.getNode()); 18467 } 18468 18469 if (CC == ISD::SETGT) 18470 Shift = DAG.getNOT(DL, Shift, AType); 18471 18472 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 18473 } 18474 18475 SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy); 18476 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt); 18477 AddToWorklist(Shift.getNode()); 18478 18479 if (XType.bitsGT(AType)) { 18480 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 18481 AddToWorklist(Shift.getNode()); 18482 } 18483 18484 if (CC == ISD::SETGT) 18485 Shift = DAG.getNOT(DL, Shift, AType); 18486 18487 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 18488 } 18489 18490 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" 18491 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 18492 /// in it. This may be a win when the constant is not otherwise available 18493 /// because it replaces two constant pool loads with one. 18494 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( 18495 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, 18496 ISD::CondCode CC) { 18497 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint())) 18498 return SDValue(); 18499 18500 // If we are before legalize types, we want the other legalization to happen 18501 // first (for example, to avoid messing with soft float). 
18502 auto *TV = dyn_cast<ConstantFPSDNode>(N2); 18503 auto *FV = dyn_cast<ConstantFPSDNode>(N3); 18504 EVT VT = N2.getValueType(); 18505 if (!TV || !FV || !TLI.isTypeLegal(VT)) 18506 return SDValue(); 18507 18508 // If a constant can be materialized without loads, this does not make sense. 18509 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal || 18510 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) || 18511 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) 18512 return SDValue(); 18513 18514 // If both constants have multiple uses, then we won't need to do an extra 18515 // load. The values are likely around in registers for other users. 18516 if (!TV->hasOneUse() && !FV->hasOneUse()) 18517 return SDValue(); 18518 18519 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()), 18520 const_cast<ConstantFP*>(TV->getConstantFPValue()) }; 18521 Type *FPTy = Elts[0]->getType(); 18522 const DataLayout &TD = DAG.getDataLayout(); 18523 18524 // Create a ConstantArray of the two constants. 18525 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); 18526 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), 18527 TD.getPrefTypeAlignment(FPTy)); 18528 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); 18529 18530 // Get offsets to the 0 and 1 elements of the array, so we can select between 18531 // them. 
18532 SDValue Zero = DAG.getIntPtrConstant(0, DL); 18533 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); 18534 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); 18535 SDValue Cond = 18536 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC); 18537 AddToWorklist(Cond.getNode()); 18538 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero); 18539 AddToWorklist(CstOffset.getNode()); 18540 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); 18541 AddToWorklist(CPIdx.getNode()); 18542 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, 18543 MachinePointerInfo::getConstantPool( 18544 DAG.getMachineFunction()), Alignment); 18545 } 18546 18547 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 18548 /// where 'cond' is the comparison specified by CC. 18549 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, 18550 SDValue N2, SDValue N3, ISD::CondCode CC, 18551 bool NotExtCompare) { 18552 // (x ? y : y) -> y. 18553 if (N2 == N3) return N2; 18554 18555 EVT CmpOpVT = N0.getValueType(); 18556 EVT VT = N2.getValueType(); 18557 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 18558 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); 18559 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); 18560 18561 // Determine if the condition we're dealing with is constant. 18562 SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL, 18563 false); 18564 if (SCC.getNode()) AddToWorklist(SCC.getNode()); 18565 18566 if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { 18567 // fold select_cc true, x, y -> x 18568 // fold select_cc false, x, y -> y 18569 return !SCCC->isNullValue() ? 
N2 : N3; 18570 } 18571 18572 if (SDValue V = 18573 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC)) 18574 return V; 18575 18576 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC)) 18577 return V; 18578 18579 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) 18580 // where y is has a single bit set. 18581 // A plaintext description would be, we can turn the SELECT_CC into an AND 18582 // when the condition can be materialized as an all-ones register. Any 18583 // single bit-test can be materialized as an all-ones register with 18584 // shift-left and shift-right-arith. 18585 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && 18586 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { 18587 SDValue AndLHS = N0->getOperand(0); 18588 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 18589 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { 18590 // Shift the tested bit over the sign bit. 18591 const APInt &AndMask = ConstAndRHS->getAPIntValue(); 18592 SDValue ShlAmt = 18593 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), 18594 getShiftAmountTy(AndLHS.getValueType())); 18595 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); 18596 18597 // Now arithmetic right shift it all the way over, so the result is either 18598 // all-ones, or zero. 
18599 SDValue ShrAmt = 18600 DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), 18601 getShiftAmountTy(Shl.getValueType())); 18602 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); 18603 18604 return DAG.getNode(ISD::AND, DL, VT, Shr, N3); 18605 } 18606 } 18607 18608 // fold select C, 16, 0 -> shl C, 4 18609 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2(); 18610 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2(); 18611 18612 if ((Fold || Swap) && 18613 TLI.getBooleanContents(CmpOpVT) == 18614 TargetLowering::ZeroOrOneBooleanContent && 18615 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) { 18616 18617 if (Swap) { 18618 CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger()); 18619 std::swap(N2C, N3C); 18620 } 18621 18622 // If the caller doesn't want us to simplify this into a zext of a compare, 18623 // don't do it. 18624 if (NotExtCompare && N2C->isOne()) 18625 return SDValue(); 18626 18627 SDValue Temp, SCC; 18628 // zext (setcc n0, n1) 18629 if (LegalTypes) { 18630 SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC); 18631 if (VT.bitsLT(SCC.getValueType())) 18632 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT); 18633 else 18634 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC); 18635 } else { 18636 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); 18637 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC); 18638 } 18639 18640 AddToWorklist(SCC.getNode()); 18641 AddToWorklist(Temp.getNode()); 18642 18643 if (N2C->isOne()) 18644 return Temp; 18645 18646 // shl setcc result by log2 n2c 18647 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, 18648 DAG.getConstant(N2C->getAPIntValue().logBase2(), 18649 SDLoc(Temp), 18650 getShiftAmountTy(Temp.getValueType()))); 18651 } 18652 18653 // Check to see if this is an integer abs. 
18654 // select_cc setg[te] X, 0, X, -X -> 18655 // select_cc setgt X, -1, X, -X -> 18656 // select_cc setl[te] X, 0, -X, X -> 18657 // select_cc setlt X, 1, -X, X -> 18658 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 18659 if (N1C) { 18660 ConstantSDNode *SubC = nullptr; 18661 if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || 18662 (N1C->isAllOnesValue() && CC == ISD::SETGT)) && 18663 N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) 18664 SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); 18665 else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || 18666 (N1C->isOne() && CC == ISD::SETLT)) && 18667 N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) 18668 SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); 18669 18670 if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) { 18671 SDLoc DL(N0); 18672 SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0, 18673 DAG.getConstant(CmpOpVT.getSizeInBits() - 1, 18674 DL, 18675 getShiftAmountTy(CmpOpVT))); 18676 SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift); 18677 AddToWorklist(Shift.getNode()); 18678 AddToWorklist(Add.getNode()); 18679 return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift); 18680 } 18681 } 18682 18683 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X) 18684 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X) 18685 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X) 18686 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X) 18687 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X) 18688 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X) 18689 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X) 18690 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) 18691 if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 18692 SDValue ValueOnZero = N2; 18693 SDValue Count = N3; 18694 // If the condition is NE instead of E, swap the operands. 
18695 if (CC == ISD::SETNE) 18696 std::swap(ValueOnZero, Count); 18697 // Check if the value on zero is a constant equal to the bits in the type. 18698 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) { 18699 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) { 18700 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is 18701 // legal, combine to just cttz. 18702 if ((Count.getOpcode() == ISD::CTTZ || 18703 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) && 18704 N0 == Count.getOperand(0) && 18705 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT))) 18706 return DAG.getNode(ISD::CTTZ, DL, VT, N0); 18707 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is 18708 // legal, combine to just ctlz. 18709 if ((Count.getOpcode() == ISD::CTLZ || 18710 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) && 18711 N0 == Count.getOperand(0) && 18712 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT))) 18713 return DAG.getNode(ISD::CTLZ, DL, VT, N0); 18714 } 18715 } 18716 } 18717 18718 return SDValue(); 18719 } 18720 18721 /// This is a stub for TargetLowering::SimplifySetCC. 18722 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, 18723 ISD::CondCode Cond, const SDLoc &DL, 18724 bool foldBooleans) { 18725 TargetLowering::DAGCombinerInfo 18726 DagCombineInfo(DAG, Level, false, this); 18727 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); 18728 } 18729 18730 /// Given an ISD::SDIV node expressing a divide by constant, return 18731 /// a DAG expression to select that will generate the same value by multiplying 18732 /// by a magic number. 18733 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 18734 SDValue DAGCombiner::BuildSDIV(SDNode *N) { 18735 // when optimising for minimum size, we don't want to expand a div to a mul 18736 // and a shift. 
18737 if (DAG.getMachineFunction().getFunction().optForMinSize()) 18738 return SDValue(); 18739 18740 SmallVector<SDNode *, 8> Built; 18741 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) { 18742 for (SDNode *N : Built) 18743 AddToWorklist(N); 18744 return S; 18745 } 18746 18747 return SDValue(); 18748 } 18749 18750 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a 18751 /// DAG expression that will generate the same value by right shifting. 18752 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { 18753 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); 18754 if (!C) 18755 return SDValue(); 18756 18757 // Avoid division by zero. 18758 if (C->isNullValue()) 18759 return SDValue(); 18760 18761 SmallVector<SDNode *, 8> Built; 18762 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) { 18763 for (SDNode *N : Built) 18764 AddToWorklist(N); 18765 return S; 18766 } 18767 18768 return SDValue(); 18769 } 18770 18771 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG 18772 /// expression that will generate the same value by multiplying by a magic 18773 /// number. 18774 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 18775 SDValue DAGCombiner::BuildUDIV(SDNode *N) { 18776 // when optimising for minimum size, we don't want to expand a div to a mul 18777 // and a shift. 18778 if (DAG.getMachineFunction().getFunction().optForMinSize()) 18779 return SDValue(); 18780 18781 SmallVector<SDNode *, 8> Built; 18782 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) { 18783 for (SDNode *N : Built) 18784 AddToWorklist(N); 18785 return S; 18786 } 18787 18788 return SDValue(); 18789 } 18790 18791 /// Determines the LogBase2 value for a non-null input value using the 18792 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). 
18793 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { 18794 EVT VT = V.getValueType(); 18795 unsigned EltBits = VT.getScalarSizeInBits(); 18796 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V); 18797 SDValue Base = DAG.getConstant(EltBits - 1, DL, VT); 18798 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz); 18799 return LogBase2; 18800 } 18801 18802 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 18803 /// For the reciprocal, we need to find the zero of the function: 18804 /// F(X) = A X - 1 [which has a zero at X = 1/A] 18805 /// => 18806 /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form 18807 /// does not require additional intermediate precision] 18808 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { 18809 if (Level >= AfterLegalizeDAG) 18810 return SDValue(); 18811 18812 // TODO: Handle half and/or extended types? 18813 EVT VT = Op.getValueType(); 18814 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) 18815 return SDValue(); 18816 18817 // If estimates are explicitly disabled for this function, we're done. 18818 MachineFunction &MF = DAG.getMachineFunction(); 18819 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF); 18820 if (Enabled == TLI.ReciprocalEstimate::Disabled) 18821 return SDValue(); 18822 18823 // Estimates may be explicitly enabled for this type with a custom number of 18824 // refinement steps. 
18825 int Iterations = TLI.getDivRefinementSteps(VT, MF); 18826 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { 18827 AddToWorklist(Est.getNode()); 18828 18829 if (Iterations) { 18830 EVT VT = Op.getValueType(); 18831 SDLoc DL(Op); 18832 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); 18833 18834 // Newton iterations: Est = Est + Est (1 - Arg * Est) 18835 for (int i = 0; i < Iterations; ++i) { 18836 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); 18837 AddToWorklist(NewEst.getNode()); 18838 18839 NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); 18840 AddToWorklist(NewEst.getNode()); 18841 18842 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); 18843 AddToWorklist(NewEst.getNode()); 18844 18845 Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); 18846 AddToWorklist(Est.getNode()); 18847 } 18848 } 18849 return Est; 18850 } 18851 18852 return SDValue(); 18853 } 18854 18855 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 18856 /// For the reciprocal sqrt, we need to find the zero of the function: 18857 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] 18858 /// => 18859 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) 18860 /// As a result, we precompute A/2 prior to the iteration loop. 18861 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, 18862 unsigned Iterations, 18863 SDNodeFlags Flags, bool Reciprocal) { 18864 EVT VT = Arg.getValueType(); 18865 SDLoc DL(Arg); 18866 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); 18867 18868 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that 18869 // this entire sequence requires only one FP constant. 
18870 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); 18871 AddToWorklist(HalfArg.getNode()); 18872 18873 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); 18874 AddToWorklist(HalfArg.getNode()); 18875 18876 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) 18877 for (unsigned i = 0; i < Iterations; ++i) { 18878 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); 18879 AddToWorklist(NewEst.getNode()); 18880 18881 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); 18882 AddToWorklist(NewEst.getNode()); 18883 18884 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); 18885 AddToWorklist(NewEst.getNode()); 18886 18887 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); 18888 AddToWorklist(Est.getNode()); 18889 } 18890 18891 // If non-reciprocal square root is requested, multiply the result by Arg. 18892 if (!Reciprocal) { 18893 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); 18894 AddToWorklist(Est.getNode()); 18895 } 18896 18897 return Est; 18898 } 18899 18900 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 18901 /// For the reciprocal sqrt, we need to find the zero of the function: 18902 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] 18903 /// => 18904 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) 18905 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, 18906 unsigned Iterations, 18907 SDNodeFlags Flags, bool Reciprocal) { 18908 EVT VT = Arg.getValueType(); 18909 SDLoc DL(Arg); 18910 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); 18911 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); 18912 18913 // This routine must enter the loop below to work correctly 18914 // when (Reciprocal == false). 
18915 assert(Iterations > 0); 18916 18917 // Newton iterations for reciprocal square root: 18918 // E = (E * -0.5) * ((A * E) * E + -3.0) 18919 for (unsigned i = 0; i < Iterations; ++i) { 18920 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); 18921 AddToWorklist(AE.getNode()); 18922 18923 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); 18924 AddToWorklist(AEE.getNode()); 18925 18926 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); 18927 AddToWorklist(RHS.getNode()); 18928 18929 // When calculating a square root at the last iteration build: 18930 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) 18931 // (notice a common subexpression) 18932 SDValue LHS; 18933 if (Reciprocal || (i + 1) < Iterations) { 18934 // RSQRT: LHS = (E * -0.5) 18935 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); 18936 } else { 18937 // SQRT: LHS = (A * E) * -0.5 18938 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); 18939 } 18940 AddToWorklist(LHS.getNode()); 18941 18942 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); 18943 AddToWorklist(Est.getNode()); 18944 } 18945 18946 return Est; 18947 } 18948 18949 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case 18950 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if 18951 /// Op can be zero. 18952 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, 18953 bool Reciprocal) { 18954 if (Level >= AfterLegalizeDAG) 18955 return SDValue(); 18956 18957 // TODO: Handle half and/or extended types? 18958 EVT VT = Op.getValueType(); 18959 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) 18960 return SDValue(); 18961 18962 // If estimates are explicitly disabled for this function, we're done. 
18963 MachineFunction &MF = DAG.getMachineFunction(); 18964 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF); 18965 if (Enabled == TLI.ReciprocalEstimate::Disabled) 18966 return SDValue(); 18967 18968 // Estimates may be explicitly enabled for this type with a custom number of 18969 // refinement steps. 18970 int Iterations = TLI.getSqrtRefinementSteps(VT, MF); 18971 18972 bool UseOneConstNR = false; 18973 if (SDValue Est = 18974 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR, 18975 Reciprocal)) { 18976 AddToWorklist(Est.getNode()); 18977 18978 if (Iterations) { 18979 Est = UseOneConstNR 18980 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) 18981 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); 18982 18983 if (!Reciprocal) { 18984 // The estimate is now completely wrong if the input was exactly 0.0 or 18985 // possibly a denormal. Force the answer to 0.0 for those cases. 18986 EVT VT = Op.getValueType(); 18987 SDLoc DL(Op); 18988 EVT CCVT = getSetCCResultType(VT); 18989 ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; 18990 const Function &F = DAG.getMachineFunction().getFunction(); 18991 Attribute Denorms = F.getFnAttribute("denormal-fp-math"); 18992 if (Denorms.getValueAsString().equals("ieee")) { 18993 // fabs(X) < SmallestNormal ? 0.0 : Est 18994 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); 18995 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); 18996 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); 18997 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); 18998 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); 18999 SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); 19000 Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); 19001 AddToWorklist(Fabs.getNode()); 19002 AddToWorklist(IsDenorm.getNode()); 19003 AddToWorklist(Est.getNode()); 19004 } else { 19005 // X == 0.0 ? 
0.0 : Est 19006 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); 19007 SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); 19008 Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); 19009 AddToWorklist(IsZero.getNode()); 19010 AddToWorklist(Est.getNode()); 19011 } 19012 } 19013 } 19014 return Est; 19015 } 19016 19017 return SDValue(); 19018 } 19019 19020 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { 19021 return buildSqrtEstimateImpl(Op, Flags, true); 19022 } 19023 19024 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { 19025 return buildSqrtEstimateImpl(Op, Flags, false); 19026 } 19027 19028 /// Return true if there is any possibility that the two addresses overlap. 19029 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { 19030 // If they are the same then they must be aliases. 19031 if (Op0->getBasePtr() == Op1->getBasePtr()) return true; 19032 19033 // If they are both volatile then they cannot be reordered. 19034 if (Op0->isVolatile() && Op1->isVolatile()) return true; 19035 19036 // If one operation reads from invariant memory, and the other may store, they 19037 // cannot alias. These should really be checking the equivalent of mayWrite, 19038 // but it only matters for memory nodes other than load /store. 19039 if (Op0->isInvariant() && Op1->writeMem()) 19040 return false; 19041 19042 if (Op1->isInvariant() && Op0->writeMem()) 19043 return false; 19044 19045 unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize(); 19046 unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize(); 19047 19048 // Check for BaseIndexOffset matching. 
19049 BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG); 19050 BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG); 19051 int64_t PtrDiff; 19052 if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) { 19053 if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) 19054 return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0)); 19055 19056 // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be 19057 // able to calculate their relative offset if at least one arises 19058 // from an alloca. However, these allocas cannot overlap and we 19059 // can infer there is no alias. 19060 if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase())) 19061 if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) { 19062 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 19063 // If the base are the same frame index but the we couldn't find a 19064 // constant offset, (indices are different) be conservative. 19065 if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) || 19066 !MFI.isFixedObjectIndex(B->getIndex()))) 19067 return false; 19068 } 19069 19070 bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase()); 19071 bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase()); 19072 bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase()); 19073 bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase()); 19074 bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase()); 19075 bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase()); 19076 19077 // If of mismatched base types or checkable indices we can check 19078 // they do not alias. 
19079 if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) || 19080 (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) && 19081 (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) 19082 return false; 19083 } 19084 19085 // If we know required SrcValue1 and SrcValue2 have relatively large 19086 // alignment compared to the size and offset of the access, we may be able 19087 // to prove they do not alias. This check is conservative for now to catch 19088 // cases created by splitting vector types. 19089 int64_t SrcValOffset0 = Op0->getSrcValueOffset(); 19090 int64_t SrcValOffset1 = Op1->getSrcValueOffset(); 19091 unsigned OrigAlignment0 = Op0->getOriginalAlignment(); 19092 unsigned OrigAlignment1 = Op1->getOriginalAlignment(); 19093 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && 19094 NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) { 19095 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; 19096 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; 19097 19098 // There is no overlap between these relatively aligned accesses of 19099 // similar size. Return no alias. 19100 if ((OffAlign0 + NumBytes0) <= OffAlign1 || 19101 (OffAlign1 + NumBytes1) <= OffAlign0) 19102 return false; 19103 } 19104 19105 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 19106 ? CombinerGlobalAA 19107 : DAG.getSubtarget().useAA(); 19108 #ifndef NDEBUG 19109 if (CombinerAAOnlyFunc.getNumOccurrences() && 19110 CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) 19111 UseAA = false; 19112 #endif 19113 19114 if (UseAA && AA && 19115 Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { 19116 // Use alias analysis information. 
19117 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); 19118 int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset; 19119 int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset; 19120 AliasResult AAResult = 19121 AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0, 19122 UseTBAA ? Op0->getAAInfo() : AAMDNodes()), 19123 MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1, 19124 UseTBAA ? Op1->getAAInfo() : AAMDNodes()) ); 19125 if (AAResult == NoAlias) 19126 return false; 19127 } 19128 19129 // Otherwise we have to assume they alias. 19130 return true; 19131 } 19132 19133 /// Walk up chain skipping non-aliasing memory nodes, 19134 /// looking for aliasing nodes and adding them to the Aliases vector. 19135 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, 19136 SmallVectorImpl<SDValue> &Aliases) { 19137 SmallVector<SDValue, 8> Chains; // List of chains to visit. 19138 SmallPtrSet<SDNode *, 16> Visited; // Visited node set. 19139 19140 // Get alias information for node. 19141 bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile(); 19142 19143 // Starting off. 19144 Chains.push_back(OriginalChain); 19145 unsigned Depth = 0; 19146 19147 // Look at each chain and determine if it is an alias. If so, add it to the 19148 // aliases list. If not, then continue up the chain looking for the next 19149 // candidate. 19150 while (!Chains.empty()) { 19151 SDValue Chain = Chains.pop_back_val(); 19152 19153 // For TokenFactor nodes, look at each operand and only continue up the 19154 // chain until we reach the depth limit. 19155 // 19156 // FIXME: The depth check could be made to return the last non-aliasing 19157 // chain we found before we hit a tokenfactor rather than the original 19158 // chain. 19159 if (Depth > TLI.getGatherAllAliasesMaxDepth()) { 19160 Aliases.clear(); 19161 Aliases.push_back(OriginalChain); 19162 return; 19163 } 19164 19165 // Don't bother if we've been before. 
19166 if (!Visited.insert(Chain.getNode()).second) 19167 continue; 19168 19169 switch (Chain.getOpcode()) { 19170 case ISD::EntryToken: 19171 // Entry token is ideal chain operand, but handled in FindBetterChain. 19172 break; 19173 19174 case ISD::LOAD: 19175 case ISD::STORE: { 19176 // Get alias information for Chain. 19177 bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) && 19178 !cast<LSBaseSDNode>(Chain.getNode())->isVolatile(); 19179 19180 // If chain is alias then stop here. 19181 if (!(IsLoad && IsOpLoad) && 19182 isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) { 19183 Aliases.push_back(Chain); 19184 } else { 19185 // Look further up the chain. 19186 Chains.push_back(Chain.getOperand(0)); 19187 ++Depth; 19188 } 19189 break; 19190 } 19191 19192 case ISD::TokenFactor: 19193 // We have to check each of the operands of the token factor for "small" 19194 // token factors, so we queue them up. Adding the operands to the queue 19195 // (stack) in reverse order maintains the original order and increases the 19196 // likelihood that getNode will find a matching token factor (CSE.) 19197 if (Chain.getNumOperands() > 16) { 19198 Aliases.push_back(Chain); 19199 break; 19200 } 19201 for (unsigned n = Chain.getNumOperands(); n;) 19202 Chains.push_back(Chain.getOperand(--n)); 19203 ++Depth; 19204 break; 19205 19206 case ISD::CopyFromReg: 19207 // Forward past CopyFromReg. 19208 Chains.push_back(Chain.getOperand(0)); 19209 ++Depth; 19210 break; 19211 19212 default: 19213 // For all other instructions we will just have to take what we can get. 19214 Aliases.push_back(Chain); 19215 break; 19216 } 19217 } 19218 } 19219 19220 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain 19221 /// (aliasing node.) 19222 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { 19223 if (OptLevel == CodeGenOpt::None) 19224 return OldChain; 19225 19226 // Ops for replacing token factor. 
19227 SmallVector<SDValue, 8> Aliases; 19228 19229 // Accumulate all the aliases to this node. 19230 GatherAllAliases(N, OldChain, Aliases); 19231 19232 // If no operands then chain to entry token. 19233 if (Aliases.size() == 0) 19234 return DAG.getEntryNode(); 19235 19236 // If a single operand then chain to it. We don't need to revisit it. 19237 if (Aliases.size() == 1) 19238 return Aliases[0]; 19239 19240 // Construct a custom tailored token factor. 19241 return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); 19242 } 19243 19244 // TODO: Replace with with std::monostate when we move to C++17. 19245 struct UnitT { } Unit; 19246 bool operator==(const UnitT &, const UnitT &) { return true; } 19247 bool operator!=(const UnitT &, const UnitT &) { return false; } 19248 19249 // This function tries to collect a bunch of potentially interesting 19250 // nodes to improve the chains of, all at once. This might seem 19251 // redundant, as this function gets called when visiting every store 19252 // node, so why not let the work be done on each store as it's visited? 19253 // 19254 // I believe this is mainly important because MergeConsecutiveStores 19255 // is unable to deal with merging stores of different sizes, so unless 19256 // we improve the chains of all the potential candidates up-front 19257 // before running MergeConsecutiveStores, it might only see some of 19258 // the nodes that will eventually be candidates, and then not be able 19259 // to go from a partially-merged state to the desired final 19260 // fully-merged state. 19261 19262 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { 19263 SmallVector<StoreSDNode *, 8> ChainedStores; 19264 StoreSDNode *STChain = St; 19265 // Intervals records which offsets from BaseIndex have been covered. In 19266 // the common case, every store writes to the immediately previous address 19267 // space and thus merged with the previous interval at insertion time. 
19268 19269 using IMap = 19270 llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>; 19271 IMap::Allocator A; 19272 IMap Intervals(A); 19273 19274 // This holds the base pointer, index, and the offset in bytes from the base 19275 // pointer. 19276 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); 19277 19278 // We must have a base and an offset. 19279 if (!BasePtr.getBase().getNode()) 19280 return false; 19281 19282 // Do not handle stores to undef base pointers. 19283 if (BasePtr.getBase().isUndef()) 19284 return false; 19285 19286 // Add ST's interval. 19287 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit); 19288 19289 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) { 19290 // If the chain has more than one use, then we can't reorder the mem ops. 19291 if (!SDValue(Chain, 0)->hasOneUse()) 19292 break; 19293 if (Chain->isVolatile() || Chain->isIndexed()) 19294 break; 19295 19296 // Find the base pointer and offset for this memory node. 19297 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG); 19298 // Check that the base pointer is the same as the original one. 19299 int64_t Offset; 19300 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset)) 19301 break; 19302 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8; 19303 // Make sure we don't overlap with other intervals by checking the ones to 19304 // the left or right before inserting. 19305 auto I = Intervals.find(Offset); 19306 // If there's a next interval, we should end before it. 19307 if (I != Intervals.end() && I.start() < (Offset + Length)) 19308 break; 19309 // If there's a previous interval, we should start after it. 19310 if (I != Intervals.begin() && (--I).stop() <= Offset) 19311 break; 19312 Intervals.insert(Offset, Offset + Length, Unit); 19313 19314 ChainedStores.push_back(Chain); 19315 STChain = Chain; 19316 } 19317 19318 // If we didn't find a chained store, exit. 
19319 if (ChainedStores.size() == 0) 19320 return false; 19321 19322 // Improve all chained stores (St and ChainedStores members) starting from 19323 // where the store chain ended and return single TokenFactor. 19324 SDValue NewChain = STChain->getChain(); 19325 SmallVector<SDValue, 8> TFOps; 19326 for (unsigned I = ChainedStores.size(); I;) { 19327 StoreSDNode *S = ChainedStores[--I]; 19328 SDValue BetterChain = FindBetterChain(S, NewChain); 19329 S = cast<StoreSDNode>(DAG.UpdateNodeOperands( 19330 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3))); 19331 TFOps.push_back(SDValue(S, 0)); 19332 ChainedStores[I] = S; 19333 } 19334 19335 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo. 19336 SDValue BetterChain = FindBetterChain(St, NewChain); 19337 SDValue NewST; 19338 if (St->isTruncatingStore()) 19339 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(), 19340 St->getBasePtr(), St->getMemoryVT(), 19341 St->getMemOperand()); 19342 else 19343 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(), 19344 St->getBasePtr(), St->getMemOperand()); 19345 19346 TFOps.push_back(NewST); 19347 19348 // If we improved every element of TFOps, then we've lost the dependence on 19349 // NewChain to successors of St and we need to add it back to TFOps. Do so at 19350 // the beginning to keep relative order consistent with FindBetterChains. 19351 auto hasImprovedChain = [&](SDValue ST) -> bool { 19352 return ST->getOperand(0) != NewChain; 19353 }; 19354 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain); 19355 if (AddNewChain) 19356 TFOps.insert(TFOps.begin(), NewChain); 19357 19358 SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps); 19359 CombineTo(St, TF); 19360 19361 AddToWorklist(STChain); 19362 // Add TF operands worklist in reverse order. 
19363 for (auto I = TF->getNumOperands(); I;) 19364 AddToWorklist(TF->getOperand(--I).getNode()); 19365 AddToWorklist(TF.getNode()); 19366 return true; 19367 } 19368 19369 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { 19370 if (OptLevel == CodeGenOpt::None) 19371 return false; 19372 19373 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); 19374 19375 // We must have a base and an offset. 19376 if (!BasePtr.getBase().getNode()) 19377 return false; 19378 19379 // Do not handle stores to undef base pointers. 19380 if (BasePtr.getBase().isUndef()) 19381 return false; 19382 19383 // Directly improve a chain of disjoint stores starting at St. 19384 if (parallelizeChainedStores(St)) 19385 return true; 19386 19387 // Improve St's Chain.. 19388 SDValue BetterChain = FindBetterChain(St, St->getChain()); 19389 if (St->getChain() != BetterChain) { 19390 replaceStoreChain(St, BetterChain); 19391 return true; 19392 } 19393 return false; 19394 } 19395 19396 /// This is the entry point for the file. 19397 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA, 19398 CodeGenOpt::Level OptLevel) { 19399 /// This is the main entry point to this class. 19400 DAGCombiner(*this, AA, OptLevel).Run(Level); 19401 } 19402