1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run 11 // both before and after the DAG is legalized. 12 // 13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is 14 // primarily intended to handle simplification opportunities that are implicit 15 // in the LLVM IR and exposed by the various codegen lowering phases. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/ADT/IntervalMap.h" 24 #include "llvm/ADT/None.h" 25 #include "llvm/ADT/Optional.h" 26 #include "llvm/ADT/STLExtras.h" 27 #include "llvm/ADT/SetVector.h" 28 #include "llvm/ADT/SmallBitVector.h" 29 #include "llvm/ADT/SmallPtrSet.h" 30 #include "llvm/ADT/SmallSet.h" 31 #include "llvm/ADT/SmallVector.h" 32 #include "llvm/ADT/Statistic.h" 33 #include "llvm/Analysis/AliasAnalysis.h" 34 #include "llvm/Analysis/MemoryLocation.h" 35 #include "llvm/CodeGen/DAGCombine.h" 36 #include "llvm/CodeGen/ISDOpcodes.h" 37 #include "llvm/CodeGen/MachineFrameInfo.h" 38 #include "llvm/CodeGen/MachineFunction.h" 39 #include "llvm/CodeGen/MachineMemOperand.h" 40 #include "llvm/CodeGen/RuntimeLibcalls.h" 41 #include "llvm/CodeGen/SelectionDAG.h" 42 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" 43 #include "llvm/CodeGen/SelectionDAGNodes.h" 44 #include "llvm/CodeGen/SelectionDAGTargetInfo.h" 45 #include "llvm/CodeGen/TargetLowering.h" 46 #include "llvm/CodeGen/TargetRegisterInfo.h" 47 #include "llvm/CodeGen/TargetSubtargetInfo.h" 48 
#include "llvm/CodeGen/ValueTypes.h" 49 #include "llvm/IR/Attributes.h" 50 #include "llvm/IR/Constant.h" 51 #include "llvm/IR/DataLayout.h" 52 #include "llvm/IR/DerivedTypes.h" 53 #include "llvm/IR/Function.h" 54 #include "llvm/IR/LLVMContext.h" 55 #include "llvm/IR/Metadata.h" 56 #include "llvm/Support/Casting.h" 57 #include "llvm/Support/CodeGen.h" 58 #include "llvm/Support/CommandLine.h" 59 #include "llvm/Support/Compiler.h" 60 #include "llvm/Support/Debug.h" 61 #include "llvm/Support/ErrorHandling.h" 62 #include "llvm/Support/KnownBits.h" 63 #include "llvm/Support/MachineValueType.h" 64 #include "llvm/Support/MathExtras.h" 65 #include "llvm/Support/raw_ostream.h" 66 #include "llvm/Target/TargetMachine.h" 67 #include "llvm/Target/TargetOptions.h" 68 #include <algorithm> 69 #include <cassert> 70 #include <cstdint> 71 #include <functional> 72 #include <iterator> 73 #include <string> 74 #include <tuple> 75 #include <utility> 76 77 using namespace llvm; 78 79 #define DEBUG_TYPE "dagcombine" 80 81 STATISTIC(NodesCombined , "Number of dag nodes combined"); 82 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); 83 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); 84 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); 85 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); 86 STATISTIC(SlicedLoads, "Number of load sliced"); 87 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops"); 88 89 static cl::opt<bool> 90 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, 91 cl::desc("Enable DAG combiner's use of IR alias analysis")); 92 93 static cl::opt<bool> 94 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), 95 cl::desc("Enable DAG combiner's use of TBAA")); 96 97 #ifndef NDEBUG 98 static cl::opt<std::string> 99 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, 100 cl::desc("Only use DAG-combiner alias analysis in this" 101 " function")); 102 #endif 103 104 /// Hidden 
option to stress test load slicing, i.e., when this option 105 /// is enabled, load slicing bypasses most of its profitability guards. 106 static cl::opt<bool> 107 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, 108 cl::desc("Bypass the profitability model of load slicing"), 109 cl::init(false)); 110 111 static cl::opt<bool> 112 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), 113 cl::desc("DAG combiner may split indexing from loads")); 114 115 namespace { 116 117 class DAGCombiner { 118 SelectionDAG &DAG; 119 const TargetLowering &TLI; 120 CombineLevel Level; 121 CodeGenOpt::Level OptLevel; 122 bool LegalOperations = false; 123 bool LegalTypes = false; 124 bool ForCodeSize; 125 126 /// Worklist of all of the nodes that need to be simplified. 127 /// 128 /// This must behave as a stack -- new nodes to process are pushed onto the 129 /// back and when processing we pop off of the back. 130 /// 131 /// The worklist will not contain duplicates but may contain null entries 132 /// due to nodes being deleted from the underlying DAG. 133 SmallVector<SDNode *, 64> Worklist; 134 135 /// Mapping from an SDNode to its position on the worklist. 136 /// 137 /// This is used to find and remove nodes from the worklist (by nulling 138 /// them) when they are deleted from the underlying DAG. It relies on 139 /// stable indices of nodes within the worklist. 140 DenseMap<SDNode *, unsigned> WorklistMap; 141 142 /// Set of nodes which have been combined (at least once). 143 /// 144 /// This is used to allow us to reliably add any operands of a DAG node 145 /// which have not yet been combined to the worklist. 146 SmallPtrSet<SDNode *, 32> CombinedNodes; 147 148 // AA - Used for DAG load/store alias analysis. 149 AliasAnalysis *AA; 150 151 /// When an instruction is simplified, add all users of the instruction to 152 /// the work lists because they might get more simplified now. 
void AddUsersToWorklist(SDNode *N) {
  // Every user of N may now fold differently once N has been simplified.
  for (SDNode *Node : N->uses())
    AddToWorklist(Node);
}

/// Call the node-specific routine that folds each particular type of node.
SDValue visit(SDNode *N);

public:
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
    : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
      OptLevel(OL), AA(AA) {
  ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();

  // Record the widest legal simple type; used to bound store-merge widths.
  MaximumLegalStoreInBits = 0;
  for (MVT VT : MVT::all_valuetypes())
    if (EVT(VT).isSimple() && VT != MVT::Other &&
        TLI.isTypeLegal(EVT(VT)) &&
        VT.getSizeInBits() >= MaximumLegalStoreInBits)
      MaximumLegalStoreInBits = VT.getSizeInBits();
}

/// Add to the worklist making sure its instance is at the back (next to be
/// processed).
void AddToWorklist(SDNode *N) {
  assert(N->getOpcode() != ISD::DELETED_NODE &&
         "Deleted Node added to Worklist");

  // Skip handle nodes as they can't usefully be combined and confuse the
  // zero-use deletion strategy.
  if (N->getOpcode() == ISD::HANDLENODE)
    return;

  // WorklistMap doubles as the membership test; only push when the node was
  // not already queued, so Worklist never holds duplicates.
  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
    Worklist.push_back(N);
}

/// Remove all instances of N from the worklist.
void removeFromWorklist(SDNode *N) {
  CombinedNodes.erase(N);

  auto It = WorklistMap.find(N);
  if (It == WorklistMap.end())
    return; // Not in the worklist.

  // Null out the entry rather than erasing it to avoid a linear operation.
  Worklist[It->second] = nullptr;
  WorklistMap.erase(It);
}

void deleteAndRecombine(SDNode *N);
bool recursivelyDeleteUnusedNodes(SDNode *N);

/// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                  bool AddTo = true);

/// Replaces all uses of the results of one DAG node with new values.
// Single-result convenience wrapper over the array form of CombineTo.
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
  return CombineTo(N, &Res, 1, AddTo);
}

/// Replaces all uses of the results of one DAG node with new values.
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                  bool AddTo = true) {
  SDValue To[] = { Res0, Res1 };
  return CombineTo(N, To, 2, AddTo);
}

void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

private:
// Widest legal store width, computed once in the constructor.
unsigned MaximumLegalStoreInBits;

/// Check the specified integer node value to see if it can be simplified or
/// if things it uses can be simplified by bit propagation.
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
  // Demand every bit of the scalar value and defer to the two-arg form.
  unsigned BitWidth = Op.getScalarValueSizeInBits();
  APInt Demanded = APInt::getAllOnesValue(BitWidth);
  return SimplifyDemandedBits(Op, Demanded);
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the
/// elements. If so, return true.
bool SimplifyDemandedVectorElts(SDValue Op) {
  // Demand every element and defer to the two-arg form.
  unsigned NumElts = Op.getValueType().getVectorNumElements();
  APInt Demanded = APInt::getAllOnesValue(NumElts);
  return SimplifyDemandedVectorElts(Op, Demanded);
}

bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                bool AssumeSingleUse = false);

bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);

// Scalars have size 0 to distinguish from singleton vectors.
255 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD); 256 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val); 257 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val); 258 259 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed 260 /// load. 261 /// 262 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. 263 /// \param InVecVT type of the input vector to EVE with bitcasts resolved. 264 /// \param EltNo index of the vector element to load. 265 /// \param OriginalLoad load that EVE came from to be replaced. 266 /// \returns EVE on success SDValue() on failure. 267 SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( 268 SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); 269 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); 270 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); 271 SDValue SExtPromoteOperand(SDValue Op, EVT PVT); 272 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); 273 SDValue PromoteIntBinOp(SDValue Op); 274 SDValue PromoteIntShiftOp(SDValue Op); 275 SDValue PromoteExtend(SDValue Op); 276 bool PromoteLoad(SDValue Op); 277 278 /// Call the node-specific routine that knows how to fold each 279 /// particular type of node. If that doesn't do anything, try the 280 /// target-specific DAG combines. 281 SDValue combine(SDNode *N); 282 283 // Visitation implementation - Implement dag node combining for different 284 // node types. The semantics are as follows: 285 // Return Value: 286 // SDValue.getNode() == 0 - No change was made 287 // SDValue.getNode() == N - N was replaced, is dead and has been handled. 288 // otherwise - N should be replaced by the returned Operand. 
289 // 290 SDValue visitTokenFactor(SDNode *N); 291 SDValue visitMERGE_VALUES(SDNode *N); 292 SDValue visitADD(SDNode *N); 293 SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference); 294 SDValue visitSUB(SDNode *N); 295 SDValue visitADDC(SDNode *N); 296 SDValue visitUADDO(SDNode *N); 297 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N); 298 SDValue visitSUBC(SDNode *N); 299 SDValue visitUSUBO(SDNode *N); 300 SDValue visitADDE(SDNode *N); 301 SDValue visitADDCARRY(SDNode *N); 302 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N); 303 SDValue visitSUBE(SDNode *N); 304 SDValue visitSUBCARRY(SDNode *N); 305 SDValue visitMUL(SDNode *N); 306 SDValue useDivRem(SDNode *N); 307 SDValue visitSDIV(SDNode *N); 308 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N); 309 SDValue visitUDIV(SDNode *N); 310 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N); 311 SDValue visitREM(SDNode *N); 312 SDValue visitMULHU(SDNode *N); 313 SDValue visitMULHS(SDNode *N); 314 SDValue visitSMUL_LOHI(SDNode *N); 315 SDValue visitUMUL_LOHI(SDNode *N); 316 SDValue visitSMULO(SDNode *N); 317 SDValue visitUMULO(SDNode *N); 318 SDValue visitIMINMAX(SDNode *N); 319 SDValue visitAND(SDNode *N); 320 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N); 321 SDValue visitOR(SDNode *N); 322 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N); 323 SDValue visitXOR(SDNode *N); 324 SDValue SimplifyVBinOp(SDNode *N); 325 SDValue visitSHL(SDNode *N); 326 SDValue visitSRA(SDNode *N); 327 SDValue visitSRL(SDNode *N); 328 SDValue visitRotate(SDNode *N); 329 SDValue visitABS(SDNode *N); 330 SDValue visitBSWAP(SDNode *N); 331 SDValue visitBITREVERSE(SDNode *N); 332 SDValue visitCTLZ(SDNode *N); 333 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); 334 SDValue visitCTTZ(SDNode *N); 335 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); 336 SDValue visitCTPOP(SDNode *N); 337 SDValue visitSELECT(SDNode *N); 338 SDValue visitVSELECT(SDNode *N); 339 SDValue 
visitSELECT_CC(SDNode *N); 340 SDValue visitSETCC(SDNode *N); 341 SDValue visitSETCCCARRY(SDNode *N); 342 SDValue visitSIGN_EXTEND(SDNode *N); 343 SDValue visitZERO_EXTEND(SDNode *N); 344 SDValue visitANY_EXTEND(SDNode *N); 345 SDValue visitAssertExt(SDNode *N); 346 SDValue visitSIGN_EXTEND_INREG(SDNode *N); 347 SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); 348 SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); 349 SDValue visitTRUNCATE(SDNode *N); 350 SDValue visitBITCAST(SDNode *N); 351 SDValue visitBUILD_PAIR(SDNode *N); 352 SDValue visitFADD(SDNode *N); 353 SDValue visitFSUB(SDNode *N); 354 SDValue visitFMUL(SDNode *N); 355 SDValue visitFMA(SDNode *N); 356 SDValue visitFDIV(SDNode *N); 357 SDValue visitFREM(SDNode *N); 358 SDValue visitFSQRT(SDNode *N); 359 SDValue visitFCOPYSIGN(SDNode *N); 360 SDValue visitFPOW(SDNode *N); 361 SDValue visitSINT_TO_FP(SDNode *N); 362 SDValue visitUINT_TO_FP(SDNode *N); 363 SDValue visitFP_TO_SINT(SDNode *N); 364 SDValue visitFP_TO_UINT(SDNode *N); 365 SDValue visitFP_ROUND(SDNode *N); 366 SDValue visitFP_ROUND_INREG(SDNode *N); 367 SDValue visitFP_EXTEND(SDNode *N); 368 SDValue visitFNEG(SDNode *N); 369 SDValue visitFABS(SDNode *N); 370 SDValue visitFCEIL(SDNode *N); 371 SDValue visitFTRUNC(SDNode *N); 372 SDValue visitFFLOOR(SDNode *N); 373 SDValue visitFMINNUM(SDNode *N); 374 SDValue visitFMAXNUM(SDNode *N); 375 SDValue visitFMINIMUM(SDNode *N); 376 SDValue visitFMAXIMUM(SDNode *N); 377 SDValue visitBRCOND(SDNode *N); 378 SDValue visitBR_CC(SDNode *N); 379 SDValue visitLOAD(SDNode *N); 380 381 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); 382 SDValue replaceStoreOfFPConstant(StoreSDNode *ST); 383 384 SDValue visitSTORE(SDNode *N); 385 SDValue visitINSERT_VECTOR_ELT(SDNode *N); 386 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); 387 SDValue visitBUILD_VECTOR(SDNode *N); 388 SDValue visitCONCAT_VECTORS(SDNode *N); 389 SDValue visitEXTRACT_SUBVECTOR(SDNode *N); 390 SDValue visitVECTOR_SHUFFLE(SDNode *N); 391 
SDValue visitSCALAR_TO_VECTOR(SDNode *N); 392 SDValue visitINSERT_SUBVECTOR(SDNode *N); 393 SDValue visitMLOAD(SDNode *N); 394 SDValue visitMSTORE(SDNode *N); 395 SDValue visitMGATHER(SDNode *N); 396 SDValue visitMSCATTER(SDNode *N); 397 SDValue visitFP_TO_FP16(SDNode *N); 398 SDValue visitFP16_TO_FP(SDNode *N); 399 400 SDValue visitFADDForFMACombine(SDNode *N); 401 SDValue visitFSUBForFMACombine(SDNode *N); 402 SDValue visitFMULForFMADistributiveCombine(SDNode *N); 403 404 SDValue XformToShuffleWithZero(SDNode *N); 405 SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, 406 SDValue N1, SDNodeFlags Flags); 407 408 SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); 409 410 SDValue foldSelectOfConstants(SDNode *N); 411 SDValue foldVSelectOfConstants(SDNode *N); 412 SDValue foldBinOpIntoSelect(SDNode *BO); 413 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); 414 SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); 415 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); 416 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, 417 SDValue N2, SDValue N3, ISD::CondCode CC, 418 bool NotExtCompare = false); 419 SDValue convertSelectOfFPConstantsToLoadOffset( 420 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, 421 ISD::CondCode CC); 422 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, 423 SDValue N2, SDValue N3, ISD::CondCode CC); 424 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, 425 const SDLoc &DL); 426 SDValue unfoldMaskedMerge(SDNode *N); 427 SDValue unfoldExtremeBitClearingToShifts(SDNode *N); 428 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, 429 const SDLoc &DL, bool foldBooleans); 430 SDValue rebuildSetCC(SDValue N); 431 432 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 433 SDValue &CC) const; 434 bool isOneUseSetCC(SDValue N) const; 435 436 SDValue SimplifyNodeWithTwoResults(SDNode *N, 
unsigned LoOp, 437 unsigned HiOp); 438 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); 439 SDValue CombineExtLoad(SDNode *N); 440 SDValue CombineZExtLogicopShiftLoad(SDNode *N); 441 SDValue combineRepeatedFPDivisors(SDNode *N); 442 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex); 443 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); 444 SDValue BuildSDIV(SDNode *N); 445 SDValue BuildSDIVPow2(SDNode *N); 446 SDValue BuildUDIV(SDNode *N); 447 SDValue BuildLogBase2(SDValue V, const SDLoc &DL); 448 SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags); 449 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); 450 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); 451 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); 452 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations, 453 SDNodeFlags Flags, bool Reciprocal); 454 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations, 455 SDNodeFlags Flags, bool Reciprocal); 456 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 457 bool DemandHighBits = true); 458 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); 459 SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, 460 SDValue InnerPos, SDValue InnerNeg, 461 unsigned PosOpcode, unsigned NegOpcode, 462 const SDLoc &DL); 463 SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); 464 SDValue MatchLoadCombine(SDNode *N); 465 SDValue ReduceLoadWidth(SDNode *N); 466 SDValue ReduceLoadOpStoreWidth(SDNode *N); 467 SDValue splitMergedValStore(StoreSDNode *ST); 468 SDValue TransformFPLoadStorePair(SDNode *N); 469 SDValue convertBuildVecZextToZext(SDNode *N); 470 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); 471 SDValue reduceBuildVecToShuffle(SDNode *N); 472 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, 473 ArrayRef<int> VectorMask, SDValue VecIn1, 474 SDValue VecIn2, unsigned LeftIdx); 475 SDValue 
matchVSelectOpSizesWithSetCC(SDNode *Cast); 476 477 /// Walk up chain skipping non-aliasing memory nodes, 478 /// looking for aliasing nodes and adding them to the Aliases vector. 479 void GatherAllAliases(SDNode *N, SDValue OriginalChain, 480 SmallVectorImpl<SDValue> &Aliases); 481 482 /// Return true if there is any possibility that the two addresses overlap. 483 bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; 484 485 /// Walk up chain skipping non-aliasing memory nodes, looking for a better 486 /// chain (aliasing node.) 487 SDValue FindBetterChain(SDNode *N, SDValue Chain); 488 489 /// Try to replace a store and any possibly adjacent stores on 490 /// consecutive chains with better chains. Return true only if St is 491 /// replaced. 492 /// 493 /// Notice that other chains may still be replaced even if the function 494 /// returns false. 495 bool findBetterNeighborChains(StoreSDNode *St); 496 497 // Helper for findBetterNeighborChains. Walk up store chain add additional 498 // chained stores that do not overlap and can be parallelized. 499 bool parallelizeChainedStores(StoreSDNode *St); 500 501 /// Holds a pointer to an LSBaseSDNode as well as information on where it 502 /// is located in a sequence of memory operations connected by a chain. 503 struct MemOpLink { 504 // Ptr to the mem node. 505 LSBaseSDNode *MemNode; 506 507 // Offset from the base ptr. 508 int64_t OffsetFromBase; 509 510 MemOpLink(LSBaseSDNode *N, int64_t Offset) 511 : MemNode(N), OffsetFromBase(Offset) {} 512 }; 513 514 /// This is a helper function for visitMUL to check the profitability 515 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). 516 /// MulNode is the original multiply, AddNode is (add x, c1), 517 /// and ConstNode is c2. 518 bool isMulAddWithConstProfitable(SDNode *MulNode, 519 SDValue &AddNode, 520 SDValue &ConstNode); 521 522 /// This is a helper function for visitAND and visitZERO_EXTEND. 
Returns 523 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns 524 /// the type of the loaded value to be extended. 525 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, 526 EVT LoadResultTy, EVT &ExtVT); 527 528 /// Helper function to calculate whether the given Load/Store can have its 529 /// width reduced to ExtVT. 530 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType, 531 EVT &MemVT, unsigned ShAmt = 0); 532 533 /// Used by BackwardsPropagateMask to find suitable loads. 534 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads, 535 SmallPtrSetImpl<SDNode*> &NodesWithConsts, 536 ConstantSDNode *Mask, SDNode *&NodeToMask); 537 /// Attempt to propagate a given AND node back to load leaves so that they 538 /// can be combined into narrow loads. 539 bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG); 540 541 /// Helper function for MergeConsecutiveStores which merges the 542 /// component store chains. 543 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, 544 unsigned NumStores); 545 546 /// This is a helper function for MergeConsecutiveStores. When the 547 /// source elements of the consecutive stores are all constants or 548 /// all extracted vector elements, try to merge them into one 549 /// larger store introducing bitcasts if necessary. \return True 550 /// if a merged store was created. 551 bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, 552 EVT MemVT, unsigned NumStores, 553 bool IsConstantSrc, bool UseVector, 554 bool UseTrunc); 555 556 /// This is a helper function for MergeConsecutiveStores. Stores 557 /// that potentially may be merged with St are placed in 558 /// StoreNodes. RootNode is a chain predecessor to all store 559 /// candidates. 560 void getStoreMergeCandidates(StoreSDNode *St, 561 SmallVectorImpl<MemOpLink> &StoreNodes, 562 SDNode *&Root); 563 564 /// Helper function for MergeConsecutiveStores. 
Checks if 565 /// candidate stores have indirect dependency through their 566 /// operands. RootNode is the predecessor to all stores calculated 567 /// by getStoreMergeCandidates and is used to prune the dependency check. 568 /// \return True if safe to merge. 569 bool checkMergeStoreCandidatesForDependencies( 570 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, 571 SDNode *RootNode); 572 573 /// Merge consecutive store operations into a wide store. 574 /// This optimization uses wide integers or vectors when possible. 575 /// \return number of stores that were merged into a merged store (the 576 /// affected nodes are stored as a prefix in \p StoreNodes). 577 bool MergeConsecutiveStores(StoreSDNode *St); 578 579 /// Try to transform a truncation where C is a constant: 580 /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) 581 /// 582 /// \p N needs to be a truncation and its first operand an AND. Other 583 /// requirements are checked by the function (e.g. that trunc is 584 /// single-use) and if missed an empty SDValue is returned. 585 SDValue distributeTruncateThroughAnd(SDNode *N); 586 587 /// Helper function to determine whether the target supports operation 588 /// given by \p Opcode for type \p VT, that is, whether the operation 589 /// is legal or custom before legalizing operations, and whether is 590 /// legal (but not custom) after legalization. 591 bool hasOperation(unsigned Opcode, EVT VT) { 592 if (LegalOperations) 593 return TLI.isOperationLegal(Opcode, VT); 594 return TLI.isOperationLegalOrCustom(Opcode, VT); 595 } 596 597 public: 598 /// Runs the dag combiner on all nodes in the work list 599 void Run(CombineLevel AtLevel); 600 601 SelectionDAG &getDAG() const { return DAG; } 602 603 /// Returns a type large enough to hold any valid shift amount - before type 604 /// legalization these can be huge. 
605 EVT getShiftAmountTy(EVT LHSTy) { 606 assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); 607 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes); 608 } 609 610 /// This method returns true if we are running before type legalization or 611 /// if the specified VT is legal. 612 bool isTypeLegal(const EVT &VT) { 613 if (!LegalTypes) return true; 614 return TLI.isTypeLegal(VT); 615 } 616 617 /// Convenience wrapper around TargetLowering::getSetCCResultType 618 EVT getSetCCResultType(EVT VT) const { 619 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 620 } 621 622 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, 623 SDValue OrigLoad, SDValue ExtLoad, 624 ISD::NodeType ExtType); 625 }; 626 627 /// This class is a DAGUpdateListener that removes any deleted 628 /// nodes from the worklist. 629 class WorklistRemover : public SelectionDAG::DAGUpdateListener { 630 DAGCombiner &DC; 631 632 public: 633 explicit WorklistRemover(DAGCombiner &dc) 634 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} 635 636 void NodeDeleted(SDNode *N, SDNode *E) override { 637 DC.removeFromWorklist(N); 638 } 639 }; 640 641 } // end anonymous namespace 642 643 //===----------------------------------------------------------------------===// 644 // TargetLowering::DAGCombinerInfo implementation 645 //===----------------------------------------------------------------------===// 646 647 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { 648 ((DAGCombiner*)DC)->AddToWorklist(N); 649 } 650 651 SDValue TargetLowering::DAGCombinerInfo:: 652 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { 653 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); 654 } 655 656 SDValue TargetLowering::DAGCombinerInfo:: 657 CombineTo(SDNode *N, SDValue Res, bool AddTo) { 658 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); 659 } 660 661 SDValue TargetLowering::DAGCombinerInfo:: 662 CombineTo(SDNode *N, 
SDValue Res0, SDValue Res1, bool AddTo) { 663 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); 664 } 665 666 void TargetLowering::DAGCombinerInfo:: 667 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 668 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); 669 } 670 671 //===----------------------------------------------------------------------===// 672 // Helper Functions 673 //===----------------------------------------------------------------------===// 674 675 void DAGCombiner::deleteAndRecombine(SDNode *N) { 676 removeFromWorklist(N); 677 678 // If the operands of this node are only used by the node, they will now be 679 // dead. Make sure to re-visit them and recursively delete dead nodes. 680 for (const SDValue &Op : N->ops()) 681 // For an operand generating multiple values, one of the values may 682 // become dead allowing further simplification (e.g. split index 683 // arithmetic from an indexed load). 684 if (Op->hasOneUse() || Op->getNumValues() > 1) 685 AddToWorklist(Op.getNode()); 686 687 DAG.DeleteNode(N); 688 } 689 690 /// Return 1 if we can compute the negated form of the specified expression for 691 /// the same cost as the expression itself, or 2 if we can compute the negated 692 /// form more cheaply than the expression itself. 693 static char isNegatibleForFree(SDValue Op, bool LegalOperations, 694 const TargetLowering &TLI, 695 const TargetOptions *Options, 696 unsigned Depth = 0) { 697 // fneg is removable even if it has multiple uses. 698 if (Op.getOpcode() == ISD::FNEG) return 2; 699 700 // Don't allow anything with multiple uses unless we know it is free. 701 EVT VT = Op.getValueType(); 702 const SDNodeFlags Flags = Op->getFlags(); 703 if (!Op.hasOneUse()) 704 if (!(Op.getOpcode() == ISD::FP_EXTEND && 705 TLI.isFPExtFree(VT, Op.getOperand(0).getValueType()))) 706 return 0; 707 708 // Don't recurse exponentially. 
709 if (Depth > 6) return 0; 710 711 switch (Op.getOpcode()) { 712 default: return false; 713 case ISD::ConstantFP: { 714 if (!LegalOperations) 715 return 1; 716 717 // Don't invert constant FP values after legalization unless the target says 718 // the negated constant is legal. 719 return TLI.isOperationLegal(ISD::ConstantFP, VT) || 720 TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT); 721 } 722 case ISD::FADD: 723 if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros()) 724 return 0; 725 726 // After operation legalization, it might not be legal to create new FSUBs. 727 if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) 728 return 0; 729 730 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 731 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 732 Options, Depth + 1)) 733 return V; 734 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 735 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 736 Depth + 1); 737 case ISD::FSUB: 738 // We can't turn -(A-B) into B-A when we honor signed zeros. 739 if (!Options->NoSignedZerosFPMath && 740 !Flags.hasNoSignedZeros()) 741 return 0; 742 743 // fold (fneg (fsub A, B)) -> (fsub B, A) 744 return 1; 745 746 case ISD::FMUL: 747 case ISD::FDIV: 748 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 749 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 750 Options, Depth + 1)) 751 return V; 752 753 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 754 Depth + 1); 755 756 case ISD::FP_EXTEND: 757 case ISD::FP_ROUND: 758 case ISD::FSIN: 759 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, 760 Depth + 1); 761 } 762 } 763 764 /// If isNegatibleForFree returns true, return the newly negated expression. 
// NOTE: this function must mirror the case structure of isNegatibleForFree
// exactly; callers are expected to have checked isNegatibleForFree first, so
// the default case is unreachable here.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  // Same recursion bound as isNegatibleForFree (Depth > 6 bails there).
  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    // Negate the constant directly by flipping its sign bit.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    // Guarded by isNegatibleForFree: requires unsafe math or nsz.
    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Negation commutes with these; push the fneg onto the operand.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
    // FP_ROUND carries a second "trunc is exact" operand; preserve it.
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(1));
  }
}

// APInts must be the same size for most operations, this helper
// function zero extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
850 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 851 SDValue &CC) const { 852 if (N.getOpcode() == ISD::SETCC) { 853 LHS = N.getOperand(0); 854 RHS = N.getOperand(1); 855 CC = N.getOperand(2); 856 return true; 857 } 858 859 if (N.getOpcode() != ISD::SELECT_CC || 860 !TLI.isConstTrueVal(N.getOperand(2).getNode()) || 861 !TLI.isConstFalseVal(N.getOperand(3).getNode())) 862 return false; 863 864 if (TLI.getBooleanContents(N.getValueType()) == 865 TargetLowering::UndefinedBooleanContent) 866 return false; 867 868 LHS = N.getOperand(0); 869 RHS = N.getOperand(1); 870 CC = N.getOperand(4); 871 return true; 872 } 873 874 /// Return true if this is a SetCC-equivalent operation with only one use. 875 /// If this is true, it allows the users to invert the operation for free when 876 /// it is profitable to do so. 877 bool DAGCombiner::isOneUseSetCC(SDValue N) const { 878 SDValue N0, N1, N2; 879 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 880 return true; 881 return false; 882 } 883 884 // Returns the SDNode if it is a constant float BuildVector 885 // or constant float. 886 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { 887 if (isa<ConstantFPSDNode>(N)) 888 return N.getNode(); 889 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) 890 return N.getNode(); 891 return nullptr; 892 } 893 894 // Determines if it is a constant integer or a build vector of constant 895 // integers (and undefs). 896 // Do not permit build vector implicit truncation. 
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    // Reject non-constant elements, implicitly truncated elements, and
    // (optionally) opaque constants.
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
static bool isAnyConstantBuildVector(const SDNode *N) {
  return ISD::isBuildVectorOfConstantSDNodes(N) ||
         ISD::isBuildVectorOfConstantFPSDNodes(N);
}

/// Reassociate (Opc N0, N1) when one side is itself an Opc node with a
/// constant operand, so constants can be folded together or pulled outward.
/// Returns the reassociated value, or a null SDValue if no change applies.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())
    return SDValue();

  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  // Mirror of the above with the nested Opc on the RHS.
  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}

/// Replace all of N's values with the NumTo values in To, updating the
/// worklist (when AddTo is set) and deleting N if it becomes dead.
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

/// Commit a TargetLoweringOpt replacement (TLO.Old -> TLO.New), keeping the
/// combiner worklist consistent and deleting TLO.Old if it becomes dead.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses. If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
                                      0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Replace an integer load with a promoted (wider) extending load, truncating
/// the wide value back to the original type for existing users.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Rewire both the value (0) and the chain (1) results of the old load.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

/// Produce a version of Op with the promoted (wider) type PVT. Sets Replace
/// when the caller must later rewire users of an original load.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes EXTLOAD; an existing ext load keeps its
    // extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}

/// Promote Op to PVT and sign-extend the result in-register so the value's
/// original signed bits are preserved.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

/// Promote Op to PVT and zero-extend the result in-register so the value's
/// original unsigned bits are preserved.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}

/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    // Only the shifted value needs promotion; how it is extended depends on
    // which high bits the shift kind observes (SRA: sign, SRL: zero).
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}

/// Promote an extension node to the target-preferred wider type, when the
/// target says promotion is desirable. Folds nested extends in the process.
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

/// Promote an integer load to the target-preferred wider type, replacing the
/// original load. Returns true if the replacement happened.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // Preserve an existing extension kind; plain loads become EXTLOAD.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}

/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      // Queue the operands: deleting N may have made them dead too.
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still-live node lost a user; it may now match new combines.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}

//===----------------------------------------------------------------------===//
//  Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

/// Main driver: repeatedly pull nodes off the worklist, combine them, and
/// propagate the replacements until the worklist is empty.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node.  This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

/// Dispatch to the opcode-specific visit routine for N. Returns the
/// replacement value, or a null SDValue when no combine applies.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::FPOW:               return visitFPOW(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FMINIMUM:           return visitFMINIMUM(N);
  case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}

/// Try generic visit, then target combines, then type promotion, then CSE of
/// the commuted form. Returns the replacement value or null.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try eliminate it if the commuted
  // version is already present in the DAG.
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
static SDValue getInputChainForNode(SDNode *N) {
  // Chain operands are conventionally the first or last operand; scan the
  // middle positions only as a fallback.
  if (unsigned NumOps = N->getNumOperands()) {
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

/// Simplify a TOKEN_FACTOR node: drop entry tokens and duplicate operands,
/// merge in single-use nested token factors, and prune operands whose chains
/// are transitively reachable through another operand.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs list grows as new token factors
  // are encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Cap the search at 1024 visited nodes to bound compile time.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops for any pruning to be possible.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only operands whose chains were not reached from another
        // operand during the search; the rest are redundant.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    // Do as a single replacement to avoid rewalking use lists.
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      Ops.push_back(N->getOperand(i));
    DAG.ReplaceAllUsesWith(N, Ops.data());
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
/// ConstantSDNode pointer else nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}

/// Fold a binary operator of a select-of-constants and a constant operand by
/// applying the operation to each select arm, eliminating the binop:
///   binop (select Cond, CT, CF), CBO --> select Cond, CT*CBO, CF*CBO
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  // Both select arms must be (int or FP) constants.
  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  auto BinOpcode = BO->getOpcode();
  bool CanFoldNonConst =
      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
      (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
      (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));

  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // In case of shift value and shift amount may have different VT. For instance
  // on x86 shift amount is i8 regardless of LHS type. Bail out if we have
  // swapped operands and value types do not match. NB: x86 is fine if operands
  // are not swapped with shift amount VT being not bigger than shifted value.
  // TODO: that is possible to check for a shift operation, correct VTs and
  // still perform optimization on x86 if needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  SDLoc DL(Sel);
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
}

/// Fold add/sub of a zero-extended inverted low bit into a sub/add with an
/// adjusted constant:
///   add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
///   sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // Match a constant operand and a zext operand for the math instruction:
  // add Z, C
  // sub C, Z
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
  auto *CN = dyn_cast<ConstantSDNode>(C);
  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  // Match the zext operand as a setcc of a boolean.
  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
      Z.getOperand(0).getValueType() != MVT::i1)
    return SDValue();

  // Match the compare as: setcc (X & 1), 0, eq.
  SDValue SetCC = Z.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
      SetCC.getOperand(0).getOpcode() != ISD::AND ||
      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
    return SDValue();

  // We are adding/subtracting a constant and an inverted low bit. Turn that
  // into a subtract/add of the low bit with incremented/decremented constant:
  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
  EVT VT = C.getValueType();
  SDLoc DL(N);
  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}

/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
/// a shift and add with a different constant.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // We need a constant operand for the add/sub, and the other operand is a
  // logical shift right: add (srl), C or sub C, (srl).
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
  if (!C || ShiftOp.getOpcode() != ISD::SRL)
    return SDValue();

  // The shift must be of a 'not' value.
  SDValue Not = ShiftOp.getOperand(0);
  if (!Not.hasOneUse() || !isBitwiseNot(Not))
    return SDValue();

  // The shift must be moving the sign bit to the least-significant-bit.
  EVT VT = ShiftOp.getValueType();
  SDValue ShAmt = ShiftOp.getOperand(1);
  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
    return SDValue();

  // Eliminate the 'not' by adjusting the shift and add/sub constant:
  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
  SDLoc DL(N);
  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
}

SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneOrOneSplat(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      // With no common bits, the OR was equivalent to an ADD; reassociate so
      // the frame index and constant end up in the same ADD.
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // fold (add (xor a, -1), 1) -> (sub 0, a)
  if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                       N0.getOperand(0));

  // Try the non-commutative folds with the operands in both orders.
  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}

/// Peel TRUNCATE/ZERO_EXTEND/AND-with-1 wrappers off \p V and return the
/// underlying carry value (result #1 of an ADDCARRY/SUBCARRY/UADDO/USUBO
/// node) if \p V is usable as a carry, otherwise a null SDValue.
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
  bool Masked = false;

  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
  while (true) {
    if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
      V = V.getOperand(0);
      continue;
    }

    if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
      Masked = true;
      V = V.getOperand(0);
      continue;
    }

    break;
  }

  // If this is not a carry, return.
  if (V.getResNo() != 1)
    return SDValue();

  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
      V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
    return SDValue();

  // If the result is masked, then no matter what kind of bool it is we can
  // return. If it isn't, then we need to make sure the bool type is either 0 or
  // 1 and not other values.
  if (Masked ||
      TLI.getBooleanContents(V.getValueType()) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
    return V;

  return SDValue();
}

/// Helper for visitADD: folds that are not symmetric in N0/N1 (the caller
/// invokes this with both operand orders).
SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
      return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}

SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        DL, MVT::Glue));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

/// Logically negate boolean \p V by xor'ing it with the target's canonical
/// "true" constant for the given boolean contents (1 or -1).
static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
                           SelectionDAG &DAG, const TargetLowering &TLI) {
  SDValue Cst;
  switch (TLI.getBooleanContents(VT)) {
  case TargetLowering::ZeroOrOneBooleanContent:
  case TargetLowering::UndefinedBooleanContent:
    Cst = DAG.getConstant(1, DL, VT);
    break;
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    Cst = DAG.getConstant(-1, DL, VT);
    break;
  }

  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
}

/// Return true if \p V is (xor X, C) where the constant C flips a boolean of
/// the given contents (1, -1, or any odd value for undefined contents).
static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
  if (V.getOpcode() != ISD::XOR) return false;
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!Const) return false;

  switch(TLI.getBooleanContents(VT)) {
  case TargetLowering::ZeroOrOneBooleanContent:
    return Const->isOne();
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    return Const->isAllOnesValue();
  case TargetLowering::UndefinedBooleanContent:
    // Only bit 0 is meaningful; any odd constant flips it.
    return (Const->getAPIntValue() & 0x01) == 1;
  }
  llvm_unreachable("Unsupported boolean content");
}

SDValue DAGCombiner::visitUADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  // These folds are scalar-only.
  if (VT.isVector())
    return SDValue();

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);

  // fold (uaddo x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getConstant(0, DL, CarryVT));

  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
    SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                              DAG.getConstant(0, DL, VT),
                              N0.getOperand(0));
    return CombineTo(N, Sub,
                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
  }

  // Try the non-commutative folds with the operands in both orders.
  if (SDValue Combined = visitUADDOLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitUADDOLike(N1, N0, N))
    return Combined;

  return SDValue();
}

SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
  auto VT = N0.getValueType();

  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  // If Y + 1 cannot overflow.
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
    SDValue Y = N1.getOperand(0);
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
                         N1.getOperand(2));
  }

  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), VT), Carry);

  return SDValue();
}

SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
  }

  EVT CarryVT = CarryIn.getValueType();

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
  if (isBitwiseNot(N0) && isNullConstant(N1) &&
      isBooleanFlip(CarryIn, CarryVT, TLI)) {
    SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
                              DAG.getConstant(0, DL, N0.getValueType()),
                              N0.getOperand(0), CarryIn.getOperand(0));
    return CombineTo(N, Sub,
                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
  }

  // Try the non-commutative folds with the operands in both orders.
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}

SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry argument is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   *
   * We are trying to get:
   *   (addcarry X, 0, (addcarry A, B, Z):Carry)
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    /**
     *            (uaddo A, B)
     *             /       \
     *          Carry      Sum
     *            |          \
     *            | (addcarry *, 0, Z)
     *            |       /
     *             \   Carry
     *              |   /
     *   (addcarry X, *, *)
     */
    if (Y.getOpcode() == ISD::UADDO &&
        CarryIn.getResNo() == 1 &&
        CarryIn.getOpcode() == ISD::ADDCARRY &&
        isNullConstant(CarryIn.getOperand(1)) &&
        CarryIn.getOperand(0) == Y.getValue(0)) {
      auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
                              Y.getOperand(0), Y.getOperand(1),
                              CarryIn.getOperand(2));
      AddToWorklist(NewY.getNode());
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), N0.getValueType()),
                         NewY.getValue(1));
    }
  }

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG, bool LegalOperations) {
  if (!VT.isVector())
    return DAG.getConstant(0, DL, VT);
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, DL, VT);
  return SDValue();
}

SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
2540 if (N0 == N1) 2541 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); 2542 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 2543 DAG.isConstantIntBuildVectorOrConstantInt(N1)) { 2544 // fold (sub c1, c2) -> c1-c2 2545 return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), 2546 N1.getNode()); 2547 } 2548 2549 if (SDValue NewSel = foldBinOpIntoSelect(N)) 2550 return NewSel; 2551 2552 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 2553 2554 // fold (sub x, c) -> (add x, -c) 2555 if (N1C) { 2556 return DAG.getNode(ISD::ADD, DL, VT, N0, 2557 DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); 2558 } 2559 2560 if (isNullOrNullSplat(N0)) { 2561 unsigned BitWidth = VT.getScalarSizeInBits(); 2562 // Right-shifting everything out but the sign bit followed by negation is 2563 // the same as flipping arithmetic/logical shift type without the negation: 2564 // -(X >>u 31) -> (X >>s 31) 2565 // -(X >>s 31) -> (X >>u 31) 2566 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) { 2567 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1)); 2568 if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) { 2569 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA; 2570 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT)) 2571 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1)); 2572 } 2573 } 2574 2575 // 0 - X --> 0 if the sub is NUW. 2576 if (N->getFlags().hasNoUnsignedWrap()) 2577 return N0; 2578 2579 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) { 2580 // N1 is either 0 or the minimum signed value. If the sub is NSW, then 2581 // N1 must be 0 because negating the minimum signed value is undefined. 2582 if (N->getFlags().hasNoSignedWrap()) 2583 return N0; 2584 2585 // 0 - X --> X if X is 0 or the minimum signed value. 2586 return N1; 2587 } 2588 } 2589 2590 // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) 2591 if (isAllOnesOrAllOnesSplat(N0)) 2592 return DAG.getNode(ISD::XOR, DL, VT, N1, N0); 2593 2594 // fold (A - (0-B)) -> A+B 2595 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0))) 2596 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1)); 2597 2598 // fold A-(A-B) -> B 2599 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) 2600 return N1.getOperand(1); 2601 2602 // fold (A+B)-A -> B 2603 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) 2604 return N0.getOperand(1); 2605 2606 // fold (A+B)-B -> A 2607 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) 2608 return N0.getOperand(0); 2609 2610 // fold C2-(A+C1) -> (C2-C1)-A 2611 if (N1.getOpcode() == ISD::ADD) { 2612 SDValue N11 = N1.getOperand(1); 2613 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) && 2614 isConstantOrConstantVector(N11, /* NoOpaques */ true)) { 2615 SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11); 2616 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); 2617 } 2618 } 2619 2620 // fold ((A+(B+or-C))-B) -> A+or-C 2621 if (N0.getOpcode() == ISD::ADD && 2622 (N0.getOperand(1).getOpcode() == ISD::SUB || 2623 N0.getOperand(1).getOpcode() == ISD::ADD) && 2624 N0.getOperand(1).getOperand(0) == N1) 2625 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0), 2626 N0.getOperand(1).getOperand(1)); 2627 2628 // fold ((A+(C+B))-B) -> A+C 2629 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && 2630 N0.getOperand(1).getOperand(1) == N1) 2631 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), 2632 N0.getOperand(1).getOperand(0)); 2633 2634 // fold ((A-(B-C))-C) -> A-B 2635 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && 2636 N0.getOperand(1).getOperand(1) == N1) 2637 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), 2638 N0.getOperand(1).getOperand(0)); 2639 2640 // fold (A-(B-C)) -> A+(C-B) 2641 if (N1.getOpcode() == ISD::SUB && 
N1.hasOneUse()) 2642 return DAG.getNode(ISD::ADD, DL, VT, N0, 2643 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1), 2644 N1.getOperand(0))); 2645 2646 // fold (X - (-Y * Z)) -> (X + (Y * Z)) 2647 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { 2648 if (N1.getOperand(0).getOpcode() == ISD::SUB && 2649 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) { 2650 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, 2651 N1.getOperand(0).getOperand(1), 2652 N1.getOperand(1)); 2653 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); 2654 } 2655 if (N1.getOperand(1).getOpcode() == ISD::SUB && 2656 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) { 2657 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, 2658 N1.getOperand(0), 2659 N1.getOperand(1).getOperand(1)); 2660 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul); 2661 } 2662 } 2663 2664 // If either operand of a sub is undef, the result is undef 2665 if (N0.isUndef()) 2666 return N0; 2667 if (N1.isUndef()) 2668 return N1; 2669 2670 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) 2671 return V; 2672 2673 if (SDValue V = foldAddSubOfSignBit(N, DAG)) 2674 return V; 2675 2676 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X) 2677 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { 2678 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) { 2679 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1); 2680 SDValue S0 = N1.getOperand(0); 2681 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) { 2682 unsigned OpSizeInBits = VT.getScalarSizeInBits(); 2683 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) 2684 if (C->getAPIntValue() == (OpSizeInBits - 1)) 2685 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); 2686 } 2687 } 2688 } 2689 2690 // If the relocation model supports it, consider symbol offsets. 
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  // An i1 sign-extends to 0 or -1, so subtracting it equals adding its low bit.
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  // Prefer an add for more folding potential and possibly better codegen:
  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
    SDValue ShAmt = N1.getOperand(1);
    ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
    if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
      SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
      return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
    }
  }

  return SDValue();
}

/// Combine an ISD::SUBC node. Every replacement below reports the borrow
/// output (value #1) as CARRY_FALSE, since these folds only fire when the
/// borrow is dead or trivially known to be clear.
SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (isAllOnesConstant(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

/// Combine an ISD::USUBO node (sub with unsigned-overflow output).
/// Vector forms are not handled here.
SDValue DAGCombiner::visitUSUBO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // fold (usubo x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getConstant(0, DL, CarryVT));

  // fold (usubo x, 0) -> x + no borrow
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (isAllOnesConstant(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getConstant(0, DL, CarryVT));

  return SDValue();
}

/// Combine an ISD::SUBE node: with a known-false carry-in it degenerates to
/// SUBC.
SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

/// Combine an ISD::SUBCARRY node: with a zero carry-in it degenerates to
/// USUBO, provided USUBO is available for this type.
SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (subcarry x, y, false) -> (usubo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
      return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
  }

  return SDValue();
}

/// Combine an ISD::MUL node: constant folding, canonicalizing constants to
/// the RHS, and strength-reducing multiplies by (negated) powers of two into
/// shifts.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // NOTE: ConstValue0/ConstValue1 are only meaningful when the matching
  // *IsConst flag is set below.
  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // For vectors, only splat constants are recognized here.
    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                                   DAG.getConstant(Log2Val, DL,
                                       getShiftAmountTy(N0.getValueType()))));
  }

  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
  // mul x, (2^N + 1) --> add (shl x, N), x
  // mul x, (2^N - 1) --> sub (shl x, N), x
  // Examples: x * 33 --> (x << 5) + x
  //           x * 15 --> (x << 4) - x
  //           x * -33 --> -((x << 5) + x)
  //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
    // TODO: We could handle more general decomposition of any constant by
    //       having the target set a limit on number of ops and making a
    //       callback to determine that sequence (similar to sqrt expansion).
    unsigned MathOp = ISD::DELETED_NODE;
    APInt MulC = ConstValue1.abs();
    if ((MulC - 1).isPowerOf2())
      MathOp = ISD::ADD;
    else if ((MulC + 1).isPowerOf2())
      MathOp = ISD::SUB;

    if (MathOp != ISD::DELETED_NODE) {
      unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
                                          : (MulC + 1).logBase2();
      assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
             "Not expecting multiply-by-constant that could have simplified");
      SDLoc DL(N);
      // NOTE(review): the shift amount is built with VT rather than
      // getShiftAmountTy - presumably fine before legalization; confirm.
      SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
                                DAG.getConstant(ShAmt, DL, VT));
      SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
      if (ConstValue1.isNegative())
        R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
      return R;
    }
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    // Only fold if (c2 << c1) constant-folded to a constant.
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
    return RMUL;

  return SDValue();
}

/// Return true if divmod libcall is available.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
                                     const TargetLowering &TLI) {
  RTLIB::Libcall LC;
  EVT NodeType = Node->getValueType(0);
  if (!NodeType.isSimple())
    return false;
  switch (NodeType.getSimpleVT().SimpleTy) {
  default: return false; // No libcall for vector types.
  case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
  case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
  case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
  case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
  }

  return TLI.getLibcallName(LC) != nullptr;
}

/// Issue divrem if both quotient and remainder are needed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Scan all other users of the same (Op0, Op1) pair and funnel every
  // matching div/rem/divrem node through a single DIVREM node.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Quotient users take result #0; remainder users take result #1.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}

/// Simplifications common to sdiv/udiv/srem/urem: undef operands and trivial
/// divisors/dividends. Returns an empty SDValue if no simplification applies.
static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  unsigned Opc = N->getOpcode();
  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // X / undef -> undef
  // X % undef -> undef
  // X / 0 -> undef
  // X % 0 -> undef
  // NOTE: This includes vectors where any divisor element is zero/undef.
  if (DAG.isUndef(Opc, {N0, N1}))
    return DAG.getUNDEF(VT);

  // undef / X -> 0
  // undef % X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);

  // 0 / X -> 0
  // 0 % X -> 0
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  if (N0C && N0C->isNullValue())
    return N0;

  // X / X -> 1
  // X % X -> 0
  if (N0 == N1)
    return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);

  // X / 1 -> X
  // X % 1 -> 0
  // If this is a boolean op (single-bit element type), we can't have
  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
  // it's a 1.
  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
    return IsDiv ? N0 : DAG.getConstant(0, DL, VT);

  return SDValue();
}

/// Combine an ISD::SDIV node: constant folding, special divisors (-1 and
/// INT_MIN), strength reduction to udiv, and sdiv/srem -> sdivrem merging.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
  if (N1C && N1C->getAPIntValue().isMinSignedValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  if (SDValue V = visitSDIVLike(N0, N1, N))
    return V;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}

/// Helper implementing sdiv-by-constant strength reduction; returns an empty
/// SDValue when no fold applies.
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);
  unsigned BitWidth = VT.getScalarSizeInBits();

  // Helper for determining whether a value is a power-2 constant scalar or a
  // vector of such elements.
  auto IsPowerOfTwo = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    if (C->getAPIntValue().isPowerOf2())
      return true;
    // Negative powers of two are also accepted.
    if ((-C->getAPIntValue()).isPowerOf2())
      return true;
    return false;
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Create constants that are functions of the shift amount value.
    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    // Only proceed if the shift amounts constant-folded.
    if (!isConstantOrConstantVector(Inexact))
      return SDValue();

    // Splat the sign bit into the register
    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    AddToWorklist(Sign.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    AddToWorklist(Srl.getNode());
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    AddToWorklist(Add.getNode());
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    AddToWorklist(Sra.getNode());

    // Special case: (sdiv X, 1) -> X
    // Special Case: (sdiv X, -1) -> 0-X
    SDValue One = DAG.getConstant(1, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);

    // If dividing by a positive value, we're done.  Otherwise, the result must
    // be negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
    return Res;
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  return SDValue();
}

/// Combine an ISD::UDIV node: constant folding, divisor -1 special case, and
/// udiv/urem -> udivrem merging.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;
  // fold (udiv X, -1) -> select(X == -1, 1, 0)
  if (N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (SDValue V = visitUDIVLike(N0, N1, N))
    return V;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}

/// Helper implementing udiv-by-constant strength reduction (shifts for
/// power-of-two divisors, or target BuildUDIV expansion); returns an empty
/// SDValue when no fold applies.
SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  return SDValue();
}

// handles ISD::SREM and ISD::UREM
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);

  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;
  // fold (urem X, -1) -> select(X == -1, 0, x)
  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(0, DL, VT), N0);

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
  // speculative DIV must not cause a DIVREM conversion.  We guard against this
  // by skipping the simplification if isIntDivCheap().  When div is not cheap,
  // combine will not return a DIVREM.  Regardless, checking cheapness here
  // makes sense since the simplification results in fatter code.
  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
    SDValue OptimizedDiv =
        isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
    if (OptimizedDiv.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(OptimizedDiv.getNode());
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1); // The remainder is result #1 of DIVREM.

  return SDValue();
}

/// Combine an ISD::MULHS (signed multiply-high) node.
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhs x, 0) -> 0
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N0;
  }

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  if (isOneConstant(N1))
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (mulhs x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
/// Combine a MULHU (unsigned multiply-high) node.
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhu x, 0) -> 0
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N0;
  }

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhu x, 1) -> 0
  // The high half of x*1 is always zero for unsigned multiplication.
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
    SDLoc DL(N);
    unsigned NumEltBits = VT.getScalarSizeInBits();
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    SDValue SRLAmt = DAG.getNode(
        ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
    // The shift amount is computed in VT but must be presented to SRL in
    // the target's shift-amount type.
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Zero-extend both operands, multiply in the wide type, and take the
      // top half of the product via a logical shift + truncate.
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// the simplified node (via CombineTo) if a simplification was made, or a null
/// SDValue otherwise.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists && (!LegalOperations ||
                    TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    // Speculatively build the single-result form and see if combine() can
    // simplify it to something new.
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
/// Combine an SMUL_LOHI node (signed multiply producing low and high halves).
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the smul_lohi to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result 1) as the top half of the wide product.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result 0) as the bottom half of the wide product.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
/// Combine a UMUL_LOHI node (unsigned multiply producing low and high halves).
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the umul_lohi to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result 1) as the top half of the wide product.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result 0) as the bottom half of the wide product.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
3629 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 3630 return CombineTo(N, Lo, Hi); 3631 } 3632 } 3633 3634 return SDValue(); 3635 } 3636 3637 SDValue DAGCombiner::visitSMULO(SDNode *N) { 3638 // (smulo x, 2) -> (saddo x, x) 3639 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 3640 if (C2->getAPIntValue() == 2) 3641 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), 3642 N->getOperand(0), N->getOperand(0)); 3643 3644 return SDValue(); 3645 } 3646 3647 SDValue DAGCombiner::visitUMULO(SDNode *N) { 3648 // (umulo x, 2) -> (uaddo x, x) 3649 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 3650 if (C2->getAPIntValue() == 2) 3651 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), 3652 N->getOperand(0), N->getOperand(0)); 3653 3654 return SDValue(); 3655 } 3656 3657 SDValue DAGCombiner::visitIMINMAX(SDNode *N) { 3658 SDValue N0 = N->getOperand(0); 3659 SDValue N1 = N->getOperand(1); 3660 EVT VT = N0.getValueType(); 3661 3662 // fold vector ops 3663 if (VT.isVector()) 3664 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 3665 return FoldedVOp; 3666 3667 // fold operation with constant operands. 3668 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 3669 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 3670 if (N0C && N1C) 3671 return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); 3672 3673 // canonicalize constant to RHS 3674 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 3675 !DAG.isConstantIntBuildVectorOrConstantInt(N1)) 3676 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); 3677 3678 // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. 3679 // Only do this if the current op isn't legal and the flipped is. 
/// Combine an integer min/max node (SMIN/SMAX/UMIN/UMAX).
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold operation with constant operands.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  // If the sign bits of both operands are zero, signed and unsigned min/max
  // agree, so flip between UMIN/UMAX and SMIN/SMAX.
  // Only do this if the current op isn't legal and the flipped is.
  unsigned Opcode = N->getOpcode();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegal(Opcode, VT) &&
      (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
      (N1.isUndef() || DAG.SignBitIsZero(N1))) {
    unsigned AltOpcode;
    switch (Opcode) {
    case ISD::SMIN: AltOpcode = ISD::UMIN; break;
    case ISD::SMAX: AltOpcode = ISD::UMAX; break;
    case ISD::UMIN: AltOpcode = ISD::SMIN; break;
    case ISD::UMAX: AltOpcode = ISD::SMAX; break;
    default: llvm_unreachable("Unknown MINMAX opcode");
    }
    if (TLI.isOperationLegal(AltOpcode, VT))
      return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
  }

  return SDValue();
}
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    // Perform the logic op in the narrower source type, then re-apply the
    // common unary op (extend/bswap/trunc) on the result.
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        ShOp = tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations);
      }

      // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C))) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    SVN0->getMask());
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        ShOp = tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations);
      }

      // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B))) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    SVN0->getMask());
      }
    }
  }

  return SDValue();
}

/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT.getScalarType() != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Both compares share the same RHS and predicate: look for folds that merge
  // the two LHS values with a single OR/AND and keep one compare.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullOrNullSplat(LR);
    bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
    // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
    // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  // Adding 1 maps {0, -1} to {1, 0}, so "X is neither 0 nor -1" becomes a
  // single unsigned range check. Requires more than 1 scalar bit so that 0
  // and -1 are distinct values.
  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
      IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
ISD::getSetCCAndOperation(CC0, CC1, IsInteger) 3956 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger); 3957 if (NewCC != ISD::SETCC_INVALID && 3958 (!LegalOperations || 3959 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) && 3960 TLI.isOperationLegal(ISD::SETCC, OpVT)))) 3961 return DAG.getSetCC(DL, VT, LL, LR, NewCC); 3962 } 3963 3964 return SDValue(); 3965 } 3966 3967 /// This contains all DAGCombine rules which reduce two values combined by 3968 /// an And operation to a single value. This makes them reusable in the context 3969 /// of visitSELECT(). Rules involving constants are not included as 3970 /// visitSELECT() already handles those cases. 3971 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { 3972 EVT VT = N1.getValueType(); 3973 SDLoc DL(N); 3974 3975 // fold (and x, undef) -> 0 3976 if (N0.isUndef() || N1.isUndef()) 3977 return DAG.getConstant(0, DL, VT); 3978 3979 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL)) 3980 return V; 3981 3982 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 3983 VT.getSizeInBits() <= 64) { 3984 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 3985 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { 3986 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 3987 // immediate for an add, but it is legal if its top c2 bits are set, 3988 // transform the ADD so the immediate doesn't need to be materialized 3989 // in a register. 
3990 APInt ADDC = ADDI->getAPIntValue(); 3991 APInt SRLC = SRLI->getAPIntValue(); 3992 if (ADDC.getMinSignedBits() <= 64 && 3993 SRLC.ult(VT.getSizeInBits()) && 3994 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 3995 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 3996 SRLC.getZExtValue()); 3997 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { 3998 ADDC |= Mask; 3999 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 4000 SDLoc DL0(N0); 4001 SDValue NewAdd = 4002 DAG.getNode(ISD::ADD, DL0, VT, 4003 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); 4004 CombineTo(N0.getNode(), NewAdd); 4005 // Return N so it doesn't get rechecked! 4006 return SDValue(N, 0); 4007 } 4008 } 4009 } 4010 } 4011 } 4012 } 4013 4014 // Reduce bit extract of low half of an integer to the narrower type. 4015 // (and (srl i64:x, K), KMask) -> 4016 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask) 4017 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { 4018 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) { 4019 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 4020 unsigned Size = VT.getSizeInBits(); 4021 const APInt &AndMask = CAnd->getAPIntValue(); 4022 unsigned ShiftBits = CShift->getZExtValue(); 4023 4024 // Bail out, this node will probably disappear anyway. 4025 if (ShiftBits == 0) 4026 return SDValue(); 4027 4028 unsigned MaskBits = AndMask.countTrailingOnes(); 4029 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2); 4030 4031 if (AndMask.isMask() && 4032 // Required bits must not span the two halves of the integer and 4033 // must fit in the half size type. 
/// Return true if masking the loaded value with \p AndC can instead be done by
/// turning \p LoadN into a zero-extending load of the mask's active width.
/// On success, \p ExtVT is set to the narrow integer type to load.
bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                                   EVT LoadResultTy, EVT &ExtVT) {
  // Only a low-bit mask (0...01...1) can be replaced by a zext load.
  if (!AndC->getAPIntValue().isMask())
    return false;

  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();

  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  EVT LoadedVT = LoadN->getMemoryVT();

  if (ExtVT == LoadedVT &&
      (!LegalOperations ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
    // ZEXTLOAD will match without needing to change the size of the value being
    // loaded.
    return true;
  }

  // Do not change the width of a volatile load.
  if (LoadN->isVolatile())
    return false;

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
    return false;

  if (LegalOperations &&
      !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
    return false;

  // Let the target veto the narrowing (e.g. if the wider load is free).
  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
    return false;

  return true;
}
/// Return true if it is profitable and legal to narrow the load or store
/// \p LDST to memory type \p MemVT, reading/writing at a bit offset of
/// \p ShAmt from the original access.
bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
                                    ISD::LoadExtType ExtType, EVT &MemVT,
                                    unsigned ShAmt) {
  if (!LDST)
    return false;
  // Only allow byte offsets.
  if (ShAmt % 8)
    return false;

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!MemVT.isRound())
    return false;

  // Don't change the width of a volatile load.
  if (LDST->isVolatile())
    return false;

  // Verify that we are actually reducing a load width here.
  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
    return false;

  // Ensure that this isn't going to produce an unsupported unaligned access.
  if (ShAmt &&
      !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
                              LDST->getAddressSpace(), ShAmt / 8))
    return false;

  // It's not possible to generate a constant of extended or untyped type.
  if (PtrType == MVT::Untyped || PtrType.isExtended())
    return false;

  if (isa<LoadSDNode>(LDST)) {
    LoadSDNode *Load = cast<LoadSDNode>(LDST);
    // Don't transform one with multiple uses, this would require adding a new
    // load.
    if (!SDValue(Load, 0).hasOneUse())
      return false;

    if (LegalOperations &&
        !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
      return false;

    // For the transform to be legal, the load must produce only two values
    // (the value loaded and the chain). Don't transform a pre-increment
    // load, for example, which produces an extra value. Otherwise the
    // transformation is not equivalent, and the downstream logic to replace
    // uses gets things wrong.
    if (Load->getNumValues() > 2)
      return false;

    // If the load that we're shrinking is an extload and we're not just
    // discarding the extension we can't simply shrink the load. Bail.
    // TODO: It would be possible to merge the extensions in some cases.
    if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
        Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
      return false;

    if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
      return false;
  } else {
    assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
    StoreSDNode *Store = cast<StoreSDNode>(LDST);
    // Can't write outside the original store
    if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
      return false;

    if (LegalOperations &&
        !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
      return false;
  }
  return true;
}
/// Walk the operand tree under \p N (through OR/XOR/AND) collecting loads that
/// can be narrowed under \p Mask into \p Loads, logic nodes with oversized
/// constants into \p NodesWithConsts, and at most one other node (which will
/// need an explicit mask) into \p NodeToMask. Returns false if the tree
/// contains anything that blocks the backwards mask propagation.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallVectorImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.push_back(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      // Recurse through the logic ops that AND distributes over.
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will be masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        // Glue/chain results don't carry data; count only real values.
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}
/// Given an AND node \p N with a low-bit mask constant, try to propagate the
/// mask backwards through OR/XOR/AND trees so that the loads feeding the tree
/// can be narrowed and the AND itself removed. Returns true if the DAG was
/// changed.
bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  SmallVector<LoadSDNode*, 8> Loads;
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    if (Loads.size() == 0)
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      // The RAUW above also rewrote the new AND's own operand to point at
      // itself; restore it to use the original node.
      if (And.getOpcode() == ISD ::AND)
        DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      // Canonicalize so the constant operand is Op1.
      if (isa<ConstantSDNode>(Op0))
        std::swap(Op0, Op1);

      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
                                Op1, MaskOp);

      DAG.UpdateNodeOperands(LogicN, Op0, And);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      // As above, undo the self-reference the RAUW introduced. getNode() may
      // have CSE'd, so UpdateNodeOperands can return a different node.
      if (And.getOpcode() == ISD ::AND)
        And = SDValue(
            DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
      SDValue NewLoad = ReduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}
4304 for (auto *LogicN : NodesWithConsts) { 4305 SDValue Op0 = LogicN->getOperand(0); 4306 SDValue Op1 = LogicN->getOperand(1); 4307 4308 if (isa<ConstantSDNode>(Op0)) 4309 std::swap(Op0, Op1); 4310 4311 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), 4312 Op1, MaskOp); 4313 4314 DAG.UpdateNodeOperands(LogicN, Op0, And); 4315 } 4316 4317 // Create narrow loads. 4318 for (auto *Load : Loads) { 4319 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump()); 4320 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0), 4321 SDValue(Load, 0), MaskOp); 4322 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And); 4323 if (And.getOpcode() == ISD ::AND) 4324 And = SDValue( 4325 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0); 4326 SDValue NewLoad = ReduceLoadWidth(And.getNode()); 4327 assert(NewLoad && 4328 "Shouldn't be masking the load if it can't be narrowed"); 4329 CombineTo(Load, NewLoad, NewLoad.getValue(1)); 4330 } 4331 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode()); 4332 return true; 4333 } 4334 return false; 4335 } 4336 4337 // Unfold 4338 // x & (-1 'logical shift' y) 4339 // To 4340 // (x 'opposite logical shift' y) 'logical shift' y 4341 // if it is better for performance. 4342 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) { 4343 assert(N->getOpcode() == ISD::AND); 4344 4345 SDValue N0 = N->getOperand(0); 4346 SDValue N1 = N->getOperand(1); 4347 4348 // Do we actually prefer shifts over mask? 4349 if (!TLI.preferShiftsToClearExtremeBits(N0)) 4350 return SDValue(); 4351 4352 // Try to match (-1 '[outer] logical shift' y) 4353 unsigned OuterShift; 4354 unsigned InnerShift; // The opposite direction to the OuterShift. 4355 SDValue Y; // Shift amount. 
  // Matcher for the mask operand: records the outer shift opcode, its
  // opposite, and the shift amount Y when M is (-1 shl/srl Y) with one use.
  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
    if (!M.hasOneUse())
      return false;
    OuterShift = M->getOpcode();
    if (OuterShift == ISD::SHL)
      InnerShift = ISD::SRL;
    else if (OuterShift == ISD::SRL)
      InnerShift = ISD::SHL;
    else
      return false;
    if (!isAllOnesConstant(M->getOperand(0)))
      return false;
    Y = M->getOperand(1);
    return true;
  };

  // The AND is commutative: try the mask on either side.
  SDValue X;
  if (matchMask(N1))
    X = N0;
  else if (matchMask(N0))
    X = N1;
  else
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  //     tmp = x 'opposite logical shift' y
  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
  //     ret = tmp 'logical shift' y
  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);

  return T1;
}

/// Combine an ISD::AND node; returns a replacement value, or an empty
/// SDValue if no fold applies.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
                             SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
                             SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
    return RAND;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (VT.isVector())
    if (SDValue Shuffle = XformToShuffleWithZero(N))
      return Shuffle;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things. This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    // B is true when the AND can be removed for this load's extension kind.
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    if (SDValue Res = ReduceLoadWidth(N)) {
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);

      AddToWorklist(N);
      CombineTo(LN0, Res, Res.getValue(1));
      return SDValue(N, 0);
    }
  }

  if (Level >= AfterLegalizeTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N, DAG)) {
      return SDValue(N, 0);
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    if (isNullOrNullSplat(N0.getOperand(0))) {
      SDValue SubRHS = N0.getOperand(1);
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return SubRHS;
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
    return Shifts;

  return SDValue();
}

/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize so N0 holds the shl-side AND and N1 the srl-side AND.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  // Both shift amounts must be constant 8.
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must ultimately come from the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}

/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// On success, records the source node for the matched byte in \p Parts at
/// the byte's offset; refuses a slot that is already filled.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
  if (!N.getNode()->hasOneUse())
    return false;

  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  SDValue N0 = N.getOperand(0);
  unsigned Opc0 = N0.getOpcode();
  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
    return false;

  ConstantSDNode *N1C = nullptr;
  // SHL or SRL: look upstream for AND mask operand
  if (Opc == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  else if (Opc0 == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (!N1C)
    return false;

  // Translate the mask constant into which byte of the source it selects.
  unsigned MaskByteOffset;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       MaskByteOffset = 0; break;
  case 0xFF00:     MaskByteOffset = 1; break;
  case 0xFFFF:
    // In case demanded bits didn't clear the bits that will be shifted out.
    // This is needed for X86.
    if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
      MaskByteOffset = 1;
      break;
    }
    return false;
  case 0xFF0000:   MaskByteOffset = 2; break;
  case 0xFF000000: MaskByteOffset = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  if (Opc == ISD::AND) {
    if (MaskByteOffset == 0 || MaskByteOffset == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (Opc0 != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (Opc0 != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (MaskByteOffset != 0 && MaskByteOffset != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (MaskByteOffset != 1 && MaskByteOffset != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  // Reject if this byte slot has already been claimed by another element.
  if (Parts[MaskByteOffset])
    return false;

  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
  return true;
}

/// Match a 32-bit packed halfword bswap.
/// That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  // Parts[i] receives the source node for byte i; all four must match one
  // node for the pattern to be a bswap.
  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordElement(N00, Parts))
      return SDValue();

    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    SDValue N10 = N1.getOperand(0);
    if (!isBSwapHWordElement(N10, Parts))
      return SDValue();
    SDValue N11 = N1.getOperand(1);
    if (!isBSwapHWordElement(N11, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do  (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}

/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N1.getValueType();
  SDLoc DL(N);

  // fold (or x, undef) -> -1
  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
    return DAG.getAllOnesConstant(DL, VT);

  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
    return V;

  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero. Likewise for Y.
    if (const ConstantSDNode *N0O1C =
        getAsNonOpaqueConstant(N0.getOperand(1))) {
      if (const ConstantSDNode *N1O1C =
          getAsNonOpaqueConstant(N1.getOperand(1))) {
        // We can only do this xform if we know that bits from X that are set in
        // C2 but not in C1 are already zero. Likewise for Y.
        const APInt &LHSMask = N0O1C->getAPIntValue();
        const APInt &RHSMask = N1O1C->getAPIntValue();

        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                  N0.getOperand(0), N1.getOperand(0));
          return DAG.getNode(ISD::AND, DL, VT, X,
                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
        }
      }
    }
  }

  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(0) == N1.getOperand(0) &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                            N0.getOperand(1), N1.getOperand(1));
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
  }

  return SDValue();
}

/// Combine an ISD::OR node; returns a replacement value, or an empty
/// SDValue if no fold applies.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the otherside is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          if (!LegalMask) {
            // Try the commuted form before giving up.
            std::swap(NewLHS, NewRHS);
            ShuffleVectorSDNode::commuteMask(Mask);
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          }

          if (LegalMask)
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0.
5159 auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) { 5160 return LHS->getAPIntValue().intersects(RHS->getAPIntValue()); 5161 }; 5162 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 5163 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) { 5164 if (SDValue COR = DAG.FoldConstantArithmetic( 5165 ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) { 5166 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1); 5167 AddToWorklist(IOR.getNode()); 5168 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR); 5169 } 5170 } 5171 5172 // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) 5173 if (N0.getOpcode() == N1.getOpcode()) 5174 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) 5175 return Tmp; 5176 5177 // See if this is some rotate idiom. 5178 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) 5179 return SDValue(Rot, 0); 5180 5181 if (SDValue Load = MatchLoadCombine(N)) 5182 return Load; 5183 5184 // Simplify the operands using demanded-bits information. 5185 if (SimplifyDemandedBits(SDValue(N, 0))) 5186 return SDValue(N, 0); 5187 5188 return SDValue(); 5189 } 5190 5191 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) { 5192 if (Op.getOpcode() == ISD::AND && 5193 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { 5194 Mask = Op.getOperand(1); 5195 return Op.getOperand(0); 5196 } 5197 return Op; 5198 } 5199 5200 /// Match "(X shl/srl V1) & V2" where V2 may not be present. 5201 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, 5202 SDValue &Mask) { 5203 Op = stripConstantMask(DAG, Op, Mask); 5204 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { 5205 Shift = Op; 5206 return true; 5207 } 5208 return false; 5209 } 5210 5211 /// Helper function for visitOR to extract the needed side of a rotate idiom 5212 /// from a shl/srl/mul/udiv. 
/// This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
///   (or (mul v c0) (shrl (mul v c1) c2)):
///     expands (mul v c0) -> (shl (mul v c1) c3)
///
///   (or (udiv v c0) (shl (udiv v c1) c2)):
///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
///
///   (or (shl v c0) (shrl (shl v c1) c2)):
///     expands (shl v c0) -> (shl (shl v c1) c3)
///
///   (or (shrl v c0) (shl (shrl v c1) c2)):
///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  assert(
      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
      "Existing shift must be valid as a rotate half");

  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
  // Preconditions:
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE;
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from.
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these values
  // Check that we have constant values. Note that a zero constant is rejected
  // too: APInt's operator bool is false for 0, and a zero amount would make
  // the pattern degenerate.
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Check:
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt;
    APInt Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // Check:
    //      c2 - (bitwidth(op0 v c0) - c1) == c0
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                                          ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}

// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).
// This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                      [B]
  //
  // for all Neg and Pos. Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time. E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      KnownBits Known;
      DAG.computeKnownBits(Neg.getOperand(0), Known);
      unsigned Bits = Log2_64(EltSize);
      // The AND is redundant (case [A]) if the mask covers at least the low
      // log2(EltSize) bits that can be set in the operand.
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'. The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
      KnownBits Known;
      DAG.computeKnownBits(Pos.getOperand(0), Known);
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
           MaskLoBits))
        Pos = Pos.getOperand(0);
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //     EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //     NegC & Mask == (EltSize - PosC) & Mask
  //     EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}

// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, unsigned PosOpcode,
                                       unsigned NegOpcode, const SDLoc &DL) {
  // fold (or (shl x, (*ext y)),
  //          (srl x, (*ext (sub 32, y)))) ->
  //   (rotl x, y) or (rotr x, (sub 32, y))
  //
  // fold (or (shl x, (*ext (sub 32, y))),
  //          (srl x, (*ext y))) ->
  //   (rotr x, y) or (rotl x, (sub 32, y))
  EVT VT = Shifted.getValueType();
  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
    bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                       HasPos ?
                           Pos : Neg).getNode();
  }

  return nullptr;
}

// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type. Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Check for truncated rotate: recurse on the pre-truncation operands and
  // truncate the resulting rotate.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
                         SDValue(Rot, 0)).getNode();
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail
  if (!LHSShift && !RHSShift)
    return nullptr;

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract. We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (ie if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return nullptr;

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      // A missing mask on one side is equivalent to an all-ones mask: widen
      // each present mask by the bits the opposing shift brings in.
      if (LHSMask.getNode()) {
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try both rotate directions; the sub may appear on either shift amount.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}

namespace {

/// Represents known origin of an individual byte in load combine pattern. The
/// value of the byte is either constant zero or comes from memory.
struct ByteProvider {
  // For constant zero providers Load is set to nullptr. For memory providers
  // Load represents the node which loads the byte from memory.
  // ByteOffset is the offset of the byte in the value produced by the load.
  LoadSDNode *Load = nullptr;
  unsigned ByteOffset = 0;

  ByteProvider() = default;

  static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
    return ByteProvider(Load, ByteOffset);
  }

  static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }

  bool isConstantZero() const { return !Load; }
  bool isMemory() const { return Load; }

  bool operator==(const ByteProvider &Other) const {
    return Other.Load == Load && Other.ByteOffset == ByteOffset;
  }

private:
  ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
      : Load(Load), ByteOffset(ByteOffset) {}
};

} // end anonymous namespace

/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
/// For all the values except the root of the expression verifies that the value
/// has exactly one use and if it's not true return None. This way if the origin
/// of the byte is returned it's guaranteed that the values which contribute to
/// the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
static const Optional<ByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
                      bool Root = false) {
  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
  if (Depth == 10)
    return None;

  if (!Root && !Op.hasOneUse())
    return None;

  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return None;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    // A byte of an OR is known only if exactly one side contributes to it
    // (the other side must be constant zero at this byte).
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    if (!LHS)
      return None;
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    if (!RHS)
      return None;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return None;
  }
  case ISD::SHL: {
    // Only whole-byte shifts by a constant can be analyzed.
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return None;

    uint64_t BitShift = ShiftOp->getZExtValue();
    if (BitShift % 8 != 0)
      return None;
    uint64_t ByteShift = BitShift / 8;

    // Bytes below the shift amount are zeros shifted in; others come from
    // the corresponding lower byte of the shifted value.
    return Index < ByteShift
               ? ByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes past the narrow value are known to be zero only for zext; for
    // sext/aext their contents aren't a fixed constant or a loaded byte.
    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
  }
  case ISD::BSWAP:
    // bswap mirrors the byte index.
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1);
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    if (L->isVolatile() || L->isIndexed())
      return None;

    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes past the memory width are zero only for zero-extending loads.
    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return ByteProvider::getMemory(L, Index);
  }
  }

  return None;
}

/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the targets supports it.
///
/// Assuming little endian target:
///  i8 *a = ...
///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
///  i32 val = *((i32)a)
///
///  i8 *a = ...
///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
///  i32 val = BSWAP(*((i32)a))
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node.
For example, once the address 5761 /// of t28 load is reassociated load combine won't be triggered: 5762 /// t25: i32 = add t4, Constant:i32<2> 5763 /// t26: i64 = sign_extend t25 5764 /// t27: i64 = add t2, t26 5765 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64 5766 /// t29: i32 = zero_extend t28 5767 /// t32: i32 = shl t29, Constant:i8<8> 5768 /// t33: i32 = or t23, t32 5769 /// As a possible fix visitLoad can check if the load can be a part of a load 5770 /// combine pattern and add corresponding OR roots to the worklist. 5771 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { 5772 assert(N->getOpcode() == ISD::OR && 5773 "Can only match load combining against OR nodes"); 5774 5775 // Handles simple types only 5776 EVT VT = N->getValueType(0); 5777 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) 5778 return SDValue(); 5779 unsigned ByteWidth = VT.getSizeInBits() / 8; 5780 5781 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 5782 // Before legalize we can introduce too wide illegal loads which will be later 5783 // split into legal sized loads. This enables us to combine i64 load by i8 5784 // patterns to a couple of i32 loads on 32 bit targets. 5785 if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT)) 5786 return SDValue(); 5787 5788 std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = []( 5789 unsigned BW, unsigned i) { return i; }; 5790 std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = []( 5791 unsigned BW, unsigned i) { return BW - i - 1; }; 5792 5793 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian(); 5794 auto MemoryByteOffset = [&] (ByteProvider P) { 5795 assert(P.isMemory() && "Must be a memory byte provider"); 5796 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits(); 5797 assert(LoadBitWidth % 8 == 0 && 5798 "can only analyze providers for individual bytes not bit"); 5799 unsigned LoadByteWidth = LoadBitWidth / 8; 5800 return IsBigEndianTarget 5801 ? 
BigEndianByteAt(LoadByteWidth, P.ByteOffset) 5802 : LittleEndianByteAt(LoadByteWidth, P.ByteOffset); 5803 }; 5804 5805 Optional<BaseIndexOffset> Base; 5806 SDValue Chain; 5807 5808 SmallPtrSet<LoadSDNode *, 8> Loads; 5809 Optional<ByteProvider> FirstByteProvider; 5810 int64_t FirstOffset = INT64_MAX; 5811 5812 // Check if all the bytes of the OR we are looking at are loaded from the same 5813 // base address. Collect bytes offsets from Base address in ByteOffsets. 5814 SmallVector<int64_t, 4> ByteOffsets(ByteWidth); 5815 for (unsigned i = 0; i < ByteWidth; i++) { 5816 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true); 5817 if (!P || !P->isMemory()) // All the bytes must be loaded from memory 5818 return SDValue(); 5819 5820 LoadSDNode *L = P->Load; 5821 assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() && 5822 "Must be enforced by calculateByteProvider"); 5823 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset"); 5824 5825 // All loads must share the same chain 5826 SDValue LChain = L->getChain(); 5827 if (!Chain) 5828 Chain = LChain; 5829 else if (Chain != LChain) 5830 return SDValue(); 5831 5832 // Loads must share the same base address 5833 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG); 5834 int64_t ByteOffsetFromBase = 0; 5835 if (!Base) 5836 Base = Ptr; 5837 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) 5838 return SDValue(); 5839 5840 // Calculate the offset of the current byte from the base address 5841 ByteOffsetFromBase += MemoryByteOffset(*P); 5842 ByteOffsets[i] = ByteOffsetFromBase; 5843 5844 // Remember the first byte load 5845 if (ByteOffsetFromBase < FirstOffset) { 5846 FirstByteProvider = P; 5847 FirstOffset = ByteOffsetFromBase; 5848 } 5849 5850 Loads.insert(L); 5851 } 5852 assert(!Loads.empty() && "All the bytes of the value must be loaded from " 5853 "memory, so there must be at least one load which produces the value"); 5854 assert(Base && "Base address of the 
accessed memory location must be set"); 5855 assert(FirstOffset != INT64_MAX && "First byte offset must be set"); 5856 5857 // Check if the bytes of the OR we are looking at match with either big or 5858 // little endian value load 5859 bool BigEndian = true, LittleEndian = true; 5860 for (unsigned i = 0; i < ByteWidth; i++) { 5861 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset; 5862 LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i); 5863 BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i); 5864 if (!BigEndian && !LittleEndian) 5865 return SDValue(); 5866 } 5867 assert((BigEndian != LittleEndian) && "should be either or"); 5868 assert(FirstByteProvider && "must be set"); 5869 5870 // Ensure that the first byte is loaded from zero offset of the first load. 5871 // So the combined value can be loaded from the first load address. 5872 if (MemoryByteOffset(*FirstByteProvider) != 0) 5873 return SDValue(); 5874 LoadSDNode *FirstLoad = FirstByteProvider->Load; 5875 5876 // The node we are looking at matches with the pattern, check if we can 5877 // replace it with a single load and bswap if needed. 5878 5879 // If the load needs byte swap check if the target supports it 5880 bool NeedsBswap = IsBigEndianTarget != BigEndian; 5881 5882 // Before legalize we can introduce illegal bswaps which will be later 5883 // converted to an explicit bswap sequence. This way we end up with a single 5884 // load and byte shuffling instead of several loads and byte shuffling. 
  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Check that a load of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                        VT, FirstLoad->getAddressSpace(),
                                        FirstLoad->getAlignment(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  SDValue NewLoad =
      DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
                  FirstLoad->getPointerInfo(), FirstLoad->getAlignment());

  // Transfer chain users from old loads to the new load.
  for (LoadSDNode *L : Loads)
    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));

  return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
}

// If the target has andn, bsl, or a similar bit-select instruction,
// we want to unfold masked merge, with canonical pattern of:
//   |        A        |  |B|
//   ((x ^ y) & m) ^ y
//    |  D  |
// Into:
//   (x & m) | (y & ~m)
// If y is a constant, and the 'andn' does not work with immediates,
// we unfold into a different pattern:
//   ~(~x & m) & (m | y)
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
//       the very least that breaks andnpd / andnps patterns, and because those
//       patterns are simplified in IR and shouldn't be created in the DAG
SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
  assert(N->getOpcode() == ISD::XOR);

  // Don't touch 'not' (i.e. where y = -1).
  if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // There are 3 commutable operators in the pattern,
  // so we have to deal with 8 possible variants of the basic pattern.
  SDValue X, Y, M;
  // Try to match (xor (and (xor X, Y), M), Y) with the inner xor at operand
  // XorIdx of And and the outer Y given as Other; fills X, Y, M on success.
  auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
    if (And.getOpcode() != ISD::AND || !And.hasOneUse())
      return false;
    SDValue Xor = And.getOperand(XorIdx);
    if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
      return false;
    SDValue Xor0 = Xor.getOperand(0);
    SDValue Xor1 = Xor.getOperand(1);
    // Don't touch 'not' (i.e. where y = -1).
    if (isAllOnesOrAllOnesSplat(Xor1))
      return false;
    if (Other == Xor0)
      std::swap(Xor0, Xor1);
    if (Other != Xor1)
      return false;
    X = Xor0;
    Y = Xor1;
    M = And.getOperand(XorIdx ? 0 : 1);
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
      !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
    return SDValue();

  // Don't do anything if the mask is constant. This should not be reachable.
  // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it, too.
  if (isa<ConstantSDNode>(M.getNode()))
    return SDValue();

  // We can transform if the target has AndNot
  if (!TLI.hasAndNot(M))
    return SDValue();

  SDLoc DL(N);

  // If Y is a constant, check that 'andn' works with immediates.
  if (!TLI.hasAndNot(Y)) {
    assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
    // If not, we need to do a bit more work to make sure andn is still used.
    SDValue NotX = DAG.getNOT(DL, X, VT);
    SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
    SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
    SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
    return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
  }

  // Default unfolding: (x & m) | (y & ~m).
  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
  SDValue NotM = DAG.getNOT(DL, M, VT);
  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);

  return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}

SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
6007 SDLoc DL(N); 6008 if (N0.isUndef() && N1.isUndef()) 6009 return DAG.getConstant(0, DL, VT); 6010 // fold (xor x, undef) -> undef 6011 if (N0.isUndef()) 6012 return N0; 6013 if (N1.isUndef()) 6014 return N1; 6015 // fold (xor c1, c2) -> c1^c2 6016 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 6017 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 6018 if (N0C && N1C) 6019 return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C); 6020 // canonicalize constant to RHS 6021 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 6022 !DAG.isConstantIntBuildVectorOrConstantInt(N1)) 6023 return DAG.getNode(ISD::XOR, DL, VT, N1, N0); 6024 // fold (xor x, 0) -> x 6025 if (isNullConstant(N1)) 6026 return N0; 6027 6028 if (SDValue NewSel = foldBinOpIntoSelect(N)) 6029 return NewSel; 6030 6031 // reassociate xor 6032 if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags())) 6033 return RXOR; 6034 6035 // fold !(x cc y) -> (x !cc y) 6036 unsigned N0Opcode = N0.getOpcode(); 6037 SDValue LHS, RHS, CC; 6038 if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { 6039 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), 6040 LHS.getValueType().isInteger()); 6041 if (!LegalOperations || 6042 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { 6043 switch (N0Opcode) { 6044 default: 6045 llvm_unreachable("Unhandled SetCC Equivalent!"); 6046 case ISD::SETCC: 6047 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC); 6048 case ISD::SELECT_CC: 6049 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2), 6050 N0.getOperand(3), NotCC); 6051 } 6052 } 6053 } 6054 6055 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) 6056 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() && 6057 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ 6058 SDValue V = N0.getOperand(0); 6059 SDLoc DL0(N0); 6060 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V, 6061 DAG.getConstant(1, DL0, 
V.getValueType())); 6062 AddToWorklist(V.getNode()); 6063 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V); 6064 } 6065 6066 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc 6067 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() && 6068 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { 6069 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 6070 if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { 6071 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND; 6072 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS 6073 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS 6074 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); 6075 return DAG.getNode(NewOpcode, DL, VT, LHS, RHS); 6076 } 6077 } 6078 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants 6079 if (isAllOnesConstant(N1) && N0.hasOneUse() && 6080 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) { 6081 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 6082 if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { 6083 unsigned NewOpcode = N0Opcode == ISD::AND ? 
ISD::OR : ISD::AND; 6084 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS 6085 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS 6086 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); 6087 return DAG.getNode(NewOpcode, DL, VT, LHS, RHS); 6088 } 6089 } 6090 // fold (xor (and x, y), y) -> (and (not x), y) 6091 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) { 6092 SDValue X = N0.getOperand(0); 6093 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); 6094 AddToWorklist(NotX.getNode()); 6095 return DAG.getNode(ISD::AND, DL, VT, NotX, N1); 6096 } 6097 6098 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) { 6099 ConstantSDNode *XorC = isConstOrConstSplat(N1); 6100 ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1)); 6101 if (XorC && ShiftC) { 6102 APInt Ones = APInt::getAllOnesValue(VT.getScalarSizeInBits()); 6103 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftC->getZExtValue()) 6104 : Ones.lshr(ShiftC->getZExtValue()); 6105 if (XorC->getAPIntValue() == Ones) { 6106 // If the xor constant is a shifted -1, do a 'not' before the shift: 6107 // xor (X << ShiftC), XorC --> (not X) << ShiftC 6108 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC 6109 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT); 6110 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1)); 6111 } 6112 } 6113 } 6114 6115 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) 6116 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { 6117 SDValue A = N0Opcode == ISD::ADD ? N0 : N1; 6118 SDValue S = N0Opcode == ISD::SRA ? 
N0 : N1; 6119 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) { 6120 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1); 6121 SDValue S0 = S.getOperand(0); 6122 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) { 6123 unsigned OpSizeInBits = VT.getScalarSizeInBits(); 6124 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1))) 6125 if (C->getAPIntValue() == (OpSizeInBits - 1)) 6126 return DAG.getNode(ISD::ABS, DL, VT, S0); 6127 } 6128 } 6129 } 6130 6131 // fold (xor x, x) -> 0 6132 if (N0 == N1) 6133 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); 6134 6135 // fold (xor (shl 1, x), -1) -> (rotl ~1, x) 6136 // Here is a concrete example of this equivalence: 6137 // i16 x == 14 6138 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 6139 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 6140 // 6141 // => 6142 // 6143 // i16 ~1 == 0b1111111111111110 6144 // i16 rol(~1, 14) == 0b1011111111111111 6145 // 6146 // Some additional tips to help conceptualize this transform: 6147 // - Try to see the operation as placing a single zero in a value of all ones. 6148 // - There exists no value for x which would allow the result to contain zero. 6149 // - Values of x larger than the bitwidth are undefined and do not require a 6150 // consistent result. 6151 // - Pushing the zero left requires shifting one bits in from the right. 6152 // A rotate left of ~1 is a nice way of achieving the desired result. 6153 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL && 6154 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { 6155 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), 6156 N0.getOperand(1)); 6157 } 6158 6159 // Simplify: xor (op x...), (op y...) 
-> (op (xor x, y)) 6160 if (N0Opcode == N1.getOpcode()) 6161 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) 6162 return Tmp; 6163 6164 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable 6165 if (SDValue MM = unfoldMaskedMerge(N)) 6166 return MM; 6167 6168 // Simplify the expression using non-local knowledge. 6169 if (SimplifyDemandedBits(SDValue(N, 0))) 6170 return SDValue(N, 0); 6171 6172 return SDValue(); 6173 } 6174 6175 /// Handle transforms common to the three shifts, when the shift amount is a 6176 /// constant. 6177 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { 6178 // Do not turn a 'not' into a regular xor. 6179 if (isBitwiseNot(N->getOperand(0))) 6180 return SDValue(); 6181 6182 SDNode *LHS = N->getOperand(0).getNode(); 6183 if (!LHS->hasOneUse()) return SDValue(); 6184 6185 // We want to pull some binops through shifts, so that we have (and (shift)) 6186 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of 6187 // thing happens with address calculations, so it's important to canonicalize 6188 // it. 6189 bool HighBitSet = false; // Can we transform this if the high bit is set? 6190 6191 switch (LHS->getOpcode()) { 6192 default: return SDValue(); 6193 case ISD::OR: 6194 case ISD::XOR: 6195 HighBitSet = false; // We can only transform sra if the high bit is clear. 6196 break; 6197 case ISD::AND: 6198 HighBitSet = true; // We can only transform sra if the high bit is set. 6199 break; 6200 case ISD::ADD: 6201 if (N->getOpcode() != ISD::SHL) 6202 return SDValue(); // only shl(add) not sr[al](add). 6203 HighBitSet = false; // We can only transform sra if the high bit is clear. 6204 break; 6205 } 6206 6207 // We require the RHS of the binop to be a constant and not opaque as well. 
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is copy/select. Enable this in other cases when we can prove it is
  // exactly profitable.
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
                 BinOpLHSVal->getOpcode() == ISD::SRA ||
                 BinOpLHSVal->getOpcode() == ISD::SRL;
  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
                        BinOpLHSVal->getOpcode() == ISD::SELECT;

  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
      !isCopyOrSelect)
    return SDValue();

  if (isCopyOrSelect && N->hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  if (!TLI.isDesirableToCommuteWithShift(N, Level))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}

/// Push a truncate through an AND with a non-opaque constant operand:
/// (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC).
/// Both the truncate and the AND must have a single use.
/// Returns the new AND, or a null SDValue if the pattern does not apply.
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
  assert(N->getOpcode() == ISD::TRUNCATE);
  assert(N->getOperand(0).getOpcode() == ISD::AND);

  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
    SDValue N01 = N->getOperand(0).getOperand(1);
    if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
      SDLoc DL(N);
      EVT TruncVT = N->getValueType(0);
      SDValue N00 = N->getOperand(0).getOperand(0);
      SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
      SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
      AddToWorklist(Trunc00.getNode());
      AddToWorklist(Trunc01.getNode());
      return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
    }
  }

  return SDValue();
}

/// Combine an ISD::ROTL or ISD::ROTR node.
/// Returns the replacement value, or a null SDValue if no fold applied.
SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullOrNullSplat(N1))
    return N0;

  // fold (rot x, c) -> (rot x, c % BitSize)
  if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
    if (Cst->getAPIntValue().uge(Bitsize)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
      return DAG.getNode(N->getOpcode(), dl, VT, N0,
                         DAG.getConstant(RotAmt, dl, N1.getValueType()));
    }
  }

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      // Rotates in the same direction add; opposite directions subtract.
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift =
              DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
            BitsizeC.getNode());
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}

/// Combine an ISD::SHL node.
/// Returns the replacement value, or a null SDValue if no fold applied.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      // Widen so the sum cannot wrap before comparing against the bitwidth.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      APInt c1 = N0Op0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
        SDLoc DL(N0);
        APInt Sum = c1 + c2;
        if (Sum.uge(OpSizeInBits))
          return DAG.getConstant(0, DL, VT);

        return DAG.getNode(
            ISD::SHL, DL, VT,
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
        uint64_t c1 = N0Op0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X, C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1 > C2
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
      TLI.isDesirableToCommuteWithShift(N, Level)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}

/// Combine an ISD::SRA node.
/// Returns the replacement value, or a null SDValue if no fold applied.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> c1 >> c2 (arithmetic)
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // clamp (add c1, c2) to max shift.
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();
    EVT ShiftSVT = ShiftVT.getScalarType();
    SmallVector<SDValue, 16> ShiftValues;

    auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      // Widen so the sum cannot wrap before comparing against the bitwidth.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      APInt Sum = c1 + c2;
      // A total shift >= bitwidth saturates to bitwidth-1 (sra preserves the
      // sign bit, so this is the fixed point).
      unsigned ShiftSum =
          Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
      ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
      return true;
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
      SDValue ShiftValue;
      if (VT.isVector())
        ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
      else
        ShiftValue = ShiftValues[0];
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}

/// Combine an ISD::SRL node.
/// Returns the replacement value, or a null SDValue if no fold applied.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      // Widen so the sum cannot wrap before comparing against the bitwidth.
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       N0.getOperand(0).getOperand(0),
                                       DAG.getConstant(c1 + c2, DL,
                                                       ShiftCountVT)));
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                                       DAG.getConstant(ShiftAmt, DL0,
                                                       getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Mask off the bits that would have been shifted in as undef.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known;
    DAG.computeKnownBits(N0.getOperand(0), Known);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                         DAG.getConstant(ShAmt, DL,
                                         getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look pass the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}

/// Combine an ABS node: constant-fold, collapse abs(abs x), and drop the abs
/// entirely when the operand's sign bit is known to be zero.
SDValue DAGCombiner::visitABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (abs c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
  // fold (abs (abs x)) -> (abs x)
  if (N0.getOpcode() == ISD::ABS)
    return N0;
  // fold (abs x) -> x iff not-negative
  if (DAG.SignBitIsZero(N0))
    return N0;
  return SDValue();
}

/// Combine a BSWAP node: constant-fold, and cancel a pair of byte swaps.
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (bswap c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
  // fold (bswap (bswap x)) -> x
  if (N0.getOpcode() == ISD::BSWAP)
    return N0->getOperand(0);
  return SDValue();
}

/// Combine a BITREVERSE node: constant-fold, and cancel a pair of reversals.
SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (bitreverse c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
  // fold (bitreverse (bitreverse x)) -> x
  if (N0.getOpcode() == ISD::BITREVERSE)
    return N0.getOperand(0);
  return SDValue();
}

/// Combine a CTLZ node: constant-fold, and use the zero-undef variant when
/// the operand is provably non-zero (the variants only differ on zero input).
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);

  // If the value is known never to be zero, switch to the undef version.
  if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
    if (DAG.isKnownNeverZero(N0))
      return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  }

  return SDValue();
}

/// Combine a CTLZ_ZERO_UNDEF node: constant-fold only.
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz_zero_undef c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine a CTTZ node: constant-fold, and use the zero-undef variant when
/// the operand is provably non-zero.
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);

  // If the value is known never to be zero, switch to the undef version.
  if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
    if (DAG.isKnownNeverZero(N0))
      return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  }

  return SDValue();
}

/// Combine a CTTZ_ZERO_UNDEF node: constant-fold only.
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz_zero_undef c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine a CTPOP node: constant-fold only.
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
  return SDValue();
}

// FIXME: This should be checking for no signed zeros on individual operands, as
// well as no nans.
// Returns true when it is safe to turn a compare+select into fminnum/fmaxnum:
// requires the no-signed-zeros FP option, a floating-point type, and both
// operands known never to be NaN.
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) {
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = LHS.getValueType();

  return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
         DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
}

/// Generate Min/Max node
/// Matches select(setcc(LHS, RHS, CC), True, False) where {True,False} is
/// {LHS,RHS} in either order, and emits the corresponding FMIN/FMAX node when
/// the target supports it. Returns an empty SDValue when no fold applies.
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  // The select must pick between exactly the two compared values.
  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
    return SDValue();

  EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  switch (CC) {
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETULT:
  case ISD::SETULE: {
    // Since it's known never nan to get here already, either fminnum or
    // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
    // expanded in terms of it.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    // Mirror of the less-than cases: a greater-than compare selects the max.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    return SDValue();
  }
}

/// Fold (select Cond, C1, C2) where both arms are integer constants into
/// extends, adds, or xors of the condition where profitable.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  if (!VT.isInteger())
    return SDValue();

  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // For any constants that differ by 1, we can transform the select into an
    // extend and add. Use a target hook because some targets may prefer to
    // transform in the other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
    }

    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}

/// Main combine entry point for SELECT nodes: logic folds on i1 selects,
/// select-of-constants, condition normalization, and min/max formation.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);

  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or C, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      // Only keep the expansion if the target wants the sequence form or the
      // inner select already existed in the DAG (use_empty is false then).
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2);
      }
    }
  }

  if (VT0 == MVT::i1) {
    // select (not Cond), N1, N2 -> select Cond, N2, N1
    if (isBitwiseNot(N0))
      return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
  }

  // Fold selects based on a setcc into other things, such as min/max/abs.
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

    // select (fcmp lt x, y), x, y -> fminnum x, y
    // select (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care what happens if either operand is a NaN.
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
      if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
                                                CC, TLI, DAG))
        return FMinMax;

    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
    // This is conservatively limited to pre-legal-operations to give targets
    // a chance to reverse the transform if they want to do that. Also, it is
    // unlikely that the pattern would be formed late, so it's probably not
    // worth going through the other checks.
    if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
        CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
        N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
      auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
      if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
        //
        // The IR equivalent of this transform would have this form:
        //   %a = add %x, C
        //   %c = icmp ugt %x, ~C
        //   %r = select %c, -1, %a
        //   =>
        //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
        //   %u0 = extractvalue %u, 0
        //   %u1 = extractvalue %u, 1
        //   %r = select %u1, -1, %u0
        SDVTList VTs = DAG.getVTList(VT, VT0);
        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
      }
    }

    if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
        (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
                         N0.getOperand(2));

    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}

// Split a vector SETCC (or similar 3-operand compare-like node) into Lo/Hi
// halves, splitting both vector operands and duplicating the condition code.
static
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // Split the inputs.
  SDValue Lo, Hi, LL, LH, RL, RH;
  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);

  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));

  return std::make_pair(Lo, Hi);
}

// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
  // binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF
  // After we find an UNDEF element, keep looping until we get to half the
  // length of the BV and see if all the non-undef nodes are the same.
  // Require every non-undef element in the bottom half to be one single
  // constant node, so each half can be replaced by one concat input.
  ConstantSDNode *BottomHalf = nullptr;
  for (int i = 0; i < NumElems / 2; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (BottomHalf == nullptr)
      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != BottomHalf)
      return SDValue();
  }

  // Do the same for the second half of the BuildVector
  ConstantSDNode *TopHalf = nullptr;
  for (int i = NumElems / 2; i < NumElems; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (TopHalf == nullptr)
      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != TopHalf)
      return SDValue();
  }

  assert(TopHalf && BottomHalf &&
         "One half of the selector was all UNDEFs and the other was all the "
         "same value. This should have been addressed before this function.");
  // A zero selector picks the RHS concat input, non-zero picks the LHS.
  return DAG.getNode(
      ISD::CONCAT_VECTORS, DL, VT,
      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}

/// Split a masked scatter whose data type needs splitting when the mask comes
/// from a SETCC, so the SETCC is split too instead of being scalarized later.
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Data = MSC->getValue();
  SDLoc DL(N);

  // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
      TargetLowering::TypeSplitVector)
    return SDValue();
  SDValue MaskLo, MaskHi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));

  SDValue Chain = MSC->getChain();

  EVT MemoryVT = MSC->getMemoryVT();
  unsigned Alignment = MSC->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue DataLo, DataHi;
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  SDValue Scale = MSC->getScale();
  SDValue BasePtr = MSC->getBasePtr();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);

  MachineMemOperand *MMO = DAG.getMachineFunction().
    getMachineMemOperand(MSC->getPointerInfo(),
                         MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
                         Alignment, MSC->getAAInfo(), MSC->getRanges());

  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
  SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
                                    DataLo.getValueType(), DL, OpsLo, MMO);

  // The order of the Scatter operation after split is well defined. The "Hi"
  // part comes after the "Lo". So these two operations should be chained one
  // after another.
7477 SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale }; 7478 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), 7479 DL, OpsHi, MMO); 7480 } 7481 7482 SDValue DAGCombiner::visitMSTORE(SDNode *N) { 7483 if (Level >= AfterLegalizeTypes) 7484 return SDValue(); 7485 7486 MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); 7487 SDValue Mask = MST->getMask(); 7488 SDValue Data = MST->getValue(); 7489 EVT VT = Data.getValueType(); 7490 SDLoc DL(N); 7491 7492 // If the MSTORE data type requires splitting and the mask is provided by a 7493 // SETCC, then split both nodes and its operands before legalization. This 7494 // prevents the type legalizer from unrolling SETCC into scalar comparisons 7495 // and enables future optimizations (e.g. min/max pattern matching on X86). 7496 if (Mask.getOpcode() == ISD::SETCC) { 7497 // Check if any splitting is required. 7498 if (TLI.getTypeAction(*DAG.getContext(), VT) != 7499 TargetLowering::TypeSplitVector) 7500 return SDValue(); 7501 7502 SDValue MaskLo, MaskHi, Lo, Hi; 7503 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 7504 7505 SDValue Chain = MST->getChain(); 7506 SDValue Ptr = MST->getBasePtr(); 7507 7508 EVT MemoryVT = MST->getMemoryVT(); 7509 unsigned Alignment = MST->getOriginalAlignment(); 7510 7511 // if Alignment is equal to the vector size, 7512 // take the half of it for the second part 7513 unsigned SecondHalfAlignment = 7514 (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment; 7515 7516 EVT LoMemVT, HiMemVT; 7517 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 7518 7519 SDValue DataLo, DataHi; 7520 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 7521 7522 MachineMemOperand *MMO = DAG.getMachineFunction(). 
7523 getMachineMemOperand(MST->getPointerInfo(), 7524 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 7525 Alignment, MST->getAAInfo(), MST->getRanges()); 7526 7527 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, 7528 MST->isTruncatingStore(), 7529 MST->isCompressingStore()); 7530 7531 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, 7532 MST->isCompressingStore()); 7533 unsigned HiOffset = LoMemVT.getStoreSize(); 7534 7535 MMO = DAG.getMachineFunction().getMachineMemOperand( 7536 MST->getPointerInfo().getWithOffset(HiOffset), 7537 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, 7538 MST->getAAInfo(), MST->getRanges()); 7539 7540 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, 7541 MST->isTruncatingStore(), 7542 MST->isCompressingStore()); 7543 7544 AddToWorklist(Lo.getNode()); 7545 AddToWorklist(Hi.getNode()); 7546 7547 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 7548 } 7549 return SDValue(); 7550 } 7551 7552 SDValue DAGCombiner::visitMGATHER(SDNode *N) { 7553 if (Level >= AfterLegalizeTypes) 7554 return SDValue(); 7555 7556 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N); 7557 SDValue Mask = MGT->getMask(); 7558 SDLoc DL(N); 7559 7560 // If the MGATHER result requires splitting and the mask is provided by a 7561 // SETCC, then split both nodes and its operands before legalization. This 7562 // prevents the type legalizer from unrolling SETCC into scalar comparisons 7563 // and enables future optimizations (e.g. min/max pattern matching on X86). 7564 7565 if (Mask.getOpcode() != ISD::SETCC) 7566 return SDValue(); 7567 7568 EVT VT = N->getValueType(0); 7569 7570 // Check if any splitting is required. 
7571 if (TLI.getTypeAction(*DAG.getContext(), VT) != 7572 TargetLowering::TypeSplitVector) 7573 return SDValue(); 7574 7575 SDValue MaskLo, MaskHi, Lo, Hi; 7576 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 7577 7578 SDValue PassThru = MGT->getPassThru(); 7579 SDValue PassThruLo, PassThruHi; 7580 std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); 7581 7582 EVT LoVT, HiVT; 7583 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); 7584 7585 SDValue Chain = MGT->getChain(); 7586 EVT MemoryVT = MGT->getMemoryVT(); 7587 unsigned Alignment = MGT->getOriginalAlignment(); 7588 7589 EVT LoMemVT, HiMemVT; 7590 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 7591 7592 SDValue Scale = MGT->getScale(); 7593 SDValue BasePtr = MGT->getBasePtr(); 7594 SDValue Index = MGT->getIndex(); 7595 SDValue IndexLo, IndexHi; 7596 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); 7597 7598 MachineMemOperand *MMO = DAG.getMachineFunction(). 7599 getMachineMemOperand(MGT->getPointerInfo(), 7600 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), 7601 Alignment, MGT->getAAInfo(), MGT->getRanges()); 7602 7603 SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale }; 7604 Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, 7605 MMO); 7606 7607 SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale }; 7608 Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, 7609 MMO); 7610 7611 AddToWorklist(Lo.getNode()); 7612 AddToWorklist(Hi.getNode()); 7613 7614 // Build a factor node to remember that this load is independent of the 7615 // other one. 7616 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), 7617 Hi.getValue(1)); 7618 7619 // Legalized the chain result - switch anything that used the old chain to 7620 // use the new one. 
7621 DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); 7622 7623 SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 7624 7625 SDValue RetOps[] = { GatherRes, Chain }; 7626 return DAG.getMergeValues(RetOps, DL); 7627 } 7628 7629 SDValue DAGCombiner::visitMLOAD(SDNode *N) { 7630 if (Level >= AfterLegalizeTypes) 7631 return SDValue(); 7632 7633 MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); 7634 SDValue Mask = MLD->getMask(); 7635 SDLoc DL(N); 7636 7637 // If the MLOAD result requires splitting and the mask is provided by a 7638 // SETCC, then split both nodes and its operands before legalization. This 7639 // prevents the type legalizer from unrolling SETCC into scalar comparisons 7640 // and enables future optimizations (e.g. min/max pattern matching on X86). 7641 if (Mask.getOpcode() == ISD::SETCC) { 7642 EVT VT = N->getValueType(0); 7643 7644 // Check if any splitting is required. 7645 if (TLI.getTypeAction(*DAG.getContext(), VT) != 7646 TargetLowering::TypeSplitVector) 7647 return SDValue(); 7648 7649 SDValue MaskLo, MaskHi, Lo, Hi; 7650 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 7651 7652 SDValue PassThru = MLD->getPassThru(); 7653 SDValue PassThruLo, PassThruHi; 7654 std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); 7655 7656 EVT LoVT, HiVT; 7657 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); 7658 7659 SDValue Chain = MLD->getChain(); 7660 SDValue Ptr = MLD->getBasePtr(); 7661 EVT MemoryVT = MLD->getMemoryVT(); 7662 unsigned Alignment = MLD->getOriginalAlignment(); 7663 7664 // if Alignment is equal to the vector size, 7665 // take the half of it for the second part 7666 unsigned SecondHalfAlignment = 7667 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 7668 Alignment/2 : Alignment; 7669 7670 EVT LoMemVT, HiMemVT; 7671 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 7672 7673 MachineMemOperand *MMO = DAG.getMachineFunction(). 
7674 getMachineMemOperand(MLD->getPointerInfo(), 7675 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), 7676 Alignment, MLD->getAAInfo(), MLD->getRanges()); 7677 7678 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT, 7679 MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); 7680 7681 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, 7682 MLD->isExpandingLoad()); 7683 unsigned HiOffset = LoMemVT.getStoreSize(); 7684 7685 MMO = DAG.getMachineFunction().getMachineMemOperand( 7686 MLD->getPointerInfo().getWithOffset(HiOffset), 7687 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, 7688 MLD->getAAInfo(), MLD->getRanges()); 7689 7690 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT, 7691 MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); 7692 7693 AddToWorklist(Lo.getNode()); 7694 AddToWorklist(Hi.getNode()); 7695 7696 // Build a factor node to remember that this load is independent of the 7697 // other one. 7698 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), 7699 Hi.getValue(1)); 7700 7701 // Legalized the chain result - switch anything that used the old chain to 7702 // use the new one. 7703 DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); 7704 7705 SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 7706 7707 SDValue RetOps[] = { LoadRes, Chain }; 7708 return DAG.getMergeValues(RetOps, DL); 7709 } 7710 return SDValue(); 7711 } 7712 7713 /// A vector select of 2 constant vectors can be simplified to math/logic to 7714 /// avoid a variable select instruction and possibly avoid constant loads. 
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  // Bail out unless the condition is a single-use i1 vector and both select
  // arms are constant build_vectors, and the target says the math form wins.
  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
      !TLI.convertSelectOfConstantsToMath(VT) ||
      !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
      !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
    return SDValue();

  // Check if we can use the condition value to increment/decrement a single
  // constant value. This simplifies a select to an add and removes a constant
  // load/materialization from the general case.
  bool AllAddOne = true;
  bool AllSubOne = true;
  unsigned Elts = VT.getVectorNumElements();
  for (unsigned i = 0; i != Elts; ++i) {
    SDValue N1Elt = N1.getOperand(i);
    SDValue N2Elt = N2.getOperand(i);
    // Undef lanes can take any value, so they don't constrain the pattern.
    if (N1Elt.isUndef() || N2Elt.isUndef())
      continue;

    const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
    const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
    if (C1 != C2 + 1)
      AllAddOne = false;
    if (C1 != C2 - 1)
      AllSubOne = false;
  }

  // Further simplifications for the extra-special cases where the constants are
  // all 0 or all -1 should be implemented as folds of these patterns.
  SDLoc DL(N);
  if (AllAddOne || AllSubOne) {
    // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
    // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
    // (zext i1 -> 0/+1, sext i1 -> 0/-1, which supplies the +/-1 delta.)
    auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
    SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
    return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
  }

  // The general case for select-of-constants:
  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
  // leave that to a machine-specific pass.
  return SDValue();
}

/// Combine a VSELECT node. Tries, in order: generic select simplification,
/// integer-abs canonicalization, fp min/max formation from a compare-select,
/// widening a narrow setcc condition to the select width (load case only),
/// operand-based simplification, constant-condition folds, the
/// concat_vectors conversion, and the select-of-constants math fold above.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDLoc DL(N);

  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // Match both orientations: (x >= 0 ? x : -x) and (x <= 0 ? -x : x),
    // where the negation appears as (sub 0, x).
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      // Prefer the dedicated ABS node when the target supports it.
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      // Otherwise expand: Y = sra(X, bits-1); abs = xor(add(X, Y), Y).
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }

    // vselect x, y (fcmp lt x, y) -> fminnum x, y
    // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //
    EVT VT = N->getValueType(0);
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
      if (SDValue FMinMax = combineMinNumMaxNum(
              DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    // If this select has a condition (setcc) with narrower operands than the
    // select, try to widen the compare to match the select width.
    // TODO: This should be extended to handle any constant.
    // TODO: This could be extended to handle non-loading patterns, but that
    //       requires thorough testing to avoid regressions.
    if (isNullOrNullSplat(RHS)) {
      EVT NarrowVT = LHS.getValueType();
      EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
      EVT SetCCVT = getSetCCResultType(LHS.getValueType());
      unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
      unsigned WideWidth = WideVT.getScalarSizeInBits();
      bool IsSigned = isSignedIntSetCC(CC);
      auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
          SetCCWidth != 1 && SetCCWidth < WideWidth &&
          TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
          TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
        // Both compare operands can be widened for free. The LHS can use an
        // extended load, and the RHS is a constant:
        //   vselect (ext (setcc load(X), C)), N1, N2 -->
        //   vselect (setcc extload(X), C'), N1, N2
        auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
        SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
        EVT WideSetCCVT = getSetCCResultType(WideVT);
        SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
        return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
      }
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}

/// Combine a SELECT_CC node: fold equal arms, constant-fold the embedded
/// compare, simplify based on the true/false operands, and finally defer to
/// SimplifySelectCC for min/max/abs style folds.
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
                                  CC, SDLoc(N), false)) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // coherent with DAG creation: no setcc node is created in this case.
      return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // Fold to a simpler select_cc
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                         SCC.getOperand(2));
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}

/// Combine a SETCC node via SimplifySetCC, preserving the SETCC form when its
/// only user is a BRCOND (rebuilding one if simplification lost it).
SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // setcc is very commonly used as an argument to brcond. This pattern
  // also lends itself to numerous combines and, as a result, it is desired
  // we keep the argument to a brcond as a setcc as much as possible.
  bool PreferSetCC =
      N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;

  SDValue Combined = SimplifySetCC(
      N->getValueType(0), N->getOperand(0), N->getOperand(1),
      cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);

  if (!Combined)
    return SDValue();

  // If we prefer to have a setcc, and we don't, we'll try our best to
  // recreate one using rebuildSetCC.
  if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
    SDValue NewSetCC = rebuildSetCC(Combined);

    // We don't have anything interesting to combine to.
    if (NewSetCC.getNode() == N)
      return SDValue();

    if (NewSetCC)
      return NewSetCC;
  }

  return Combined;
}

/// Combine a SETCCCARRY node: with a known-zero carry-in it degenerates to a
/// plain SETCC.
SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Carry = N->getOperand(2);
  SDValue Cond = N->getOperand(3);

  // If Carry is false, fold to a regular SETCC.
  if (isNullConstant(Carry))
    return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);

  return SDValue();
}

/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
7968 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, 7969 SelectionDAG &DAG, bool LegalTypes) { 7970 unsigned Opcode = N->getOpcode(); 7971 SDValue N0 = N->getOperand(0); 7972 EVT VT = N->getValueType(0); 7973 7974 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || 7975 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || 7976 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) 7977 && "Expected EXTEND dag node in input!"); 7978 7979 // fold (sext c1) -> c1 7980 // fold (zext c1) -> c1 7981 // fold (aext c1) -> c1 7982 if (isa<ConstantSDNode>(N0)) 7983 return DAG.getNode(Opcode, SDLoc(N), VT, N0); 7984 7985 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) 7986 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) 7987 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) 7988 EVT SVT = VT.getScalarType(); 7989 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) && 7990 ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) 7991 return SDValue(); 7992 7993 // We can fold this node into a build_vector. 7994 unsigned VTBits = SVT.getSizeInBits(); 7995 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits(); 7996 SmallVector<SDValue, 8> Elts; 7997 unsigned NumElts = VT.getVectorNumElements(); 7998 SDLoc DL(N); 7999 8000 for (unsigned i=0; i != NumElts; ++i) { 8001 SDValue Op = N0->getOperand(i); 8002 if (Op->isUndef()) { 8003 Elts.push_back(DAG.getUNDEF(SVT)); 8004 continue; 8005 } 8006 8007 SDLoc DL(Op); 8008 // Get the constant value and if needed trunc it to the size of the type. 8009 // Nodes like build_vector might have constants wider than the scalar type. 
8010 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits); 8011 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) 8012 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT)); 8013 else 8014 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); 8015 } 8016 8017 return DAG.getBuildVector(VT, DL, Elts); 8018 } 8019 8020 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: 8021 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" 8022 // transformation. Returns true if extension are possible and the above 8023 // mentioned transformation is profitable. 8024 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, 8025 unsigned ExtOpc, 8026 SmallVectorImpl<SDNode *> &ExtendNodes, 8027 const TargetLowering &TLI) { 8028 bool HasCopyToRegUses = false; 8029 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType()); 8030 for (SDNode::use_iterator UI = N0.getNode()->use_begin(), 8031 UE = N0.getNode()->use_end(); 8032 UI != UE; ++UI) { 8033 SDNode *User = *UI; 8034 if (User == N) 8035 continue; 8036 if (UI.getUse().getResNo() != N0.getResNo()) 8037 continue; 8038 // FIXME: Only extend SETCC N, N and SETCC N, c for now. 8039 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 8040 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 8041 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 8042 // Sign bits will be lost after a zext. 8043 return false; 8044 bool Add = false; 8045 for (unsigned i = 0; i != 2; ++i) { 8046 SDValue UseOp = User->getOperand(i); 8047 if (UseOp == N0) 8048 continue; 8049 if (!isa<ConstantSDNode>(UseOp)) 8050 return false; 8051 Add = true; 8052 } 8053 if (Add) 8054 ExtendNodes.push_back(User); 8055 continue; 8056 } 8057 // If truncates aren't free and there are users we can't 8058 // extend, it isn't worthwhile. 8059 if (!isTruncFree) 8060 return false; 8061 // Remember if this value is live-out. 
8062 if (User->getOpcode() == ISD::CopyToReg) 8063 HasCopyToRegUses = true; 8064 } 8065 8066 if (HasCopyToRegUses) { 8067 bool BothLiveOut = false; 8068 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 8069 UI != UE; ++UI) { 8070 SDUse &Use = UI.getUse(); 8071 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 8072 BothLiveOut = true; 8073 break; 8074 } 8075 } 8076 if (BothLiveOut) 8077 // Both unextended and extended values are live out. There had better be 8078 // a good reason for the transformation. 8079 return ExtendNodes.size(); 8080 } 8081 return true; 8082 } 8083 8084 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, 8085 SDValue OrigLoad, SDValue ExtLoad, 8086 ISD::NodeType ExtType) { 8087 // Extend SetCC uses if necessary. 8088 SDLoc DL(ExtLoad); 8089 for (SDNode *SetCC : SetCCs) { 8090 SmallVector<SDValue, 4> Ops; 8091 8092 for (unsigned j = 0; j != 2; ++j) { 8093 SDValue SOp = SetCC->getOperand(j); 8094 if (SOp == OrigLoad) 8095 Ops.push_back(ExtLoad); 8096 else 8097 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); 8098 } 8099 8100 Ops.push_back(SetCC->getOperand(2)); 8101 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); 8102 } 8103 } 8104 8105 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). 8106 SDValue DAGCombiner::CombineExtLoad(SDNode *N) { 8107 SDValue N0 = N->getOperand(0); 8108 EVT DstVT = N->getValueType(0); 8109 EVT SrcVT = N0.getValueType(); 8110 8111 assert((N->getOpcode() == ISD::SIGN_EXTEND || 8112 N->getOpcode() == ISD::ZERO_EXTEND) && 8113 "Unexpected node type (not an extend)!"); 8114 8115 // fold (sext (load x)) to multiple smaller sextloads; same for zext. 
8116 // For example, on a target with legal v4i32, but illegal v8i32, turn: 8117 // (v8i32 (sext (v8i16 (load x)))) 8118 // into: 8119 // (v8i32 (concat_vectors (v4i32 (sextload x)), 8120 // (v4i32 (sextload (x + 16))))) 8121 // Where uses of the original load, i.e.: 8122 // (v8i16 (load x)) 8123 // are replaced with: 8124 // (v8i16 (truncate 8125 // (v8i32 (concat_vectors (v4i32 (sextload x)), 8126 // (v4i32 (sextload (x + 16))))))) 8127 // 8128 // This combine is only applicable to illegal, but splittable, vectors. 8129 // All legal types, and illegal non-vector types, are handled elsewhere. 8130 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. 8131 // 8132 if (N0->getOpcode() != ISD::LOAD) 8133 return SDValue(); 8134 8135 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 8136 8137 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || 8138 !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || 8139 !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) 8140 return SDValue(); 8141 8142 SmallVector<SDNode *, 4> SetCCs; 8143 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI)) 8144 return SDValue(); 8145 8146 ISD::LoadExtType ExtType = 8147 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; 8148 8149 // Try to split the vector types to get down to legal types. 
8150 EVT SplitSrcVT = SrcVT; 8151 EVT SplitDstVT = DstVT; 8152 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && 8153 SplitSrcVT.getVectorNumElements() > 1) { 8154 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; 8155 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; 8156 } 8157 8158 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) 8159 return SDValue(); 8160 8161 SDLoc DL(N); 8162 const unsigned NumSplits = 8163 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); 8164 const unsigned Stride = SplitSrcVT.getStoreSize(); 8165 SmallVector<SDValue, 4> Loads; 8166 SmallVector<SDValue, 4> Chains; 8167 8168 SDValue BasePtr = LN0->getBasePtr(); 8169 for (unsigned Idx = 0; Idx < NumSplits; Idx++) { 8170 const unsigned Offset = Idx * Stride; 8171 const unsigned Align = MinAlign(LN0->getAlignment(), Offset); 8172 8173 SDValue SplitLoad = DAG.getExtLoad( 8174 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr, 8175 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, 8176 LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); 8177 8178 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, 8179 DAG.getConstant(Stride, DL, BasePtr.getValueType())); 8180 8181 Loads.push_back(SplitLoad.getValue(0)); 8182 Chains.push_back(SplitLoad.getValue(1)); 8183 } 8184 8185 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); 8186 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); 8187 8188 // Simplify TF. 8189 AddToWorklist(NewChain.getNode()); 8190 8191 CombineTo(N, NewValue); 8192 8193 // Replace uses of the original load (before extension) 8194 // with a truncate of the concatenated sextloaded vectors. 
8195 SDValue Trunc = 8196 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); 8197 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode()); 8198 CombineTo(N0.getNode(), Trunc, NewChain); 8199 return SDValue(N, 0); // Return N so it doesn't get rechecked! 8200 } 8201 8202 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) -> 8203 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst)) 8204 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) { 8205 assert(N->getOpcode() == ISD::ZERO_EXTEND); 8206 EVT VT = N->getValueType(0); 8207 8208 // and/or/xor 8209 SDValue N0 = N->getOperand(0); 8210 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || 8211 N0.getOpcode() == ISD::XOR) || 8212 N0.getOperand(1).getOpcode() != ISD::Constant || 8213 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT))) 8214 return SDValue(); 8215 8216 // shl/shr 8217 SDValue N1 = N0->getOperand(0); 8218 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) || 8219 N1.getOperand(1).getOpcode() != ISD::Constant || 8220 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT))) 8221 return SDValue(); 8222 8223 // load 8224 if (!isa<LoadSDNode>(N1.getOperand(0))) 8225 return SDValue(); 8226 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0)); 8227 EVT MemVT = Load->getMemoryVT(); 8228 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) || 8229 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed()) 8230 return SDValue(); 8231 8232 8233 // If the shift op is SHL, the logic op must be AND, otherwise the result 8234 // will be wrong. 8235 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND) 8236 return SDValue(); 8237 8238 if (!N0.hasOneUse() || !N1.hasOneUse()) 8239 return SDValue(); 8240 8241 SmallVector<SDNode*, 4> SetCCs; 8242 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0), 8243 ISD::ZERO_EXTEND, SetCCs, TLI)) 8244 return SDValue(); 8245 8246 // Actually do the transformation. 
8247 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT, 8248 Load->getChain(), Load->getBasePtr(), 8249 Load->getMemoryVT(), Load->getMemOperand()); 8250 8251 SDLoc DL1(N1); 8252 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad, 8253 N1.getOperand(1)); 8254 8255 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 8256 Mask = Mask.zext(VT.getSizeInBits()); 8257 SDLoc DL0(N0); 8258 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift, 8259 DAG.getConstant(Mask, DL0, VT)); 8260 8261 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND); 8262 CombineTo(N, And); 8263 if (SDValue(Load, 0).hasOneUse()) { 8264 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1)); 8265 } else { 8266 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load), 8267 Load->getValueType(0), ExtLoad); 8268 CombineTo(Load, Trunc, ExtLoad.getValue(1)); 8269 } 8270 return SDValue(N,0); // Return N so it doesn't get rechecked! 8271 } 8272 8273 /// If we're narrowing or widening the result of a vector select and the final 8274 /// size is the same size as a setcc (compare) feeding the select, then try to 8275 /// apply the cast operation to the select's operands because matching vector 8276 /// sizes for a select condition and other operands should be more efficient. 8277 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { 8278 unsigned CastOpcode = Cast->getOpcode(); 8279 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND || 8280 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND || 8281 CastOpcode == ISD::FP_ROUND) && 8282 "Unexpected opcode for vector select narrowing/widening"); 8283 8284 // We only do this transform before legal ops because the pattern may be 8285 // obfuscated by target-specific operations after legalization. Do not create 8286 // an illegal select op, however, because that may be difficult to lower. 
8287 EVT VT = Cast->getValueType(0); 8288 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) 8289 return SDValue(); 8290 8291 SDValue VSel = Cast->getOperand(0); 8292 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() || 8293 VSel.getOperand(0).getOpcode() != ISD::SETCC) 8294 return SDValue(); 8295 8296 // Does the setcc have the same vector size as the casted select? 8297 SDValue SetCC = VSel.getOperand(0); 8298 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType()); 8299 if (SetCCVT.getSizeInBits() != VT.getSizeInBits()) 8300 return SDValue(); 8301 8302 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B) 8303 SDValue A = VSel.getOperand(1); 8304 SDValue B = VSel.getOperand(2); 8305 SDValue CastA, CastB; 8306 SDLoc DL(Cast); 8307 if (CastOpcode == ISD::FP_ROUND) { 8308 // FP_ROUND (fptrunc) has an extra flag operand to pass along. 8309 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1)); 8310 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1)); 8311 } else { 8312 CastA = DAG.getNode(CastOpcode, DL, VT, A); 8313 CastB = DAG.getNode(CastOpcode, DL, VT, B); 8314 } 8315 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB); 8316 } 8317 8318 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) 8319 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) 8320 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, 8321 const TargetLowering &TLI, EVT VT, 8322 bool LegalOperations, SDNode *N, 8323 SDValue N0, ISD::LoadExtType ExtLoadType) { 8324 SDNode *N0Node = N0.getNode(); 8325 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? 
ISD::isSEXTLoad(N0Node) 8326 : ISD::isZEXTLoad(N0Node); 8327 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) || 8328 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse()) 8329 return {}; 8330 8331 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 8332 EVT MemVT = LN0->getMemoryVT(); 8333 if ((LegalOperations || LN0->isVolatile() || VT.isVector()) && 8334 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) 8335 return {}; 8336 8337 SDValue ExtLoad = 8338 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), 8339 LN0->getBasePtr(), MemVT, LN0->getMemOperand()); 8340 Combiner.CombineTo(N, ExtLoad); 8341 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); 8342 return SDValue(N, 0); // Return N so it doesn't get rechecked! 8343 } 8344 8345 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x))) 8346 // Only generate vector extloads when 1) they're legal, and 2) they are 8347 // deemed desirable by the target. 8348 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, 8349 const TargetLowering &TLI, EVT VT, 8350 bool LegalOperations, SDNode *N, SDValue N0, 8351 ISD::LoadExtType ExtLoadType, 8352 ISD::NodeType ExtOpc) { 8353 if (!ISD::isNON_EXTLoad(N0.getNode()) || 8354 !ISD::isUNINDEXEDLoad(N0.getNode()) || 8355 ((LegalOperations || VT.isVector() || 8356 cast<LoadSDNode>(N0)->isVolatile()) && 8357 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) 8358 return {}; 8359 8360 bool DoXform = true; 8361 SmallVector<SDNode *, 4> SetCCs; 8362 if (!N0.hasOneUse()) 8363 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI); 8364 if (VT.isVector()) 8365 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); 8366 if (!DoXform) 8367 return {}; 8368 8369 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 8370 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), 8371 LN0->getBasePtr(), N0.getValueType(), 8372 LN0->getMemOperand()); 8373 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc); 8374 // If the load 
value is used only by N, replace it via CombineTo N. 8375 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); 8376 Combiner.CombineTo(N, ExtLoad); 8377 if (NoReplaceTrunc) { 8378 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); 8379 } else { 8380 SDValue Trunc = 8381 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); 8382 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1)); 8383 } 8384 return SDValue(N, 0); // Return N so it doesn't get rechecked! 8385 } 8386 8387 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, 8388 bool LegalOperations) { 8389 assert((N->getOpcode() == ISD::SIGN_EXTEND || 8390 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext"); 8391 8392 SDValue SetCC = N->getOperand(0); 8393 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC || 8394 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1) 8395 return SDValue(); 8396 8397 SDValue X = SetCC.getOperand(0); 8398 SDValue Ones = SetCC.getOperand(1); 8399 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); 8400 EVT VT = N->getValueType(0); 8401 EVT XVT = X.getValueType(); 8402 // setge X, C is canonicalized to setgt, so we do not need to match that 8403 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does 8404 // not require the 'not' op. 8405 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) { 8406 // Invert and smear/shift the sign bit: 8407 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1) 8408 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1) 8409 SDLoc DL(N); 8410 SDValue NotX = DAG.getNOT(DL, X, VT); 8411 SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); 8412 auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? 
ISD::SRA : ISD::SRL; 8413 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); 8414 } 8415 return SDValue(); 8416 } 8417 8418 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { 8419 SDValue N0 = N->getOperand(0); 8420 EVT VT = N->getValueType(0); 8421 SDLoc DL(N); 8422 8423 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) 8424 return Res; 8425 8426 // fold (sext (sext x)) -> (sext x) 8427 // fold (sext (aext x)) -> (sext x) 8428 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) 8429 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0)); 8430 8431 if (N0.getOpcode() == ISD::TRUNCATE) { 8432 // fold (sext (truncate (load x))) -> (sext (smaller load x)) 8433 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) 8434 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { 8435 SDNode *oye = N0.getOperand(0).getNode(); 8436 if (NarrowLoad.getNode() != N0.getNode()) { 8437 CombineTo(N0.getNode(), NarrowLoad); 8438 // CombineTo deleted the truncate, if needed, but not what's under it. 8439 AddToWorklist(oye); 8440 } 8441 return SDValue(N, 0); // Return N so it doesn't get rechecked! 8442 } 8443 8444 // See if the value being truncated is already sign extended. If so, just 8445 // eliminate the trunc/sext pair. 8446 SDValue Op = N0.getOperand(0); 8447 unsigned OpBits = Op.getScalarValueSizeInBits(); 8448 unsigned MidBits = N0.getScalarValueSizeInBits(); 8449 unsigned DestBits = VT.getScalarSizeInBits(); 8450 unsigned NumSignBits = DAG.ComputeNumSignBits(Op); 8451 8452 if (OpBits == DestBits) { 8453 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign 8454 // bits, it is already ready. 8455 if (NumSignBits > DestBits-MidBits) 8456 return Op; 8457 } else if (OpBits < DestBits) { 8458 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign 8459 // bits, just sext from i32. 
8460 if (NumSignBits > OpBits-MidBits) 8461 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op); 8462 } else { 8463 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign 8464 // bits, just truncate to i32. 8465 if (NumSignBits > OpBits-MidBits) 8466 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op); 8467 } 8468 8469 // fold (sext (truncate x)) -> (sextinreg x). 8470 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, 8471 N0.getValueType())) { 8472 if (OpBits < DestBits) 8473 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op); 8474 else if (OpBits > DestBits) 8475 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op); 8476 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op, 8477 DAG.getValueType(N0.getValueType())); 8478 } 8479 } 8480 8481 // Try to simplify (sext (load x)). 8482 if (SDValue foldedExt = 8483 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, 8484 ISD::SEXTLOAD, ISD::SIGN_EXTEND)) 8485 return foldedExt; 8486 8487 // fold (sext (load x)) to multiple smaller sextloads. 8488 // Only on illegal but splittable vectors. 8489 if (SDValue ExtLoad = CombineExtLoad(N)) 8490 return ExtLoad; 8491 8492 // Try to simplify (sext (sextload x)). 
8493 if (SDValue foldedExt = tryToFoldExtOfExtload( 8494 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD)) 8495 return foldedExt; 8496 8497 // fold (sext (and/or/xor (load x), cst)) -> 8498 // (and/or/xor (sextload x), (sext cst)) 8499 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || 8500 N0.getOpcode() == ISD::XOR) && 8501 isa<LoadSDNode>(N0.getOperand(0)) && 8502 N0.getOperand(1).getOpcode() == ISD::Constant && 8503 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { 8504 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0)); 8505 EVT MemVT = LN00->getMemoryVT(); 8506 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) && 8507 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) { 8508 SmallVector<SDNode*, 4> SetCCs; 8509 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0), 8510 ISD::SIGN_EXTEND, SetCCs, TLI); 8511 if (DoXform) { 8512 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT, 8513 LN00->getChain(), LN00->getBasePtr(), 8514 LN00->getMemoryVT(), 8515 LN00->getMemOperand()); 8516 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 8517 Mask = Mask.sext(VT.getSizeInBits()); 8518 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, 8519 ExtLoad, DAG.getConstant(Mask, DL, VT)); 8520 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND); 8521 bool NoReplaceTruncAnd = !N0.hasOneUse(); 8522 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse(); 8523 CombineTo(N, And); 8524 // If N0 has multiple uses, change other uses as well. 
8525 if (NoReplaceTruncAnd) { 8526 SDValue TruncAnd = 8527 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); 8528 CombineTo(N0.getNode(), TruncAnd); 8529 } 8530 if (NoReplaceTrunc) { 8531 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1)); 8532 } else { 8533 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00), 8534 LN00->getValueType(0), ExtLoad); 8535 CombineTo(LN00, Trunc, ExtLoad.getValue(1)); 8536 } 8537 return SDValue(N,0); // Return N so it doesn't get rechecked! 8538 } 8539 } 8540 } 8541 8542 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations)) 8543 return V; 8544 8545 if (N0.getOpcode() == ISD::SETCC) { 8546 SDValue N00 = N0.getOperand(0); 8547 SDValue N01 = N0.getOperand(1); 8548 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 8549 EVT N00VT = N0.getOperand(0).getValueType(); 8550 8551 // sext(setcc) -> sext_in_reg(vsetcc) for vectors. 8552 // Only do this before legalize for now. 8553 if (VT.isVector() && !LegalOperations && 8554 TLI.getBooleanContents(N00VT) == 8555 TargetLowering::ZeroOrNegativeOneBooleanContent) { 8556 // On some architectures (such as SSE/NEON/etc) the SETCC result type is 8557 // of the same size as the compared operands. Only optimize sext(setcc()) 8558 // if this is the case. 8559 EVT SVT = getSetCCResultType(N00VT); 8560 8561 // We know that the # elements of the results is the same as the 8562 // # elements of the compare (and the # elements of the compare result 8563 // for that matter). Check to see that they are the same size. If so, 8564 // we know that the element size of the sext'd result matches the 8565 // element size of the compare operands. 8566 if (VT.getSizeInBits() == SVT.getSizeInBits()) 8567 return DAG.getSetCC(DL, VT, N00, N01, CC); 8568 8569 // If the desired elements are smaller or larger than the source 8570 // elements, we can use a matching integer vector type and then 8571 // truncate/sign extend. 
8572 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); 8573 if (SVT == MatchingVecType) { 8574 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC); 8575 return DAG.getSExtOrTrunc(VsetCC, DL, VT); 8576 } 8577 } 8578 8579 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) 8580 // Here, T can be 1 or -1, depending on the type of the setcc and 8581 // getBooleanContents(). 8582 unsigned SetCCWidth = N0.getScalarValueSizeInBits(); 8583 8584 // To determine the "true" side of the select, we need to know the high bit 8585 // of the value returned by the setcc if it evaluates to true. 8586 // If the type of the setcc is i1, then the true case of the select is just 8587 // sext(i1 1), that is, -1. 8588 // If the type of the setcc is larger (say, i8) then the value of the high 8589 // bit depends on getBooleanContents(), so ask TLI for a real "true" value 8590 // of the appropriate width. 8591 SDValue ExtTrueVal = (SetCCWidth == 1) 8592 ? DAG.getAllOnesConstant(DL, VT) 8593 : DAG.getBoolConstant(true, DL, VT, N00VT); 8594 SDValue Zero = DAG.getConstant(0, DL, VT); 8595 if (SDValue SCC = 8596 SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) 8597 return SCC; 8598 8599 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) { 8600 EVT SetCCVT = getSetCCResultType(N00VT); 8601 // Don't do this transform for i1 because there's a select transform 8602 // that would reverse it. 8603 // TODO: We should not do this transform at all without a target hook 8604 // because a sext is likely cheaper than a select? 8605 if (SetCCVT.getScalarSizeInBits() != 1 && 8606 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) { 8607 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC); 8608 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero); 8609 } 8610 } 8611 } 8612 8613 // fold (sext x) -> (zext x) if the sign bit is known zero. 
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  // Last resort: match a vselect whose operand sizes disagree with its setcc.
  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // No sext-specific fold applied.
  return SDValue();
}

// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero/one in Known.
// This function computes KnownBits to avoid a duplicated call to
// computeKnownBits in the caller.
//
// Two forms are recognized:
//   * a plain TRUNCATE node, and
//   * (setne X, 0) with an i1 scalar result, which is equivalent to
//     (trunc X to i1) when all bits of X above bit 0 are known zero.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                         KnownBits &Known) {
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    DAG.computeKnownBits(Op, Known);
    return true;
  }

  // Otherwise only an i1-valued (setne X, Y) can be treated as a truncate.
  if (N.getOpcode() != ISD::SETCC ||
      N.getValueType().getScalarType() != MVT::i1 ||
      cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
    return false;

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  assert(Op0.getValueType() == Op1.getValueType());

  // One side of the setne must be the zero constant (or zero splat); the
  // other side is the value being "truncated".
  if (isNullOrNullSplat(Op0))
    Op = Op1;
  else if (isNullOrNullSplat(Op1))
    Op = Op0;
  else
    return false;

  DAG.computeKnownBits(Op, Known);

  // (setne Op, 0) == (trunc Op to i1) only if every bit of Op other than
  // bit 0 is known to be zero.
  return (Known.Zero | 1).isAllOnesValue();
}

SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (zext c1) -> c1' and related constant/buildvector cases.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  SDValue Op;
  KnownBits Known;
  if (isTruncateOf(DAG, N0, Op, Known)) {
    // TruncatedBits = the bits of Op that the truncate N0 discards (and that
    // the zext would then have to re-zero). If Op and N0 are the same width
    // (the setcc form of isTruncateOf), nothing is discarded.
    APInt TruncatedBits =
      (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
      APInt(Op.getScalarValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getScalarValueSizeInBits(),
                        N0.getScalarValueSizeInBits(),
                        std::min(Op.getScalarValueSizeInBits(),
                                 VT.getScalarSizeInBits()));
    // If those bits are already known zero, the trunc+zext pair is a no-op
    // modulo width, so extend or truncate Op directly.
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger
    // mask, possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    // Generic case: (zext (truncate x)) -> zero the truncated-away bits of x
    // with an in-register mask at the destination width.
    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
      // We may safely transfer the debug info describing the truncate node over
      // to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // Try to simplify (zext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    return foldedExt;

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  // NOTE(review): the last clause uses '&&', so this fold only fires before
  // legalization AND when the logic op is already legal in VT — confirm this
  // isn't intended to be '||' like the earlier legality guards in this file.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        // If the (and (load)) would already match as a zextload on its own,
        // widening it here would just duplicate the load for other users.
        if (N0.getOpcode() == ISD::AND) {
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        // If the load had other users, keep a truncate of the extload for
        // them; otherwise just reroute the chain users.
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // Try to simplify (zext (zextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    return foldedExt;

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the sext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      SDValue VecOnes = DAG.getConstant(1, DL, VT);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         VecOnes);
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation. KnownZeroBits = number of high bits the inner zext
      // guarantees to be zero.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (aext c1) -> c1' and related constant/buildvector cases.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x)) -- any-extend of a truncate can simply re-extend
  // or re-truncate the original value, since the extended bits are undefined.
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    // Widen the mask to the destination type; its high bits are zero, which
    // also pins down the otherwise-undefined any-extended bits.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction. We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // If the load has other users, only widen it when those users can also
    // be rewritten to use the extended value (setcc users).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
                                        TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      } else {
        // Other users still want the narrow value: feed them a truncate of
        // the extending load.
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                    N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      // Re-issue the existing extending load directly at the wider type,
      // preserving its extension kind.
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}

// Simplify AssertZext/AssertSext nodes: collapse duplicate asserts and merge
// assert/truncate/assert sandwiches into a single, strongest assert.
SDValue DAGCombiner::visitAssertExt(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT AssertVT = cast<VTSDNode>(N1)->getVT();

  // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
  if (N0.getOpcode() == Opcode &&
      AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
    return N0;

  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by asserting on the smallest asserted type to the larger source type.
    // This eliminates the later assert:
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
           "Asserting zero/sign-extended bits to a type larger than the "
           "truncated destination does not provide information");

    SDLoc DL(N);
    // The stronger assert is the narrower of the two asserted types.
    EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
    SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
    SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                    BigA.getOperand(0), MinAssertVTVal);
    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
  }

  return SDValue();
}

/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits
/// of the narrower type, transform it to a narrower load from address
/// + N / (num bits of new type). Also narrow the load if the result is masked
/// with an AND that effectively produces a smaller type. If the result is to
/// be extended, also fold the extension to form an extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  unsigned ShAmt = 0;
  bool HasShiftedOffset = false;
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value,
    // or it maybe shifting a higher subword, half or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    // Treat the SRL node itself as the value being narrowed; the load and
    // shift amount are inspected through its operands below.
    N0 = SDValue(N, 0);

    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01 || !LN0)
      return SDValue();

    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC)
      return SDValue();

    const APInt &Mask = AndC->getAPIntValue();
    unsigned ActiveBits = 0;
    if (Mask.isMask()) {
      // Low-bits mask: plain zero-extending narrow load.
      ActiveBits = Mask.countTrailingOnes();
    } else if (Mask.isShiftedMask()) {
      // Mask of contiguous bits at an offset: the narrow load is taken at an
      // offset; remember that so the mask/result can be re-shifted below.
      ShAmt = Mask.countTrailingZeros();
      APInt ShiftedMask = Mask.lshr(ShAmt);
      ActiveBits = ShiftedMask.countTrailingOnes();
      HasShiftedOffset = true;
    } else
      return SDValue();

    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    SDValue SRL = N0;
    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
      ShAmt = ConstShift->getZExtValue();
      unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      auto *LN0 = cast<LoadSDNode>(N0);

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (LN0->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes. If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
        return SDValue();

      // If the SRL is only used by a masking AND, we may be able to adjust
      // the ExtVT to make the AND redundant.
      SDNode *Mask = *(SRL->use_begin());
      if (Mask->getOpcode() == ISD::AND &&
          isa<ConstantSDNode>(Mask->getOperand(1))) {
        const APInt &ShiftMask =
          cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
        if (ShiftMask.isMask()) {
          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                           ShiftMask.countTrailingOnes());
          // If the mask is smaller, recompute the type.
          if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
              TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
            ExtVT = MaskedVT;
        }
      }
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  // On big-endian targets the narrow slice sits at the opposite end of the
  // wide value, so the byte offset must be mirrored.
  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    return LVTStoreBits - EVTStoreBits - ShAmt;
  };

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian())
    ShAmt = AdjustBigEndianShift(ShAmt);

  EVT PtrType = N0.getOperand(1).getValueType();
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               Flags);
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  if (HasShiftedOffset) {
    // Recalculate the shift amount after it has been altered to calculate
    // the offset.
    if (DAG.getDataLayout().isBigEndian())
      ShAmt = AdjustBigEndianShift(ShAmt);

    // We're using a shifted mask, so the load now has an offset. This means we
    // now need to shift right the mask to match the new load and then shift
    // right the result of the AND. N is mutated in place into
    // (and NewLoad, Mask >> ShAmt) and all its users are redirected to
    // (shl N, ShAmt).
    const APInt &Mask = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
    APInt ShiftedMask = Mask.lshr(ShAmt);
    DAG.UpdateNodeOperands(N, Result, DAG.getConstant(ShiftedMask, DL, VT));
    SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
    SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, SDValue(N, 0),
                                  ShiftC);
    // RAUW redirects the SHL's own operand too; restore it to point at N.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shifted);
    DAG.UpdateNodeOperands(Shifted.getNode(), SDValue(N, 0), ShiftC);
  }
  // Return the new loaded value.
  return Result;
}

SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // Note: this local named 'EVT' shadows the EVT type within this function.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned EVTBits = EVT.getScalarSizeInBits();

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
9326 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { 9327 SDValue N00 = N0.getOperand(0); 9328 if (N00.getScalarValueSizeInBits() <= EVTBits && 9329 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) 9330 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); 9331 } 9332 9333 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x) 9334 if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG || 9335 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG || 9336 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) && 9337 N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) { 9338 if (!LegalOperations || 9339 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)) 9340 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, 9341 N0.getOperand(0)); 9342 } 9343 9344 // fold (sext_in_reg (zext x)) -> (sext x) 9345 // iff we are extending the source sign bit. 9346 if (N0.getOpcode() == ISD::ZERO_EXTEND) { 9347 SDValue N00 = N0.getOperand(0); 9348 if (N00.getScalarValueSizeInBits() == EVTBits && 9349 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) 9350 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); 9351 } 9352 9353 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. 9354 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1))) 9355 return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType()); 9356 9357 // fold operands of sext_in_reg based on knowledge that the top bits are not 9358 // demanded. 9359 if (SimplifyDemandedBits(SDValue(N, 0))) 9360 return SDValue(N, 0); 9361 9362 // fold (sext_in_reg (load x)) -> (smaller sextload x) 9363 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) 9364 if (SDValue NarrowLoad = ReduceLoadWidth(N)) 9365 return NarrowLoad; 9366 9367 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) 9368 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. 
9369 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. 9370 if (N0.getOpcode() == ISD::SRL) { 9371 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 9372 if (ShAmt->getZExtValue()+EVTBits <= VTBits) { 9373 // We can turn this into an SRA iff the input to the SRL is already sign 9374 // extended enough. 9375 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); 9376 if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) 9377 return DAG.getNode(ISD::SRA, SDLoc(N), VT, 9378 N0.getOperand(0), N0.getOperand(1)); 9379 } 9380 } 9381 9382 // fold (sext_inreg (extload x)) -> (sextload x) 9383 // If sextload is not supported by target, we can only do the combine when 9384 // load has one use. Doing otherwise can block folding the extload with other 9385 // extends that the target does support. 9386 if (ISD::isEXTLoad(N0.getNode()) && 9387 ISD::isUNINDEXEDLoad(N0.getNode()) && 9388 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 9389 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() && 9390 N0.hasOneUse()) || 9391 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { 9392 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 9393 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 9394 LN0->getChain(), 9395 LN0->getBasePtr(), EVT, 9396 LN0->getMemOperand()); 9397 CombineTo(N, ExtLoad); 9398 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 9399 AddToWorklist(ExtLoad.getNode()); 9400 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
9401 } 9402 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use 9403 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 9404 N0.hasOneUse() && 9405 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 9406 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 9407 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { 9408 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 9409 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 9410 LN0->getChain(), 9411 LN0->getBasePtr(), EVT, 9412 LN0->getMemOperand()); 9413 CombineTo(N, ExtLoad); 9414 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 9415 return SDValue(N, 0); // Return N so it doesn't get rechecked! 9416 } 9417 9418 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) 9419 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { 9420 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), 9421 N0.getOperand(1), false)) 9422 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 9423 BSwap, N1); 9424 } 9425 9426 return SDValue(); 9427 } 9428 9429 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { 9430 SDValue N0 = N->getOperand(0); 9431 EVT VT = N->getValueType(0); 9432 9433 if (N0.isUndef()) 9434 return DAG.getUNDEF(VT); 9435 9436 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) 9437 return Res; 9438 9439 return SDValue(); 9440 } 9441 9442 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { 9443 SDValue N0 = N->getOperand(0); 9444 EVT VT = N->getValueType(0); 9445 9446 if (N0.isUndef()) 9447 return DAG.getUNDEF(VT); 9448 9449 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) 9450 return Res; 9451 9452 return SDValue(); 9453 } 9454 9455 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { 9456 SDValue N0 = N->getOperand(0); 9457 EVT VT = N->getValueType(0); 9458 bool isLE = DAG.getDataLayout().isLittleEndian(); 9459 9460 // noop truncate 9461 if (N0.getValueType() == N->getValueType(0)) 
9462 return N0; 9463 9464 // fold (truncate (truncate x)) -> (truncate x) 9465 if (N0.getOpcode() == ISD::TRUNCATE) 9466 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); 9467 9468 // fold (truncate c1) -> c1 9469 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { 9470 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); 9471 if (C.getNode() != N) 9472 return C; 9473 } 9474 9475 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x 9476 if (N0.getOpcode() == ISD::ZERO_EXTEND || 9477 N0.getOpcode() == ISD::SIGN_EXTEND || 9478 N0.getOpcode() == ISD::ANY_EXTEND) { 9479 // if the source is smaller than the dest, we still need an extend. 9480 if (N0.getOperand(0).getValueType().bitsLT(VT)) 9481 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); 9482 // if the source is larger than the dest, than we just need the truncate. 9483 if (N0.getOperand(0).getValueType().bitsGT(VT)) 9484 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); 9485 // if the source and dest are the same type, we can drop both the extend 9486 // and the truncate. 9487 return N0.getOperand(0); 9488 } 9489 9490 // If this is anyext(trunc), don't fold it, allow ourselves to be folded. 9491 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND)) 9492 return SDValue(); 9493 9494 // Fold extract-and-trunc into a narrow extract. For example: 9495 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) 9496 // i32 y = TRUNCATE(i64 x) 9497 // -- becomes -- 9498 // v16i8 b = BITCAST (v2i64 val) 9499 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) 9500 // 9501 // Note: We only run this optimization after type legalization (which often 9502 // creates this pattern) and before operation legalization after which 9503 // we need to be more careful about the vector instructions that we generate. 
9504 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 9505 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { 9506 EVT VecTy = N0.getOperand(0).getValueType(); 9507 EVT ExTy = N0.getValueType(); 9508 EVT TrTy = N->getValueType(0); 9509 9510 unsigned NumElem = VecTy.getVectorNumElements(); 9511 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); 9512 9513 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); 9514 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); 9515 9516 SDValue EltNo = N0->getOperand(1); 9517 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { 9518 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 9519 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 9520 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); 9521 9522 SDLoc DL(N); 9523 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, 9524 DAG.getBitcast(NVT, N0.getOperand(0)), 9525 DAG.getConstant(Index, DL, IndexTy)); 9526 } 9527 } 9528 9529 // trunc (select c, a, b) -> select c, (trunc a), (trunc b) 9530 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) { 9531 EVT SrcVT = N0.getValueType(); 9532 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && 9533 TLI.isTruncateFree(SrcVT, VT)) { 9534 SDLoc SL(N0); 9535 SDValue Cond = N0.getOperand(0); 9536 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); 9537 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); 9538 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); 9539 } 9540 } 9541 9542 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits() 9543 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() && 9544 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) && 9545 TLI.isTypeDesirableForOp(ISD::SHL, VT)) { 9546 SDValue Amt = N0.getOperand(1); 9547 KnownBits Known; 9548 DAG.computeKnownBits(Amt, Known); 9549 unsigned Size = 
VT.getScalarSizeInBits(); 9550 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { 9551 SDLoc SL(N); 9552 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); 9553 9554 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); 9555 if (AmtVT != Amt.getValueType()) { 9556 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT); 9557 AddToWorklist(Amt.getNode()); 9558 } 9559 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt); 9560 } 9561 } 9562 9563 // Fold a series of buildvector, bitcast, and truncate if possible. 9564 // For example fold 9565 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to 9566 // (2xi32 (buildvector x, y)). 9567 if (Level == AfterLegalizeVectorOps && VT.isVector() && 9568 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && 9569 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && 9570 N0.getOperand(0).hasOneUse()) { 9571 SDValue BuildVect = N0.getOperand(0); 9572 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); 9573 EVT TruncVecEltTy = VT.getVectorElementType(); 9574 9575 // Check that the element types match. 9576 if (BuildVectEltTy == TruncVecEltTy) { 9577 // Now we only need to compute the offset of the truncated elements. 9578 unsigned BuildVecNumElts = BuildVect.getNumOperands(); 9579 unsigned TruncVecNumElts = VT.getVectorNumElements(); 9580 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; 9581 9582 assert((BuildVecNumElts % TruncVecNumElts) == 0 && 9583 "Invalid number of elements"); 9584 9585 SmallVector<SDValue, 8> Opnds; 9586 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) 9587 Opnds.push_back(BuildVect.getOperand(i)); 9588 9589 return DAG.getBuildVector(VT, SDLoc(N), Opnds); 9590 } 9591 } 9592 9593 // See if we can simplify the input to this truncate through knowledge that 9594 // only the low bits are being used. 
9595 // For example "trunc (or (shl x, 8), y)" // -> trunc y 9596 // Currently we only perform this optimization on scalars because vectors 9597 // may have different active low bits. 9598 if (!VT.isVector()) { 9599 APInt Mask = 9600 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits()); 9601 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask)) 9602 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); 9603 } 9604 9605 // fold (truncate (load x)) -> (smaller load x) 9606 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) 9607 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { 9608 if (SDValue Reduced = ReduceLoadWidth(N)) 9609 return Reduced; 9610 9611 // Handle the case where the load remains an extending load even 9612 // after truncation. 9613 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { 9614 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 9615 if (!LN0->isVolatile() && 9616 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { 9617 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), 9618 VT, LN0->getChain(), LN0->getBasePtr(), 9619 LN0->getMemoryVT(), 9620 LN0->getMemOperand()); 9621 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); 9622 return NewLoad; 9623 } 9624 } 9625 } 9626 9627 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), 9628 // where ... are all 'undef'. 9629 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { 9630 SmallVector<EVT, 8> VTs; 9631 SDValue V; 9632 unsigned Idx = 0; 9633 unsigned NumDefs = 0; 9634 9635 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { 9636 SDValue X = N0.getOperand(i); 9637 if (!X.isUndef()) { 9638 V = X; 9639 Idx = i; 9640 NumDefs++; 9641 } 9642 // Stop if more than one members are non-undef. 
9643 if (NumDefs > 1) 9644 break; 9645 VTs.push_back(EVT::getVectorVT(*DAG.getContext(), 9646 VT.getVectorElementType(), 9647 X.getValueType().getVectorNumElements())); 9648 } 9649 9650 if (NumDefs == 0) 9651 return DAG.getUNDEF(VT); 9652 9653 if (NumDefs == 1) { 9654 assert(V.getNode() && "The single defined operand is empty!"); 9655 SmallVector<SDValue, 8> Opnds; 9656 for (unsigned i = 0, e = VTs.size(); i != e; ++i) { 9657 if (i != Idx) { 9658 Opnds.push_back(DAG.getUNDEF(VTs[i])); 9659 continue; 9660 } 9661 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); 9662 AddToWorklist(NV.getNode()); 9663 Opnds.push_back(NV); 9664 } 9665 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); 9666 } 9667 } 9668 9669 // Fold truncate of a bitcast of a vector to an extract of the low vector 9670 // element. 9671 // 9672 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx 9673 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) { 9674 SDValue VecSrc = N0.getOperand(0); 9675 EVT SrcVT = VecSrc.getValueType(); 9676 if (SrcVT.isVector() && SrcVT.getScalarType() == VT && 9677 (!LegalOperations || 9678 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) { 9679 SDLoc SL(N); 9680 9681 EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); 9682 unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1; 9683 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, 9684 VecSrc, DAG.getConstant(Idx, SL, IdxVT)); 9685 } 9686 } 9687 9688 // Simplify the operands using demanded-bits information. 9689 if (!VT.isVector() && 9690 SimplifyDemandedBits(SDValue(N, 0))) 9691 return SDValue(N, 0); 9692 9693 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) 9694 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) 9695 // When the adde's carry is not used. 
9696 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) && 9697 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) && 9698 (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) { 9699 SDLoc SL(N); 9700 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); 9701 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); 9702 auto VTs = DAG.getVTList(VT, N0->getValueType(1)); 9703 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2)); 9704 } 9705 9706 // fold (truncate (extract_subvector(ext x))) -> 9707 // (extract_subvector x) 9708 // TODO: This can be generalized to cover cases where the truncate and extract 9709 // do not fully cancel each other out. 9710 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) { 9711 SDValue N00 = N0.getOperand(0); 9712 if (N00.getOpcode() == ISD::SIGN_EXTEND || 9713 N00.getOpcode() == ISD::ZERO_EXTEND || 9714 N00.getOpcode() == ISD::ANY_EXTEND) { 9715 if (N00.getOperand(0)->getValueType(0).getVectorElementType() == 9716 VT.getVectorElementType()) 9717 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT, 9718 N00.getOperand(0), N0.getOperand(1)); 9719 } 9720 } 9721 9722 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) 9723 return NewVSel; 9724 9725 return SDValue(); 9726 } 9727 9728 static SDNode *getBuildPairElt(SDNode *N, unsigned i) { 9729 SDValue Elt = N->getOperand(i); 9730 if (Elt.getOpcode() != ISD::MERGE_VALUES) 9731 return Elt.getNode(); 9732 return Elt.getOperand(Elt.getResNo()).getNode(); 9733 } 9734 9735 /// build_pair (load, load) -> load 9736 /// if load locations are consecutive. 
9737 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { 9738 assert(N->getOpcode() == ISD::BUILD_PAIR); 9739 9740 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); 9741 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); 9742 9743 // A BUILD_PAIR is always having the least significant part in elt 0 and the 9744 // most significant part in elt 1. So when combining into one large load, we 9745 // need to consider the endianness. 9746 if (DAG.getDataLayout().isBigEndian()) 9747 std::swap(LD1, LD2); 9748 9749 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || 9750 LD1->getAddressSpace() != LD2->getAddressSpace()) 9751 return SDValue(); 9752 EVT LD1VT = LD1->getValueType(0); 9753 unsigned LD1Bytes = LD1VT.getStoreSize(); 9754 if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && 9755 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { 9756 unsigned Align = LD1->getAlignment(); 9757 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( 9758 VT.getTypeForEVT(*DAG.getContext())); 9759 9760 if (NewAlign <= Align && 9761 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) 9762 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), 9763 LD1->getPointerInfo(), Align); 9764 } 9765 9766 return SDValue(); 9767 } 9768 9769 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { 9770 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi 9771 // and Lo parts; on big-endian machines it doesn't. 9772 return DAG.getDataLayout().isBigEndian() ? 1 : 0; 9773 } 9774 9775 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, 9776 const TargetLowering &TLI) { 9777 // If this is not a bitcast to an FP type or if the target doesn't have 9778 // IEEE754-compliant FP logic, we're done. 
9779 EVT VT = N->getValueType(0); 9780 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT)) 9781 return SDValue(); 9782 9783 // TODO: Handle cases where the integer constant is a different scalar 9784 // bitwidth to the FP. 9785 SDValue N0 = N->getOperand(0); 9786 EVT SourceVT = N0.getValueType(); 9787 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits()) 9788 return SDValue(); 9789 9790 unsigned FPOpcode; 9791 APInt SignMask; 9792 switch (N0.getOpcode()) { 9793 case ISD::AND: 9794 FPOpcode = ISD::FABS; 9795 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits()); 9796 break; 9797 case ISD::XOR: 9798 FPOpcode = ISD::FNEG; 9799 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits()); 9800 break; 9801 case ISD::OR: 9802 FPOpcode = ISD::FABS; 9803 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits()); 9804 break; 9805 default: 9806 return SDValue(); 9807 } 9808 9809 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X 9810 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X 9811 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) -> 9812 // fneg (fabs X) 9813 SDValue LogicOp0 = N0.getOperand(0); 9814 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true); 9815 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask && 9816 LogicOp0.getOpcode() == ISD::BITCAST && 9817 LogicOp0.getOperand(0).getValueType() == VT) { 9818 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0)); 9819 NumFPLogicOpsConv++; 9820 if (N0.getOpcode() == ISD::OR) 9821 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp); 9822 return FPOp; 9823 } 9824 9825 return SDValue(); 9826 } 9827 9828 SDValue DAGCombiner::visitBITCAST(SDNode *N) { 9829 SDValue N0 = N->getOperand(0); 9830 EVT VT = N->getValueType(0); 9831 9832 if (N0.isUndef()) 9833 return DAG.getUNDEF(VT); 9834 9835 // If the input is a BUILD_VECTOR with all constant elements, fold this now. 
9836 // Only do this before legalize types, since we might create an illegal 9837 // scalar type. Even if we knew we wouldn't create an illegal scalar type 9838 // we can only do this before legalize ops, since the target maybe 9839 // depending on the bitcast. 9840 // First check to see if this is all constant. 9841 if (!LegalTypes && 9842 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && 9843 VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant()) 9844 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), 9845 VT.getVectorElementType()); 9846 9847 // If the input is a constant, let getNode fold it. 9848 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { 9849 // If we can't allow illegal operations, we need to check that this is just 9850 // a fp -> int or int -> conversion and that the resulting operation will 9851 // be legal. 9852 if (!LegalOperations || 9853 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && 9854 TLI.isOperationLegal(ISD::ConstantFP, VT)) || 9855 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && 9856 TLI.isOperationLegal(ISD::Constant, VT))) { 9857 SDValue C = DAG.getBitcast(VT, N0); 9858 if (C.getNode() != N) 9859 return C; 9860 } 9861 } 9862 9863 // (conv (conv x, t1), t2) -> (conv x, t2) 9864 if (N0.getOpcode() == ISD::BITCAST) 9865 return DAG.getBitcast(VT, N0.getOperand(0)); 9866 9867 // fold (conv (load x)) -> (load (conv*)x) 9868 // If the resultant load doesn't need a higher alignment than the original! 9869 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 9870 // Do not remove the cast if the types differ in endian layout. 9871 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == 9872 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && 9873 // If the load is volatile, we only want to change the load type if the 9874 // resulting load is legal. Otherwise we might increase the number of 9875 // memory accesses. 
We don't care if the original type was legal or not 9876 // as we assume software couldn't rely on the number of accesses of an 9877 // illegal type. 9878 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 9879 TLI.isOperationLegal(ISD::LOAD, VT)) && 9880 TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { 9881 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 9882 unsigned OrigAlign = LN0->getAlignment(); 9883 9884 bool Fast = false; 9885 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, 9886 LN0->getAddressSpace(), OrigAlign, &Fast) && 9887 Fast) { 9888 SDValue Load = 9889 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), 9890 LN0->getPointerInfo(), OrigAlign, 9891 LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); 9892 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); 9893 return Load; 9894 } 9895 } 9896 9897 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI)) 9898 return V; 9899 9900 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 9901 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 9902 // 9903 // For ppc_fp128: 9904 // fold (bitcast (fneg x)) -> 9905 // flipbit = signbit 9906 // (xor (bitcast x) (build_pair flipbit, flipbit)) 9907 // 9908 // fold (bitcast (fabs x)) -> 9909 // flipbit = (and (extract_element (bitcast x), 0), signbit) 9910 // (xor (bitcast x) (build_pair flipbit, flipbit)) 9911 // This often reduces constant pool loads. 
9912 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || 9913 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && 9914 N0.getNode()->hasOneUse() && VT.isInteger() && 9915 !VT.isVector() && !N0.getValueType().isVector()) { 9916 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0)); 9917 AddToWorklist(NewConv.getNode()); 9918 9919 SDLoc DL(N); 9920 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { 9921 assert(VT.getSizeInBits() == 128); 9922 SDValue SignBit = DAG.getConstant( 9923 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); 9924 SDValue FlipBit; 9925 if (N0.getOpcode() == ISD::FNEG) { 9926 FlipBit = SignBit; 9927 AddToWorklist(FlipBit.getNode()); 9928 } else { 9929 assert(N0.getOpcode() == ISD::FABS); 9930 SDValue Hi = 9931 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv, 9932 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), 9933 SDLoc(NewConv))); 9934 AddToWorklist(Hi.getNode()); 9935 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit); 9936 AddToWorklist(FlipBit.getNode()); 9937 } 9938 SDValue FlipBits = 9939 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); 9940 AddToWorklist(FlipBits.getNode()); 9941 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); 9942 } 9943 APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); 9944 if (N0.getOpcode() == ISD::FNEG) 9945 return DAG.getNode(ISD::XOR, DL, VT, 9946 NewConv, DAG.getConstant(SignBit, DL, VT)); 9947 assert(N0.getOpcode() == ISD::FABS); 9948 return DAG.getNode(ISD::AND, DL, VT, 9949 NewConv, DAG.getConstant(~SignBit, DL, VT)); 9950 } 9951 9952 // fold (bitconvert (fcopysign cst, x)) -> 9953 // (or (and (bitconvert x), sign), (and cst, (not sign))) 9954 // Note that we don't handle (copysign x, cst) because this can always be 9955 // folded to an fneg or fabs. 
9956 // 9957 // For ppc_fp128: 9958 // fold (bitcast (fcopysign cst, x)) -> 9959 // flipbit = (and (extract_element 9960 // (xor (bitcast cst), (bitcast x)), 0), 9961 // signbit) 9962 // (xor (bitcast cst) (build_pair flipbit, flipbit)) 9963 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && 9964 isa<ConstantFPSDNode>(N0.getOperand(0)) && 9965 VT.isInteger() && !VT.isVector()) { 9966 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits(); 9967 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); 9968 if (isTypeLegal(IntXVT)) { 9969 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1)); 9970 AddToWorklist(X.getNode()); 9971 9972 // If X has a different width than the result/lhs, sext it or truncate it. 9973 unsigned VTWidth = VT.getSizeInBits(); 9974 if (OrigXWidth < VTWidth) { 9975 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); 9976 AddToWorklist(X.getNode()); 9977 } else if (OrigXWidth > VTWidth) { 9978 // To get the sign bit in the right place, we have to shift it right 9979 // before truncating. 
9980 SDLoc DL(X); 9981 X = DAG.getNode(ISD::SRL, DL, 9982 X.getValueType(), X, 9983 DAG.getConstant(OrigXWidth-VTWidth, DL, 9984 X.getValueType())); 9985 AddToWorklist(X.getNode()); 9986 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 9987 AddToWorklist(X.getNode()); 9988 } 9989 9990 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { 9991 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2); 9992 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); 9993 AddToWorklist(Cst.getNode()); 9994 SDValue X = DAG.getBitcast(VT, N0.getOperand(1)); 9995 AddToWorklist(X.getNode()); 9996 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); 9997 AddToWorklist(XorResult.getNode()); 9998 SDValue XorResult64 = DAG.getNode( 9999 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult, 10000 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), 10001 SDLoc(XorResult))); 10002 AddToWorklist(XorResult64.getNode()); 10003 SDValue FlipBit = 10004 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64, 10005 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64)); 10006 AddToWorklist(FlipBit.getNode()); 10007 SDValue FlipBits = 10008 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); 10009 AddToWorklist(FlipBits.getNode()); 10010 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); 10011 } 10012 APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); 10013 X = DAG.getNode(ISD::AND, SDLoc(X), VT, 10014 X, DAG.getConstant(SignBit, SDLoc(X), VT)); 10015 AddToWorklist(X.getNode()); 10016 10017 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); 10018 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, 10019 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); 10020 AddToWorklist(Cst.getNode()); 10021 10022 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); 10023 } 10024 } 10025 10026 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 
10027 if (N0.getOpcode() == ISD::BUILD_PAIR) 10028 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT)) 10029 return CombineLD; 10030 10031 // Remove double bitcasts from shuffles - this is often a legacy of 10032 // XformToShuffleWithZero being used to combine bitmaskings (of 10033 // float vectors bitcast to integer vectors) into shuffles. 10034 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) 10035 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && 10036 N0->getOpcode() == ISD::VECTOR_SHUFFLE && 10037 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && 10038 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { 10039 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0); 10040 10041 // If operands are a bitcast, peek through if it casts the original VT. 10042 // If operands are a constant, just bitcast back to original VT. 10043 auto PeekThroughBitcast = [&](SDValue Op) { 10044 if (Op.getOpcode() == ISD::BITCAST && 10045 Op.getOperand(0).getValueType() == VT) 10046 return SDValue(Op.getOperand(0)); 10047 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || 10048 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) 10049 return DAG.getBitcast(VT, Op); 10050 return SDValue(); 10051 }; 10052 10053 // FIXME: If either input vector is bitcast, try to convert the shuffle to 10054 // the result type of this bitcast. This would eliminate at least one 10055 // bitcast. See the transform in InstCombine. 10056 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); 10057 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); 10058 if (!(SV0 && SV1)) 10059 return SDValue(); 10060 10061 int MaskScale = 10062 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); 10063 SmallVector<int, 8> NewMask; 10064 for (int M : SVN->getMask()) 10065 for (int i = 0; i != MaskScale; ++i) 10066 NewMask.push_back(M < 0 ? 
                                  -1 : M * MaskScale + i);

    // Only emit the widened shuffle if the target considers the scaled mask
    // legal; otherwise try the commuted form before giving up.
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}

// Combine a BUILD_PAIR by delegating to CombineConsecutiveLoads, which can
// merge the two halves into a single wider load when they are consecutive.
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  EVT VT = N->getValueType(0);
  return CombineConsecutiveLoads(N, VT);
}

/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        // On little-endian targets the first input element ends up in the low
        // bits, so walk the inputs in reverse while shifting left.
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.isUndef()) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // An output element is undef only if every contributing input was undef.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    if (Op.isUndef()) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    // Peel DstBitSize chunks off the low end of the value.
    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal.lshrInPlace(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getBuildVector(VT, DL, Ops);
}

// An FP operation may be contracted (fused into an FMA-style node) when it
// carries either the 'contract' or the 'reassoc' fast-math flag.
static bool isContractable(SDNode *N) {
  SDNodeFlags F = N->getFlags();
  return F.hasAllowContract() || F.hasAllowReassociation();
}

/// Try to perform FMA combining on a given FADD node.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  SDNodeFlags Flags = N->getFlags();
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Defer to the machine combiner when the target prefers forming FMAs there.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.  (The lambda parameter N deliberately shadows the outer N.)
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1, Flags);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0, Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)), N1, Flags);
    }
  }

  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)), N0, Flags);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
    if (CanFuse &&
        N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1, Flags), Flags);
    }

    // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
    if (CanFuse &&
        N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
        N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0, Flags), Flags);
    }

    // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), z))
    auto FoldFAddFMAFPExtFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
          SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                      N020.getOperand(0), N020.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    auto FoldFAddFPExtFMAFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
          SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                      N002.getOperand(0), N002.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode) {
      SDValue N12 = N1.getOperand(2);
      if (N12.getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N12.getOperand(0);
        if (isContractableFMUL(N120) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                      N120.getOperand(0), N120.getOperand(1),
                                      N0, Flags);
        }
      }
    }

    // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == PreferredFusedOpcode) {
        SDValue N102 = N10.getOperand(2);
        if (isContractableFMUL(N102) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                      N102.getOperand(0), N102.getOperand(1),
                                      N0, Flags);
        }
      }
    }
  }

  return SDValue();
}

/// Try to perform FMA combining on a given FSUB node.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  const SDNodeFlags Flags = N->getFlags();
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);

  // If the subtraction is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Defer to the machine combiner when the target prefers forming FMAs there.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.  (The lambda parameter N deliberately shadows the outer N.)
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1),
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT,
                                   N1.getOperand(0)),
                       N1.getOperand(1), N0, Flags);
  }

  // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fsub (fpext (fmul x, y)), z)
  //   -> (fma (fpext x), (fpext y), (fneg z))
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)),
                         DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
    }
  }

  // fold (fsub x, (fpext (fmul y, z)))
  //   -> (fma (fneg (fpext y)), (fpext z), x)
  // Note: Commutes FSUB operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                 N10.getOperand(0))),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)),
                         N0, Flags);
    }
  }

  // fold (fsub (fpext (fneg (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FNEG) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // fold (fsub (fneg (fpext (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x)), (fpext y), z)
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // from implementing the canonicalization in visitFSUB.
  if (N0.getOpcode() == ISD::FNEG) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::FP_EXTEND) {
      SDValue N000 = N00.getOperand(0);
      if (isContractableFMUL(N000) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
        return DAG.getNode(ISD::FNEG, SL, VT,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(0)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N000.getOperand(1)),
                                       N1, Flags));
      }
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y (fma u, v, (fneg z)))
    if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
        N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT,
                                                 N1), Flags), Flags);
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N1.getOperand(2))) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FNEG, SL, VT, N20),
                                     N21, N0, Flags), Flags);
    }

    // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             N0.getOperand(0), N0.getOperand(1),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N020.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y),
    //           (fma (fpext u), (fpext v), (fneg z)))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(0)),
                             DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                         N00.getOperand(1)),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N002.getOperand(1)),
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     N1), Flags), Flags);
        }
      }
    }

    // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
    //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
      SDValue N120 = N1.getOperand(2).getOperand(0);
      if (isContractableFMUL(N120) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
        SDValue N1200 = N120.getOperand(0);
        SDValue N1201 = N120.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
                           N1.getOperand(1),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1200)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1201),
                                       N0, Flags), Flags);
      }
    }

    // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
    //   -> (fma (fneg (fpext y)), (fpext z),
    //           (fma (fneg (fpext u)), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND &&
        N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
      SDValue CvtSrc = N1.getOperand(0);
      SDValue N100 = CvtSrc.getOperand(0);
      SDValue N101 = CvtSrc.getOperand(1);
      SDValue N102 = CvtSrc.getOperand(2);
      if (isContractableFMUL(N102) &&
          TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
        SDValue N1020 = N102.getOperand(0);
        SDValue N1021 = N102.getOperand(1);
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N100)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FNEG, SL, VT,
                                                   DAG.getNode(ISD::FP_EXTEND, SL,
                                                               VT, N1020)),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N1021),
                                       N0, Flags), Flags);
      }
    }
  }

  return SDValue();
}

/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition).
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);
  const SDNodeFlags Flags = N->getFlags();

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  if (!Options.NoInfsFPMath)
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
  // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
  auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      // Second argument 'true' also accepts splat vector constants.
      if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y, Flags);
        if (C->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
      }
    }
    return SDValue();
  };

  // Try both operand orders since FMUL is commutative.
  if (SDValue FMA = FuseFADD(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0, Flags))
    return FMA;

  // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
  // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
  // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
  // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
  auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
        if (C0->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             Y, Flags);
        if (C0->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
      }
      if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
        if (C1->isExactlyValue(+1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
        if (C1->isExactlyValue(-1.0))
          return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                             Y, Flags);
      }
    }
    return SDValue();
  };

  // Try both operand orders since FMUL is commutative.
  if (SDValue FMA = FuseFSUB(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0, Flags))
    return FMA;

  return SDValue();
}

SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
  if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
      return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // isNegatibleForFree == 2 means the negation is free (no extra FNEG needed).
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) ==
2) 10844 return DAG.getNode(ISD::FSUB, DL, VT, N1, 10845 GetNegatedExpression(N0, DAG, LegalOperations), Flags); 10846 10847 auto isFMulNegTwo = [](SDValue FMul) { 10848 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) 10849 return false; 10850 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true); 10851 return C && C->isExactlyValue(-2.0); 10852 }; 10853 10854 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B) 10855 if (isFMulNegTwo(N0)) { 10856 SDValue B = N0.getOperand(0); 10857 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags); 10858 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags); 10859 } 10860 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B) 10861 if (isFMulNegTwo(N1)) { 10862 SDValue B = N1.getOperand(0); 10863 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags); 10864 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags); 10865 } 10866 10867 // No FP constant should be created after legalization as Instruction 10868 // Selection pass has a hard time dealing with FP constants. 10869 bool AllowNewConst = (Level < AfterLegalizeDAG); 10870 10871 // If 'unsafe math' or nnan is enabled, fold lots of things. 10872 if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) { 10873 // If allowed, fold (fadd (fneg x), x) -> 0.0 10874 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) 10875 return DAG.getConstantFP(0.0, DL, VT); 10876 10877 // If allowed, fold (fadd x, (fneg x)) -> 0.0 10878 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) 10879 return DAG.getConstantFP(0.0, DL, VT); 10880 } 10881 10882 // If 'unsafe math' or reassoc and nsz, fold lots of things. 
10883 // TODO: break out portions of the transformations below for which Unsafe is 10884 // considered and which do not require both nsz and reassoc 10885 if ((Options.UnsafeFPMath || 10886 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && 10887 AllowNewConst) { 10888 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 10889 if (N1CFP && N0.getOpcode() == ISD::FADD && 10890 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { 10891 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags); 10892 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags); 10893 } 10894 10895 // We can fold chains of FADD's of the same value into multiplications. 10896 // This transform is not safe in general because we are reducing the number 10897 // of rounding steps. 10898 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { 10899 if (N0.getOpcode() == ISD::FMUL) { 10900 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); 10901 bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); 10902 10903 // (fadd (fmul x, c), x) -> (fmul x, c+1) 10904 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { 10905 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), 10906 DAG.getConstantFP(1.0, DL, VT), Flags); 10907 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); 10908 } 10909 10910 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) 10911 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && 10912 N1.getOperand(0) == N1.getOperand(1) && 10913 N0.getOperand(0) == N1.getOperand(0)) { 10914 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), 10915 DAG.getConstantFP(2.0, DL, VT), Flags); 10916 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); 10917 } 10918 } 10919 10920 if (N1.getOpcode() == ISD::FMUL) { 10921 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); 10922 bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); 10923 10924 
// (fadd x, (fmul x, c)) -> (fmul x, c+1) 10925 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { 10926 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), 10927 DAG.getConstantFP(1.0, DL, VT), Flags); 10928 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); 10929 } 10930 10931 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) 10932 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && 10933 N0.getOperand(0) == N0.getOperand(1) && 10934 N1.getOperand(0) == N0.getOperand(0)) { 10935 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), 10936 DAG.getConstantFP(2.0, DL, VT), Flags); 10937 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); 10938 } 10939 } 10940 10941 if (N0.getOpcode() == ISD::FADD) { 10942 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); 10943 // (fadd (fadd x, x), x) -> (fmul x, 3.0) 10944 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && 10945 (N0.getOperand(0) == N1)) { 10946 return DAG.getNode(ISD::FMUL, DL, VT, 10947 N1, DAG.getConstantFP(3.0, DL, VT), Flags); 10948 } 10949 } 10950 10951 if (N1.getOpcode() == ISD::FADD) { 10952 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); 10953 // (fadd x, (fadd x, x)) -> (fmul x, 3.0) 10954 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && 10955 N1.getOperand(0) == N0) { 10956 return DAG.getNode(ISD::FMUL, DL, VT, 10957 N0, DAG.getConstantFP(3.0, DL, VT), Flags); 10958 } 10959 } 10960 10961 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) 10962 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && 10963 N0.getOperand(0) == N0.getOperand(1) && 10964 N1.getOperand(0) == N1.getOperand(1) && 10965 N0.getOperand(0) == N1.getOperand(0)) { 10966 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), 10967 DAG.getConstantFP(4.0, DL, VT), Flags); 10968 } 10969 } 10970 } // enable-unsafe-fp-math 10971 10972 // FADD -> FMA combines: 10973 if (SDValue Fused = visitFADDForFMACombine(N)) { 10974 
    AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}

/// Try to simplify an FSUB node. Returns the replacement value, or an empty
/// SDValue when no fold applies. NOTE: the order of the folds matters; each
/// early return commits to the first applicable transform.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // AllowUndefs=true: splat constants with undef lanes still match.
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // (fsub A, 0) -> A
  // A - (+0.0) is always A; A - (-0.0) additionally needs nsz (or global
  // unsafe math) because (-0.0) - (-0.0) = +0.0 but A would be -0.0.
  if (N1CFP && N1CFP->isZero()) {
    if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
        Flags.hasNoSignedZeros()) {
      return N0;
    }
  }

  if (N0 == N1) {
    // (fsub x, x) -> 0.0
    // Gated on nnan (or unsafe math): NaN - NaN is NaN, not 0.0.
    if (Options.UnsafeFPMath || Flags.hasNoNaNs())
      return DAG.getConstantFP(0.0f, DL, VT);
  }

  // (fsub -0.0, N1) -> -N1
  // Prefer a free negation of N1 over materializing an FNEG node.
  if (N0CFP && N0CFP->isZero()) {
    if (N0CFP->isNegative() ||
        (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N1, DAG, LegalOperations);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
    }
  }

  // The folds below reassociate, so they require global unsafe math or both
  // the reassoc and nsz flags on this node.
  if ((Options.UnsafeFPMath ||
      (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
      && N1.getOpcode() == ISD::FADD) {
    // X - (X + Y) -> -Y
    if (N0 == N1->getOperand(0))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
    // X - (Y + X) -> -Y
    if (N0 == N1->getOperand(1))
      return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
  }

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // FSUB -> FMA combines:
  if (SDValue Fused = visitFSUBForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}

/// Try to simplify an FMUL node. Returns the replacement value, or an empty
/// SDValue when no fold applies. (Body continues below; folds are attempted
/// in order and the first applicable one wins.)
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // AllowUndefs=true: splat constants with undef lanes still match.
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  // Later folds assume any constant operand is N1, so do this early.
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Folding A*0 -> 0 needs nnan (NaN*0 is NaN) and nsz ((-A)*0 is -0.0),
  // or global unsafe math.
  if (Options.UnsafeFPMath ||
      (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
    // fold (fmul A, 0) -> 0
    if (N1CFP && N1CFP->isZero())
      return N1;
  }

  // Reassociating folds: gated on global unsafe math or the reassoc flag.
  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    if (isConstantFPBuildVectorOrConstantFP(N1) &&
        N0.getOpcode() == ISD::FMUL) {
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      // Avoid an infinite loop by making sure that N00 is not a constant
      // (the inner multiply has not been constant folded yet).
      if (isConstantFPBuildVectorOrConstantFP(N01) &&
          !isConstantFPBuildVectorOrConstantFP(N00)) {
        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
        return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
      }
    }

    // Match a special-case: we convert X * 2.0 into fadd.
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
    // hasOneUse() keeps the fadd alive for no other user before we fold it.
    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
        N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  // Requires nnan and nsz on this node, plus a legal FABS on the target.
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Normalize so Select holds the select and X holds the other operand.
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    // Only handle a setcc comparing X against the constant 0.0.
    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      // For "less than" conditions, swap the select arms so the code below
      // can treat everything as a "greater than" comparison.
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        std::swap(TrueOpnd, FalseOpnd);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                             DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused =
      visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}

/// Try to simplify an FMA node (fma x, y, z == x*y + z with one rounding).
/// Returns the replacement value, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // FMA nodes have flags that propagate to the created nodes.
  const SDNodeFlags Flags = N->getFlags();
  // For FMA, a contractable node is treated like global unsafe math.
  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);

  // Constant fold FMA.
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  // fma 0, y, z -> z and fma x, 0, z -> z (drops NaN/signed-zero behavior,
  // hence the unsafe-math gate).
  if (UnsafeFPMath) {
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // TODO: The FMA node should have flags that propagate to these nodes.
  // fma 1.0, y, z -> fadd y, z ; fma x, 1.0, z -> fadd x, z
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  if (UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
                                     Flags), Flags);
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
                                     Flags),
                         N2);
    }
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x -K, y
    // Only profitable when the negated constant is free to materialize or
    // the constant operand has a single use and its immediate is not legal.
    if (N0.getOpcode() == ISD::FNEG &&
        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
    }
  }

  if (UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
                         Flags);
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
                         Flags);
    }
  }

  return SDValue();
}

// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // Requires global unsafe math or the arcp (allow-reciprocal) flag on N.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
  if (!UnsafeMath && !Flags.hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();
  if (!MinUses || N1->use_size() < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if (Users.size() < MinUses)
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  // Every eligible user (including N itself) is rewritten via CombineTo.
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  // Returning SDValue(N, 0) signals the combiner that N itself was replaced.
  return SDValue(N, 0);  // N was replaced.
}

/// Try to simplify an FDIV node. Returns the replacement value, or an empty
/// SDValue when no fold applies. (Body continues below.)
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Reciprocal-based folds: gated on global unsafe math or the arcp flag.
  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    // x / sqrt(y) -> x * rsqrt-estimate(y), also looking through an
    // intervening fp_extend or fp_round around the sqrt.
    if (N1.getOpcode() == ISD::FSQRT) {
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
    return CombineRepeatedDivisors;

  return SDValue();
}

/// Try to simplify an FREM node. The only folds here are constant folding
/// and pushing the operation into a select.
SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  return SDValue();
}

/// Try to replace an FSQRT with a target estimate sequence. This is only
/// legal with global unsafe math or the afn (approximate-functions) flag,
/// and is skipped when the target reports that a real sqrt is cheap.
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
  SDNodeFlags Flags = N->getFlags();
  if (!DAG.getTarget().Options.UnsafeFPMath &&
      !Flags.hasApproximateFuncs())
    return SDValue();

  SDValue N0 = N->getOperand(0);
  if (TLI.isFsqrtCheap(N0, DAG))
    return SDValue();

  // FSQRT nodes have flags that propagate to the created nodes.
  return buildSqrtEstimate(N0, Flags);
}

/// copysign(x, fp_extend(y)) -> copysign(x, y)
/// copysign(x, fp_round(y)) -> copysign(x, y)
/// Returns true if the conversion around the sign operand may be looked
/// through safely.
static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
  SDValue N1 = N->getOperand(1);
  if ((N1.getOpcode() == ISD::FP_EXTEND ||
       N1.getOpcode() == ISD::FP_ROUND)) {
    // Do not optimize out type conversion of f128 type yet.
    // For some targets like x86_64, configuration is changed to keep one f128
    // value in one SSE register, but instruction selection cannot handle
    // FCOPYSIGN on SSE registers yet.
    EVT N1VT = N1->getValueType(0);
    EVT N1Op0VT = N1->getOperand(0).getValueType();
    return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
  }
  return false;
}

/// Try to simplify an FCOPYSIGN node. Returns the replacement value, or an
/// empty SDValue when no fold applies.
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);

  if (N0CFP && N1CFP) // Constant fold
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);

  // A constant (or constant-splat) sign operand reduces copysign to
  // fabs / fneg(fabs).
  if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
    const APFloat &V = N1C->getValueAPF();
    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    if (!V.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                           DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
    }
  }

  // The magnitude operand's own sign is irrelevant, so sign-changing
  // wrappers around it can be stripped:
  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  if (N0.getOpcode() == ISD::FABS ||
      N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);

  // copysign(x, abs(y)) -> abs(x)
  if (N1.getOpcode() == ISD::FABS)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // copysign(x, copysign(y,z)) -> copysign(x, z)
  if (N1.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));

  return SDValue();
}

/// Try to simplify an FPOW node with a constant (or constant-splat)
/// exponent into cheaper operations (cbrt or repeated sqrt).
SDValue DAGCombiner::visitFPOW(SDNode *N) {
  ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
  if (!ExponentC)
    return SDValue();

  // Try to convert x ** (1/3) into cube root.
  // TODO: Handle the various flavors of long double.
  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
  //       Some range near 1/3 should be fine.
  EVT VT = N->getValueType(0);
  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
      (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf nnan afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Do not create a cbrt() libcall if the target does not have it, and do not
    // turn a pow that has lowering support into a cbrt() libcall.
    if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
        (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
         DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
      return SDValue();

    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
  }

  // Try to convert x ** (1/4) into square roots.
  // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
  // TODO: This could be extended (using a target hook) to handle smaller
  // power-of-2 fractional exponents.
  if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
    // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
    // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
    // For regular numbers, rounding may cause the results to differ.
    // Therefore, we require { nsz ninf afn } for this transform.
    // TODO: We could select out the special cases if we don't have nsz/ninf.
    SDNodeFlags Flags = N->getFlags();
    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
        !Flags.hasApproximateFuncs())
      return SDValue();

    // Don't double the number of libcalls. We are trying to inline fast code.
    if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
      return SDValue();

    // Assume that libcalls are the smallest code.
    // TODO: This restriction should probably be lifted for vectors.
    if (DAG.getMachineFunction().getFunction().optForSize())
      return SDValue();

    // pow(X, 0.25) --> sqrt(sqrt(X))
    SDLoc DL(N);
    SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
    return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
  }

  return SDValue();
}

/// Fold [us]itofp(fpto[us]i X) --> ftrunc X when the target has a legal
/// FTRUNC and the round-trip through the integer type would behave like a
/// truncation. Guarded by the "strict-float-cast-overflow" function
/// attribute and the no-signed-zeros option (see comments below).
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
                               const TargetLowering &TLI) {
  // This optimization is guarded by a function attribute because it may produce
  // unexpected results. Ie, programs may be relying on the platform-specific
  // undefined behavior when the float-to-int conversion overflows.
  const Function &F = DAG.getMachineFunction().getFunction();
  Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
  if (StrictOverflow.getValueAsString().equals("false"))
    return SDValue();

  // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
  // replacing casts with a libcall. We also must be allowed to ignore -0.0
  // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
  // conversions would return +0.0.
  // FIXME: We should be able to use node-level FMF here.
  // TODO: If strict math, should we use FABS (+ range check for signed cast)?
  EVT VT = N->getValueType(0);
  if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
      !DAG.getTarget().Options.NoSignedZerosFPMath)
    return SDValue();

  // fptosi/fptoui round towards zero, so converting from FP to integer and
  // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
  SDValue N0 = N->getOperand(0);
  if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
      N0.getOperand(0).getValueType() == VT)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));

  if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
      N0.getOperand(0).getValueType() == VT)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));

  return SDValue();
}

/// Try to simplify a SINT_TO_FP node. Returns the replacement value, or an
/// empty SDValue when no fold applies.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
      hasOperation(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
    // (-1.0 because sign-extending an i1 true gives all-ones == -1.)
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0,, cc)
    // (zero-extended i1 true is 1, hence 1.0 here instead of -1.0.)
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}

/// Try to simplify a UINT_TO_FP node. Mirrors visitSINT_TO_FP. (Body
/// continues below.)
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
      hasOperation(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0,, cc)
    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  // [us]itofp (fpto[us]i X) --> ftrunc X (see foldFPToIntToFP above).
  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}

// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  // Note: signed types contribute one fewer magnitude bit than their width.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range.
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}

SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_sint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);

  return FoldIntToFPToInt(N, DAG);
}

SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_uint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);

  return FoldIntToFPToInt(N, DAG);
}

SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // Operand 1 of FP_ROUND is 1 iff the round is known value-preserving.
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64. Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // NOTE: this local deliberately (and confusingly) shadows the EVT type name;
  // every later use of 'EVT' in this function refers to the variable.
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);

  // fold (fp_round_inreg c1fp) -> c1fp
  if (N0CFP && isTypeLegal(EVT)) {
    SDLoc DL(N);
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
    return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
  }

  return SDValue();
}

SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Other uses of the load see a value-preserving fp_round of the extload.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

SDValue DAGCombiner::visitFCEIL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fceil c1) -> fceil(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);

  return SDValue();
}

SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ftrunc c1) -> ftrunc(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);

  // fold ftrunc (known rounded int x) -> x
  // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
  // likely to be generated to extract integer from a rounded floating value.
  switch (N0.getOpcode()) {
  default: break;
  case ISD::FRINT:
  case ISD::FTRUNC:
  case ISD::FNEARBYINT:
  case ISD::FFLOOR:
  case ISD::FCEIL:
    return N0;
  }

  return SDValue();
}

SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ffloor c1) -> ffloor(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);

  return SDValue();
}

// FIXME: FNEG and FABS have a lot in common; refactor.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only profitable post-legalization when the negated constant is
      // itself cheap to materialize.
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}

/// Shared combine for the FMINNUM/FMAXNUM/FMINIMUM/FMAXIMUM visitors.
/// \p Op is the APFloat constant-folding primitive matching N's opcode.
static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
                            APFloat (*Op)(const APFloat &, const APFloat &)) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
  }

  // Canonicalize to constant on RHS.
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
      !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  return SDValue();
}

SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  return visitFMinMax(DAG, N, minnum);
}

SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  return visitFMinMax(DAG, N, maxnum);
}

SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
  return visitFMinMax(DAG, N, minimum);
}

SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
  return visitFMinMax(DAG, N, maximum);
}

SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
  if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL(N0);
      Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
                        DAG.getConstant(SignMask, DL, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(N->getValueType(0), Int);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  if (N1.hasOneUse()) {
    if (SDValue NewN1 = rebuildSetCC(N1))
      return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
  }

  return SDValue();
}

SDValue DAGCombiner::rebuildSetCC(SDValue N) {
  if (N.getOpcode() == ISD::SRL ||
      (N.getOpcode() == ISD::TRUNCATE &&
       (N.getOperand(0).hasOneUse() &&
        N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
    if (N.getOpcode() == ISD::TRUNCATE)
      N = N.getOperand(0);

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N.getOperand(0);
    SDValue Op1 = N.getOperand(1);

    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
          SDLoc DL(N);
          return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
                              Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                              ISD::SETNE);
        }
      }
    }
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N.getOpcode() == ISD::XOR) {
    // Because we may call this on a speculatively constructed
    // SimplifiedSetCC Node, we need to simplify this node first.
    // Ideally this should be folded into SimplifySetCC and not
    // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
    HandleSDNode XORHandle(N);
    while (N.getOpcode() == ISD::XOR) {
      SDValue Tmp = visitXOR(N.getNode());
      // No simplification done.
      if (!Tmp.getNode())
        break;
      // Returning N is a form of in-visit replacement that may have
      // invalidated N; grab the (updated) value from the handle instead.
      if (Tmp.getNode() == N.getNode())
        N = XORHandle.getValue();
      else // Node simplified. Try simplifying again.
        N = Tmp;
    }

    if (N.getOpcode() != ISD::XOR)
      return N;

    SDNode *TheXor = N.getNode();

    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // NOTE(review): isOneConstant(Op0) and Op0.getOpcode() == ISD::XOR can
      // never both hold (a constant node's opcode is ISD::Constant), so Equal
      // appears to stay false and the xor-with-1 (SETEQ) path looks dead.
      // Presumably this meant to test the constant-1 on the other operand --
      // confirm against upstream before changing.
      if (isOneConstant(Op0) && Op0.hasOneUse() &&
          Op0.getOpcode() == ISD::XOR) {
        TheXor = Op0.getNode();
        Equal = true;
      }

      EVT SetCCVT = N.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      // Replace the uses of XOR with SETCC
      return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
                          Equal ? ISD::SETEQ : ISD::SETNE);
    }
  }

  return SDValue();
}

// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
//
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // Use SimplifySetCC to simplify SETCC's.
  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
                               CondLHS, CondRHS, CC->get(), SDLoc(N),
                               false);
  if (Simp.getNode()) AddToWorklist(Simp.getNode());

  // fold to a simpler setcc
  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       N->getOperand(0), Simp.getOperand(2),
                       Simp.getOperand(0), Simp.getOperand(1),
                       N->getOperand(4));

  return SDValue();
}

/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
                                    SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  EVT VT;
  unsigned AS;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
      return false;
    VT = LD->getMemoryVT();
    AS = LD->getAddressSpace();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
      return false;
    VT = ST->getMemoryVT();
    AS = ST->getAddressSpace();
  } else
    return false;

  TargetLowering::AddrMode AM;
  if (N->getOpcode() == ISD::ADD) {
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else if (N->getOpcode() == ISD::SUB) {
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = -Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else
    return false;

  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
                                   VT.getTypeForEVT(*DAG.getContext()), AS);
}

/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out. There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}

/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Post-indexed forms are only formed after DAG legalization so the
  // target addressing-mode legality queries below are meaningful.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    // Neither a load nor a store: nothing to combine.
    return false;
  }

  // If this memory op is the pointer's only user there is no add/sub of the
  // pointer to fold, so nothing to do.
  if (Ptr.getNode()->hasOneUse())
    return false;

  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            // A use that cannot be folded into an addressing mode keeps the
            // add/sub alive, so it is a "real" use.
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2.
      SmallPtrSet<const SDNode *, 32> Visited;
      SmallVector<const SDNode *, 8> Worklist;
      // Ptr is predecessor to both N and Op.
      Visited.insert(Ptr.getNode());
      Worklist.push_back(N);
      Worklist.push_back(Op);
      if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
          !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
                   dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        if (isLoad) {
          // Indexed load results: 0 = value, 1 = updated base, 2 = chain.
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}

/// Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
  assert((Inc.getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
         "Cannot split out indexing using opaque target constants");
  if (Inc.getOpcode() == ISD::TargetConstant) {
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
                          ConstInc->getValueType(0));
  }

  // PRE_INC/POST_INC add the offset; PRE_DEC/POST_DEC subtract it.
  unsigned Opc =
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}

/// Number of vector elements in \p T, or 0 if \p T is not a vector type.
static inline int numVectorEltsOrZero(EVT T) {
  return T.isVector() ? T.getVectorNumElements() : 0;
}

/// Extract the value stored by \p ST into \p Val, converted (FP-truncated,
/// integer-truncated, or bitcast) to the store's memory type when needed.
/// \returns true on success, false if no suitable conversion was found.
bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
  Val = ST->getValue();
  EVT STType = Val.getValueType();
  EVT STMemType = ST->getMemoryVT();
  if (STType == STMemType)
    return true;
  // NOTE(review): bails out when the memory type IS legal — presumably
  // truncating stores of legal types are handled by other combines; confirm.
  if (isTypeLegal(STMemType))
    return false; // fail.
  // Float -> narrower float: use FTRUNC when the target supports it.
  if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
      TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
    Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
    return true;
  }
  // Integer (scalar, or vector with matching element count): plain truncate.
  if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
      STType.isInteger() && STMemType.isInteger()) {
    Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
    return true;
  }
  // Same bit width: reinterpret the bits with a bitcast.
  if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
    Val = DAG.getBitcast(STMemType, Val);
    return true;
  }
  return false; // fail.
}

/// Extend \p Val (which must have \p LD's memory type) to the load's result
/// type, honoring the load's extension kind (bitcast for non-extending,
/// any/sign/zero-extend otherwise). \returns true on success.
bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
  EVT LDMemType = LD->getMemoryVT();
  EVT LDType = LD->getValueType(0);
  assert(Val.getValueType() == LDMemType &&
         "Attempting to extend value of non-matching type");
  if (LDType == LDMemType)
    return true;
  if (LDMemType.isInteger() && LDType.isInteger()) {
    switch (LD->getExtensionType()) {
    case ISD::NON_EXTLOAD:
      Val = DAG.getBitcast(LDType, Val);
      return true;
    case ISD::EXTLOAD:
      Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
      return true;
    case ISD::SEXTLOAD:
      Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
      return true;
    case ISD::ZEXTLOAD:
      Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
      return true;
    }
  }
  // Non-integer extensions are not handled here.
  return false;
}

/// If \p LD loads exactly the bytes written by the store that is its direct
/// chain operand, forward the stored value to the load's users instead of
/// re-reading memory. \returns the replacement value, or an empty SDValue
/// when no forwarding is possible.
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
  if (OptLevel == CodeGenOpt::None || LD->isVolatile())
    return SDValue();
  // Only look through a store that is the load's immediate chain predecessor.
  SDValue Chain = LD->getOperand(0);
  StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
  if (!ST || ST->isVolatile())
    return SDValue();

  EVT LDType = LD->getValueType(0);
  EVT LDMemType = LD->getMemoryVT();
  EVT STMemType = ST->getMemoryVT();
  EVT STType = ST->getValue().getValueType();

  BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
  BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
  int64_t Offset;
  // Require a provably-equal base/index so Offset is meaningful.
  if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
    return SDValue();

  // Normalize for Endianness. After this Offset=0 will denote that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value). With Offset=n (for n > 0) the loaded value starts at the
  // n:th least significant byte of the stored value.
  if (DAG.getDataLayout().isBigEndian())
    Offset = (STMemType.getStoreSizeInBits() -
              LDMemType.getStoreSizeInBits()) / 8 - Offset;

  // Check that the stored value covers all bits that are loaded.
  bool STCoversLD =
      (Offset >= 0) &&
      (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());

  // Replace \p LD with \p Val; for an indexed load also materialize the
  // pointer increment as an explicit ADD/SUB so result numbering is kept.
  auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
    if (LD->isIndexed()) {
      bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
                    LD->getAddressingMode() == ISD::POST_DEC);
      unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
      SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
                                LD->getOperand(1), LD->getOperand(2));
      SDValue Ops[] = {Val, Idx, Chain};
      return CombineTo(LD, Ops, 3);
    }
    return CombineTo(LD, Val, Chain);
  };

  if (!STCoversLD)
    return SDValue();

  // Memory as copy space (potentially masked).
  if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
    // Simple case: Direct non-truncating forwarding
    if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
      return ReplaceLd(LD, ST->getValue(), Chain);
    // Can we model the truncate and extension with an and mask?
    if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
        !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
      // Mask to size of LDMemType
      auto Mask =
          DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
                                               STMemType.getSizeInBits()),
                          SDLoc(ST), STType);
      auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
      return ReplaceLd(LD, Val, Chain);
    }
  }

  // TODO: Deal with nonzero offset.
  if (LD->getBasePtr().isUndef() || Offset != 0)
    return SDValue();
  // Model necessary truncations / extensions.
  SDValue Val;
  // Truncate Value To Stored Memory Size.
  // The do { } while (false) makes 'continue' behave as an early exit to the
  // shared cleanup path below.
  do {
    if (!getTruncatedStoreValue(ST, Val))
      continue;
    if (!isTypeLegal(LDMemType))
      continue;
    if (STMemType != LDMemType) {
      // TODO: Support vectors? This requires extract_subvector/bitcast.
      if (!STMemType.isVector() && !LDMemType.isVector() &&
          STMemType.isInteger() && LDMemType.isInteger())
        Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
      else
        continue;
    }
    if (!extendLoadedValueToExtension(LD, Val))
      continue;
    return ReplaceLd(LD, Val, Chain);
  } while (false);

  // On failure, cleanup dead nodes we may have created.
  if (Val->use_empty())
    deleteAndRecombine(Val.getNode());
  return SDValue();
}

SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3 = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          // Keep the updated-pointer users working by rematerializing the
          // pointer arithmetic as a plain ADD/SUB.
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  if (auto V = ForwardStoreValueToDirectLoad(LD))
    return V;

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}

namespace {

/// Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;

    /// Various cost counters, each in units of "one instruction".
    unsigned Loads = 0;
    unsigned Truncates = 0;
    unsigned CrossRegisterBanksCopies = 0;
    unsigned ZExts = 0;
    unsigned Shift = 0;

    Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}

    /// Get the cost of one isolated slice.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      // A zext is only charged when the slice must be widened and the target
      // does not consider that widening free.
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// Account for slicing gain in the current cost.
    /// Slicing provides a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };

  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original load instruction.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  unsigned Shift;

  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // A non-zero offset reduces the provable alignment of the sliced load.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the byte holding the least significant bits sits
    // at the opposite end, so mirror the offset.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset),
                     getAlignment(), Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};

} // end anonymous namespace

/// Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
  // If all the bits are one, this is dense!
  if (UsedBits.isAllOnesValue())
    return true;

  // Get rid of the unused bits on the right.
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
  // Get rid of the unused bits on the left.
  if (NarrowedUsedBits.countLeadingZeros())
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
  // Check that the chunk of bits is completely used.
  return NarrowedUsedBits.isAllOnesValue();
}

/// Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
                                     const LoadedSlice &Second) {
  assert(First.Origin == Second.Origin && First.Origin &&
         "Unable to match different memory origins.");
  APInt UsedBits = First.getUsedBits();
  assert((UsedBits & Second.getUsedBits()) == 0 &&
         "Slices are not supposed to overlap.");
  // The union of both slices' bits must itself be a dense region.
  UsedBits |= Second.getUsedBits();
  return areUsedBitsDense(UsedBits);
}

/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                First = Second) {
    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    // A successful pair saves one of the two loads.
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}

/// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there is exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
///
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
/// the elements themselves.
///
/// FIXME: When the cost model will be mature enough, we can relax
/// constraints (1) and (2).
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                const APInt &UsedBits, bool ForCodeSize) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // Under -combiner-stress-load-slicing, slice whenever more than one slice
  // exists, regardless of cost.
  if (StressLoadSlicing)
    return NumberOfSlices > 1;

  // Check (1).
  if (NumberOfSlices != 2)
    return false;

  // Check (2).
  if (!areUsedBitsDense(UsedBits))
    return false;

  // Check (3).
13417 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); 13418 // The original code has one big load. 13419 OrigCost.Loads = 1; 13420 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { 13421 const LoadedSlice &LS = LoadedSlices[CurrSlice]; 13422 // Accumulate the cost of all the slices. 13423 LoadedSlice::Cost SliceCost(LS, ForCodeSize); 13424 GlobalSlicingCost += SliceCost; 13425 13426 // Account as cost in the original configuration the gain obtained 13427 // with the current slices. 13428 OrigCost.addSliceGain(LS); 13429 } 13430 13431 // If the target supports paired load, adjust the cost accordingly. 13432 adjustCostForPairing(LoadedSlices, GlobalSlicingCost); 13433 return OrigCost > GlobalSlicingCost; 13434 } 13435 13436 /// If the given load, \p LI, is used only by trunc or trunc(lshr) 13437 /// operations, split it in the various pieces being extracted. 13438 /// 13439 /// This sort of thing is introduced by SROA. 13440 /// This slicing takes care not to insert overlapping loads. 13441 /// \pre LI is a simple load (i.e., not an atomic or volatile load). 13442 bool DAGCombiner::SliceUpLoad(SDNode *N) { 13443 if (Level < AfterLegalizeDAG) 13444 return false; 13445 13446 LoadSDNode *LD = cast<LoadSDNode>(N); 13447 if (LD->isVolatile() || !ISD::isNormalLoad(LD) || 13448 !LD->getValueType(0).isInteger()) 13449 return false; 13450 13451 // Keep track of already used bits to detect overlapping values. 13452 // In that case, we will just abort the transformation. 13453 APInt UsedBits(LD->getValueSizeInBits(0), 0); 13454 13455 SmallVector<LoadedSlice, 4> LoadedSlices; 13456 13457 // Check if this load is used as several smaller chunks of bits. 13458 // Basically, look for uses in trunc or trunc(lshr) and record a new chain 13459 // of computation for each trunc. 13460 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); 13461 UI != UIEnd; ++UI) { 13462 // Skip the uses of the chain. 
    if (UI.getUse().getResNo() != 0)
      continue;

    SDNode *User = *UI;
    unsigned Shift = 0;

    // Check if this is a trunc(lshr).
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = User->getConstantOperandVal(1);
      User = *User->use_begin();
    }

    // At this point, User is a Truncate iff we encountered trunc or
    // trunc(lshr).
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;

    // The width of the type must be a power of 2 and greater than 8-bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted with a non-8-bits multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;

  ++SlicedLoads;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.

  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    // loadSlice() may have wrapped the load in a zext; peel it off to reach
    // the load whose chain result we need.
    if (SliceInst.getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  // Tie all the new slice loads together on the chain and replace the
  // original load's chain output with the token factor.
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  AddToWorklist(Chain.getNode());
  return true;
}

/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out. If so, return the byte size being masked out
/// and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  // (0, 0) means "no match".
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask. Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  // Number of bytes cleared by the mask; only 1/2/4-byte runs are handled so
  // the narrow store maps onto a legal integer width.
  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result;  // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  // For narrowing to be valid, it must be the case that the load is the
  // immediately preceding memory operation before the store.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() == ISD::TokenFactor &&
           SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use so there are no indirect dependencies.
    bool isOk = false;
    for (const SDValue &ChainOp : Chain->op_values())
      if (ChainOp.getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk)
      return Result;
  } else
    return Result; // Fail.

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}

/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
13648 unsigned StOffset; 13649 unsigned NewAlign = St->getAlignment(); 13650 13651 if (DAG.getDataLayout().isLittleEndian()) 13652 StOffset = ByteShift; 13653 else 13654 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; 13655 13656 SDValue Ptr = St->getBasePtr(); 13657 if (StOffset) { 13658 SDLoc DL(IVal); 13659 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), 13660 Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); 13661 NewAlign = MinAlign(NewAlign, StOffset); 13662 } 13663 13664 // Truncate down to the new size. 13665 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); 13666 13667 ++OpsNarrowed; 13668 return DAG 13669 .getStore(St->getChain(), SDLoc(St), IVal, Ptr, 13670 St->getPointerInfo().getWithOffset(StOffset), NewAlign) 13671 .getNode(); 13672 } 13673 13674 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and 13675 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try 13676 /// narrowing the load and store if it would end up being a win for performance 13677 /// or code size. 13678 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { 13679 StoreSDNode *ST = cast<StoreSDNode>(N); 13680 if (ST->isVolatile()) 13681 return SDValue(); 13682 13683 SDValue Chain = ST->getChain(); 13684 SDValue Value = ST->getValue(); 13685 SDValue Ptr = ST->getBasePtr(); 13686 EVT VT = Value.getValueType(); 13687 13688 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) 13689 return SDValue(); 13690 13691 unsigned Opc = Value.getOpcode(); 13692 13693 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst 13694 // is a byte mask indicating a consecutive number of bytes, check to see if 13695 // Y is known to provide just those bytes. If so, we try to replace the 13696 // load + replace + store sequence with a single (narrower) store, which makes 13697 // the load dead. 
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // Fall through to the generic narrowing of "store (op (load P), cst), P".
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // Normalize AND to "bits being changed" by inverting the mask, so the
    // trailing/leading-zero computation below works for all three opcodes.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lowest changed bit does not start at a NewBW-sized boundary,
    // back up to the previous boundary.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only narrow if all changed bits fit inside the NewBW window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      // Undo the AND mask inversion performed above.
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD =
          DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
                      LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST =
          DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
                       ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // Splice the narrow load into the chain in place of the old load.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}

/// For a given floating point load / store pair, if the load value isn't used
/// by any other operations, then consider transforming the pair to integer
/// load / store operations if the target deems the transformation profitable.
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
      Value.hasOneUse() &&
      Chain == SDValue(Value.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(Value);
    EVT VT = LD->getMemoryVT();
    if (!VT.isFloatingPoint() ||
        VT != ST->getMemoryVT() ||
        LD->isNonTemporal() ||
        ST->isNonTemporal() ||
        LD->getPointerInfo().getAddrSpace() != 0 ||
        ST->getPointerInfo().getAddrSpace() != 0)
      return SDValue();

    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
      return SDValue();

    // The integer replacement must be at least as aligned as the ABI
    // requires for the integer type.
    unsigned LDAlign = LD->getAlignment();
    unsigned STAlign = ST->getAlignment();
    Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
    if (LDAlign < ABIAlign || STAlign < ABIAlign)
      return SDValue();

    SDValue NewLD =
        DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
                    LD->getPointerInfo(), LDAlign);

    SDValue NewST =
        DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
                     ST->getPointerInfo(), STAlign);

    AddToWorklist(NewLD.getNode());
    AddToWorklist(NewST.getNode());
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
    ++LdStFP2Int;
    return NewST;
  }

  return SDValue();
}

// This is a helper function for visitMUL to check the profitability
// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
// MulNode is the original multiply, AddNode is (add x, c1),
// and ConstNode is c2.
//
// If the (add x, c1) has multiple uses, we could increase
// the number of adds if we make this transformation.
// It would only be worth doing this if we can remove a
// multiply in the process. Check for that here.
// To illustrate:
//     (A + c1) * c3
//     (A + c2) * c3
// We're checking for cases where we have common "c3 * A" expressions.
bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
                                              SDValue &AddNode,
                                              SDValue &ConstNode) {
  APInt Val;

  // If the add only has one use, this would be OK to do.
  if (AddNode.getNode()->hasOneUse())
    return true;

  // Walk all the users of the constant with which we're multiplying.
  for (SDNode *Use : ConstNode->uses()) {
    if (Use == MulNode) // This use is the one we're on right now. Skip it.
      continue;

    if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
      SDNode *OtherOp;
      SDNode *MulVar = AddNode.getOperand(0).getNode();

      // OtherOp is what we're multiplying against the constant.
      if (Use->getOperand(0) == ConstNode)
        OtherOp = Use->getOperand(1).getNode();
      else
        OtherOp = Use->getOperand(0).getNode();

      // Check to see if multiply is with the same operand of our "add".
      //
      //     ConstNode  = CONST
      //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
      //     ...
      //     AddNode  = (A + c1)  <-- MulVar is A.
      //              = AddNode * ConstNode   <-- current visiting instruction.
      //
      // If we make this transformation, we will have a common
      // multiply (ConstNode * A) that we can save.
      if (OtherOp == MulVar)
        return true;

      // Now check to see if a future expansion will give us a common
      // multiply.
      //
      //     ConstNode  = CONST
      //     AddNode    = (A + c1)
      //     ...   = AddNode * ConstNode <-- current visiting instruction.
      //     ...
      //     OtherOp = (A + c2)
      //     Use     = OtherOp * ConstNode <-- visiting Use.
      //
      // If we make this transformation, we will have a common
      // multiply (CONST * A) after we also do the same transformation
      // to the "t2" instruction.
      if (OtherOp->getOpcode() == ISD::ADD &&
          DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
          OtherOp->getOperand(0).getNode() == MulVar)
        return true;
    }
  }

  // Didn't find a case where this would be profitable.
  return false;
}

/// Build a TokenFactor over the distinct incoming chains of the first
/// \p NumStores stores in \p StoreNodes, skipping chains that are themselves
/// one of the merged stores.
SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         unsigned NumStores) {
  SmallVector<SDValue, 8> Chains;
  SmallPtrSet<const SDNode *, 8> Visited;
  SDLoc StoreDL(StoreNodes[0].MemNode);

  for (unsigned i = 0; i < NumStores; ++i) {
    Visited.insert(StoreNodes[i].MemNode);
  }

  // Don't include nodes that are children (i.e. chains that point at one of
  // the stores being merged).
  for (unsigned i = 0; i < NumStores; ++i) {
    if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
      Chains.push_back(StoreNodes[i].MemNode->getChain());
  }

  assert(Chains.size() > 0 && "Chain should have generated a chain");
  return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
}

/// Merge \p NumStores consecutive stores of constants or extracted vector
/// elements from \p StoreNodes into a single wide store.
/// Returns true if the merge was performed.
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  // The latest Node in the DAG.
  SDLoc DL(StoreNodes[0].MemNode);

  int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
  unsigned SizeInBits = NumStores * ElementSizeBits;
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  EVT StoreTy;
  if (UseVector) {
    unsigned Elts = NumStores * NumMemElts;
    // Get the type for the merged vector store.
    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  } else
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);

  SDValue StoredVal;
  if (UseVector) {
    if (IsConstantSrc) {
      // Gather the constant operands into a BUILD_VECTOR / CONCAT_VECTORS.
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // If constant is of the wrong type, convert it now.
        if (MemVT != Val.getValueType()) {
          Val = peekThroughBitcasts(Val);
          // Deal with constants of wrong size.
          if (ElementSizeBits != Val.getValueSizeInBits()) {
            EVT IntMemVT =
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
            if (isa<ConstantFPSDNode>(Val)) {
              // Not clear how to truncate FP values.
              return false;
            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
              Val = DAG.getConstant(C->getAPIntValue()
                                        .zextOrTrunc(Val.getValueSizeInBits())
                                        .zextOrTrunc(ElementSizeBits),
                                    SDLoc(C), IntMemVT);
          }
          // Make sure the correctly-sized value is bitcast to the element
          // type expected by the merged vector.
          Val = DAG.getBitcast(MemVT, Val);
        }
        BuildVector.push_back(Val);
      }
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = peekThroughBitcasts(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
        // type MemVT. If the underlying value is not the correct
        // type, but it is an extraction of an appropriate vector we
        // can recast Val to be of the correct type. This may require
        // converting between EXTRACT_VECTOR_ELT and
        // EXTRACT_SUBVECTOR.
        if ((MemVT != Val.getValueType()) &&
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
          EVT MemVTScalarTy = MemVT.getScalarType();
          // We may need to add a bitcast here to get types to line up.
          if (MemVTScalarTy != Val.getValueType().getScalarType()) {
            Val = DAG.getBitcast(MemVT, Val);
          } else {
            unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
                                            : ISD::EXTRACT_VECTOR_ELT;
            SDValue Vec = Val.getOperand(0);
            SDValue Idx = Val.getOperand(1);
            Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
          }
        }
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // On little-endian targets the lowest-addressed store supplies the
      // least-significant bits, so visit the stores highest-index first.
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      Val = peekThroughBitcasts(Val);
      StoreInt <<= ElementSizeBits;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF()
                        .bitcastToAPInt()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary give up for now.
        if (MemVT.getSizeInBits() != ElementSizeBits)
          return false;
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (!UseTrunc) {
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store
    EVT LegalizedStoredValTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
    // Only constants take this path, so the cast is safe here.
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}

/// Collect into \p StoreNodes all stores that could be merged with \p St,
/// and report the common chain ancestor in \p RootNode.
void DAGCombiner::getStoreMergeCandidates(
    StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SDNode *&RootNode) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
  EVT MemVT = St->getMemoryVT();

  SDValue Val = peekThroughBitcasts(St->getValue());
  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return;

  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
  bool IsLoadSrc = isa<LoadSDNode>(Val);
  BaseIndexOffset LBasePtr;
  // Match on loadbaseptr if relevant.
  EVT LoadVT;
  if (IsLoadSrc) {
    auto *Ld = cast<LoadSDNode>(Val);
    LBasePtr = BaseIndexOffset::match(Ld, DAG);
    LoadVT = Ld->getMemoryVT();
    // Load and store should be the same type.
    if (MemVT != LoadVT)
      return;
    // Loads must only have one use.
    if (!Ld->hasNUsesOfValue(1, 0))
      return;
    // The memory operands must not be volatile.
    if (Ld->isVolatile() || Ld->isIndexed())
      return;
  }
  // Returns true if Other is a store mergeable with St; on success fills in
  // its BaseIndexOffset and byte offset from BasePtr.
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    if (Other->isVolatile() || Other->isIndexed())
      return false;
    SDValue Val = peekThroughBitcasts(Other->getValue());
    // Allow merging constants of different types as integers.
    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
                                           : Other->getMemoryVT() != MemVT;
    if (IsLoadSrc) {
      if (NoTypeMatch)
        return false;
      // The Load's Base Ptr must also match
      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
        auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
        if (LoadVT != OtherLd->getMemoryVT())
          return false;
        // Loads must only have one use.
        if (!OtherLd->hasNUsesOfValue(1, 0))
          return false;
        // The memory operands must not be volatile.
        if (OtherLd->isVolatile() || OtherLd->isIndexed())
          return false;
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
    if (IsConstantSrc) {
      if (NoTypeMatch)
        return false;
      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
        return false;
    }
    if (IsExtractVecSrc) {
      // Do not merge truncated stores here.
      if (Other->isTruncatingStore())
        return false;
      if (!MemVT.bitsEq(Val.getValueType()))
        return false;
      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
        return false;
    }
    Ptr = BaseIndexOffset::match(Other, DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };

  // We are looking for a root node which is an ancestor to all mergable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load
  // which is always true for nonvolatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
  //
  // Root
  // |-------|-------|
  // Load    Load    Store3
  // |       |
  // Store1 Store2
  //
  // FIXME: We should be able to climb and
  // descend TokenFactors to find candidates as well.

  RootNode = St->getChain().getNode();

  if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
    RootNode = Ldn->getChain().getNode();
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
        for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
          if (I2.getOperandNo() == 0)
            if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
              BaseIndexOffset Ptr;
              int64_t PtrDiff;
              if (CandidateMatch(OtherST, Ptr, PtrDiff))
                StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
            }
  } else
    for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
      if (I.getOperandNo() == 0)
        if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
          BaseIndexOffset Ptr;
          int64_t PtrDiff;
          if (CandidateMatch(OtherST, Ptr, PtrDiff))
            StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
        }
}

// We need to check that merging these stores does not cause a loop in
// the DAG. Any store candidate may depend on another candidate
// indirectly through its operand (we already consider dependencies
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
    SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
    SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs the originating
  // stores from which worklist nodes come from in a similar way to
  // TokenFactor simplification.

  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 8> Worklist;

  // RootNode is a predecessor to all candidates so we need not search
  // past it. Add RootNode (peeking through TokenFactors).
  // Do not count these towards the size check.

  Worklist.push_back(RootNode);
  while (!Worklist.empty()) {
    auto N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue; // Already present in Visited.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (SDValue Op : N->ops())
        Worklist.push_back(Op.getNode());
    }
  }

  // Don't count pruning nodes towards max.
  unsigned int Max = 1024 + Visited.size();
  // Search Ops of store candidates.
  for (unsigned i = 0; i < NumStores; ++i) {
    SDNode *N = StoreNodes[i].MemNode;
    // Of the 4 Store Operands:
    // * Chain (Op 0) -> We have already considered these
    //                   in candidate selection and can be
    //                   safely ignored
    // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
    // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                      but aren't necessarily from the same base node, so
    //                      cycles possible (e.g. via indexed store).
    // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
    //             non-indexed stores). Not constant on all targets (e.g. ARM)
    //             and so can participate in a cycle.
    for (unsigned j = 1; j < N->getNumOperands(); ++j)
      Worklist.push_back(N->getOperand(j).getNode());
  }
  // Search through DAG. We can stop early if we find a store node.
  for (unsigned i = 0; i < NumStores; ++i)
    if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
                                     Max))
      return false;
  return true;
}

bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None)
    return false;

  EVT MemVT = St->getMemoryVT();
  int64_t ElementSizeBytes = MemVT.getStoreSize();
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
    return false;

  bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
      Attribute::NoImplicitFloat);

  // This function cannot currently deal with non-byte-sized memory sizes.
  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
    return false;

  if (!MemVT.isSimple())
    return false;

  // Perform an early exit check. Do not bother looking at stored values that
  // are not constants, loads, or extracted vector elements.
  SDValue StoredVal = peekThroughBitcasts(St->getValue());
  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
                       isa<ConstantFPSDNode>(StoredVal);
  bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);

  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
    return false;

  SmallVector<MemOpLink, 8> StoreNodes;
  SDNode *RootNode;
  // Find potential store merge candidates by searching through chain sub-DAG
  getStoreMergeCandidates(St, StoreNodes, RootNode);

  // Check if there is anything to merge.
  if (StoreNodes.size() < 2)
    return false;

  // Sort the memory operands according to their distance from the
  // base pointer.
  llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
    return LHS.OffsetFromBase < RHS.OffsetFromBase;
  });

  // Store Merge attempts to merge the lowest stores. This generally
  // works out as if successful, as the remaining stores are checked
  // after the first collection of stores is merged. However, in the
  // case that a non-mergeable store is found first, e.g., {p[-2],
  // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
  // mergeable cases. To prevent this, we prune such stores from the
  // front of StoreNodes here.

  bool RV = false;
  while (StoreNodes.size() > 1) {
    unsigned StartIdx = 0;
    while ((StartIdx + 1 < StoreNodes.size()) &&
           StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
               StoreNodes[StartIdx + 1].OffsetFromBase)
      ++StartIdx;

    // Bail if we don't have enough candidates to merge.
    if (StartIdx + 1 >= StoreNodes.size())
      return RV;

    if (StartIdx)
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);

    // Scan the memory operations on the chain and find the first
    // non-consecutive store memory address.
    unsigned NumConsecutiveStores = 1;
    int64_t StartAddress = StoreNodes[0].OffsetFromBase;
    // Check that the addresses are consecutive starting from the second
    // element in the list of stores.
    for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
        break;
      NumConsecutiveStores = i + 1;
    }

    if (NumConsecutiveStores < 2) {
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumConsecutiveStores);
      continue;
    }

    // The node with the lowest store address.
    LLVMContext &Context = *DAG.getContext();
    const DataLayout &DL = DAG.getDataLayout();

    // Store the constants into memory as one consecutive store.
    if (IsConstantSrc) {
      while (NumConsecutiveStores >= 2) {
        LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
        unsigned FirstStoreAS = FirstInChain->getAddressSpace();
        unsigned FirstStoreAlign = FirstInChain->getAlignment();
        unsigned LastLegalType = 1;
        unsigned LastLegalVectorType = 1;
        bool LastIntegerTrunc = false;
        bool NonZero = false;
        // Index of the first zero-valued element that appears after some
        // non-zero element; sentinel is NumConsecutiveStores (none found).
        unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
        for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
          StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
          SDValue StoredVal = ST->getValue();
          bool IsElementZero = false;
          if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
            IsElementZero = C->isNullValue();
          else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
            IsElementZero = C->getConstantFPValue()->isNullValue();
          if (IsElementZero) {
            if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
              FirstZeroAfterNonZero = i;
          }
          NonZero |= !IsElementZero;

          // Find a legal type for the constant store.
          unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
          EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
          bool IsFast = false;

          // Break early when size is too large to be legal.
          if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
            break;

          if (TLI.isTypeLegal(StoreTy) &&
              TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                     FirstStoreAlign, &IsFast) &&
              IsFast) {
            LastIntegerTrunc = false;
            LastLegalType = i + 1;
            // Or check whether a truncstore is legal.
          } else if (TLI.getTypeAction(Context, StoreTy) ==
                     TargetLowering::TypePromoteInteger) {
            EVT LegalizedStoredValTy =
                TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
            if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
                TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
                TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                       FirstStoreAlign, &IsFast) &&
                IsFast) {
              LastIntegerTrunc = true;
              LastLegalType = i + 1;
            }
          }

          // We only use vectors if the constant is known to be zero or the
          // target allows it and the function is not marked with the
          // noimplicitfloat attribute.
          if ((!NonZero ||
               TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
              !NoVectors) {
            // Find a legal type for the vector store.
            unsigned Elts = (i + 1) * NumMemElts;
            EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
            if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
                TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
                TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                       FirstStoreAlign, &IsFast) &&
                IsFast)
              LastLegalVectorType = i + 1;
          }
        }

        bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
        unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;

        // Check if we found a legal integer type that creates a meaningful
        // merge.
        if (NumElem < 2) {
          // We know that candidate stores are in order and of correct
          // shape. While there is no mergeable sequence from the
          // beginning one may start later in the sequence. The only
          // reason a merge of size N could have failed where another of
          // the same size would not have, is if the alignment has
          // improved or we've dropped a non-zero value. Drop as many
          // candidates as we can here.
          // Skip candidates that could not have changed the failure verdict:
          // stop at the first zero after a non-zero run or at a store with
          // better alignment than the first one.
          unsigned NumSkip = 1;
          while (
              (NumSkip < NumConsecutiveStores) &&
              (NumSkip < FirstZeroAfterNonZero) &&
              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
            NumSkip++;

          StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
          NumConsecutiveStores -= NumSkip;
          continue;
        }

        // Check that we can merge these candidates without causing a cycle.
        if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
                                                      RootNode)) {
          StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
          NumConsecutiveStores -= NumElem;
          continue;
        }

        RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
                                              UseVector, LastIntegerTrunc);

        // Remove merged stores for next iteration.
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
        NumConsecutiveStores -= NumElem;
      }
      continue;
    }

    // When extracting multiple vector elements, try to store them
    // in one vector store rather than a sequence of scalar stores.
    if (IsExtractVecSrc) {
      // Loop on Consecutive Stores on success.
      while (NumConsecutiveStores >= 2) {
        LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
        unsigned FirstStoreAS = FirstInChain->getAddressSpace();
        unsigned FirstStoreAlign = FirstInChain->getAlignment();
        unsigned NumStoresToMerge = 1;
        for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
          // Find a legal type for the vector store.
          unsigned Elts = (i + 1) * NumMemElts;
          EVT Ty =
              EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
          bool IsFast;

          // Break early when size is too large to be legal.
          if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
            break;

          if (TLI.isTypeLegal(Ty) &&
              TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
              TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                     FirstStoreAlign, &IsFast) &&
              IsFast)
            NumStoresToMerge = i + 1;
        }

        // Check if we found a legal integer type creating a meaningful
        // merge.
        if (NumStoresToMerge < 2) {
          // We know that candidate stores are in order and of correct
          // shape. While there is no mergeable sequence from the
          // beginning one may start later in the sequence. The only
          // reason a merge of size N could have failed where another of
          // the same size would not have, is if the alignment has
          // improved. Drop as many candidates as we can here.
          unsigned NumSkip = 1;
          while (
              (NumSkip < NumConsecutiveStores) &&
              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
            NumSkip++;

          StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
          NumConsecutiveStores -= NumSkip;
          continue;
        }

        // Check that we can merge these candidates without causing a cycle.
        if (!checkMergeStoreCandidatesForDependencies(
                StoreNodes, NumStoresToMerge, RootNode)) {
          StoreNodes.erase(StoreNodes.begin(),
                           StoreNodes.begin() + NumStoresToMerge);
          NumConsecutiveStores -= NumStoresToMerge;
          continue;
        }

        RV |= MergeStoresOfConstantsOrVecElts(
            StoreNodes, MemVT, NumStoresToMerge, false, true, false);

        StoreNodes.erase(StoreNodes.begin(),
                         StoreNodes.begin() + NumStoresToMerge);
        NumConsecutiveStores -= NumStoresToMerge;
      }
      continue;
    }

    // Below we handle the case of multiple consecutive stores that
    // come from multiple consecutive loads. We merge them into a single
    // wide load and a single wide store.
    // Look for load nodes which are used by the stored values.
    SmallVector<MemOpLink, 8> LoadNodes;

    // Find acceptable loads. Loads need to have the same chain (token factor),
    // must not be zext, volatile, indexed, and they must be consecutive.
    BaseIndexOffset LdBasePtr;

    for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      SDValue Val = peekThroughBitcasts(St->getValue());
      LoadSDNode *Ld = cast<LoadSDNode>(Val);

      BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
      // If this is not the first ptr that we check.
      int64_t LdOffset = 0;
      if (LdBasePtr.getBase().getNode()) {
        // The base ptr must be the same.
        if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
          break;
      } else {
        // Check that all other base pointers are the same as this one.
        LdBasePtr = LdPtr;
      }

      // We found a potential memory operand to merge.
      LoadNodes.push_back(MemOpLink(Ld, LdOffset));
    }

    while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
      // If we have load/store pair instructions and we only have two values,
      // don't bother merging.
      unsigned RequiredAlignment;
      if (LoadNodes.size() == 2 &&
          TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
          StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
        break;
      }
      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
      unsigned FirstStoreAS = FirstInChain->getAddressSpace();
      unsigned FirstStoreAlign = FirstInChain->getAlignment();
      LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
      unsigned FirstLoadAS = FirstLoad->getAddressSpace();
      unsigned FirstLoadAlign = FirstLoad->getAlignment();

      // Scan the memory operations on the chain and find the first
      // non-consecutive load memory address. These variables hold the index
      // in the store node array.

      unsigned LastConsecutiveLoad = 1;

      // This variable refers to the size and not index in the array.
      unsigned LastLegalVectorType = 1;
      unsigned LastLegalIntegerType = 1;
      bool isDereferenceable = true;
      bool DoIntegerTruncate = false;
      StartAddress = LoadNodes[0].OffsetFromBase;
      SDValue FirstChain = FirstLoad->getChain();
      for (unsigned i = 1; i < LoadNodes.size(); ++i) {
        // All loads must share the same chain.
        if (LoadNodes[i].MemNode->getChain() != FirstChain)
          break;

        int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
        if (CurrAddress - StartAddress != (ElementSizeBytes * i))
          break;
        LastConsecutiveLoad = i;

        if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
          isDereferenceable = false;

        // Find a legal type for the vector store.
        unsigned Elts = (i + 1) * NumMemElts;
        EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);

        // Break early when size is too large to be legal.
        if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
          break;

        bool IsFastSt, IsFastLd;
        if (TLI.isTypeLegal(StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                   FirstStoreAlign, &IsFastSt) &&
            IsFastSt &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                   FirstLoadAlign, &IsFastLd) &&
            IsFastLd) {
          LastLegalVectorType = i + 1;
        }

        // Find a legal type for the integer store.
        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
        StoreTy = EVT::getIntegerVT(Context, SizeInBits);
        if (TLI.isTypeLegal(StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                   FirstStoreAlign, &IsFastSt) &&
            IsFastSt &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                   FirstLoadAlign, &IsFastLd) &&
            IsFastLd) {
          LastLegalIntegerType = i + 1;
          DoIntegerTruncate = false;
          // Or check whether a truncstore and extload is legal.
        } else if (TLI.getTypeAction(Context, StoreTy) ==
                   TargetLowering::TypePromoteInteger) {
          EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
          if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
              TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
              TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
                                 StoreTy) &&
              TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
                                 StoreTy) &&
              TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                     FirstStoreAlign, &IsFastSt) &&
              IsFastSt &&
              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                     FirstLoadAlign, &IsFastLd) &&
              IsFastLd) {
            LastLegalIntegerType = i + 1;
            DoIntegerTruncate = true;
          }
        }
      }

      // Only use vector types if the vector type is larger than the integer
      // type. If they are the same, use integers.
      bool UseVectorTy =
          LastLegalVectorType > LastLegalIntegerType && !NoVectors;
      unsigned LastLegalType =
          std::max(LastLegalVectorType, LastLegalIntegerType);

      // We add +1 here because the LastXXX variables refer to location while
      // the NumElem refers to array/index size.
      unsigned NumElem =
          std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
      NumElem = std::min(LastLegalType, NumElem);

      if (NumElem < 2) {
        // We know that candidate stores are in order and of correct
        // shape. While there is no mergeable sequence from the
        // beginning one may start later in the sequence. The only
        // reason a merge of size N could have failed where another of
        // the same size would not have is if the alignment of either
        // the load or store has improved. Drop as many candidates as we
        // can here.
        unsigned NumSkip = 1;
        while ((NumSkip < LoadNodes.size()) &&
               (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
          NumSkip++;
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
        NumConsecutiveStores -= NumSkip;
        continue;
      }

      // Check that we can merge these candidates without causing a cycle.
      if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
                                                    RootNode)) {
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
        NumConsecutiveStores -= NumElem;
        continue;
      }

      // Find if it is better to use vectors or integers to load and store
      // to memory.
      EVT JointMemOpVT;
      if (UseVectorTy) {
        // Find a legal type for the vector store.
        unsigned Elts = NumElem * NumMemElts;
        JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
      } else {
        unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
        JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
      }

      SDLoc LoadDL(LoadNodes[0].MemNode);
      SDLoc StoreDL(StoreNodes[0].MemNode);

      // The merged loads are required to have the same incoming chain, so
      // using the first's chain is acceptable.

      SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
      AddToWorklist(NewStoreChain.getNode());

      // Propagate dereferenceability only if every merged load had it.
      MachineMemOperand::Flags MMOFlags =
          isDereferenceable ? MachineMemOperand::MODereferenceable
                            : MachineMemOperand::MONone;

      SDValue NewLoad, NewStore;
      if (UseVectorTy || !DoIntegerTruncate) {
        NewLoad =
            DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
                        FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
                        FirstLoadAlign, MMOFlags);
        NewStore = DAG.getStore(
            NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
            FirstInChain->getPointerInfo(), FirstStoreAlign);
      } else { // This must be the truncstore/extload case
        EVT ExtendedTy =
            TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
        NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
                                 FirstLoad->getChain(), FirstLoad->getBasePtr(),
                                 FirstLoad->getPointerInfo(), JointMemOpVT,
                                 FirstLoadAlign, MMOFlags);
        NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
                                     FirstInChain->getBasePtr(),
                                     FirstInChain->getPointerInfo(),
                                     JointMemOpVT, FirstInChain->getAlignment(),
                                     FirstInChain->getMemOperand()->getFlags());
      }

      // Transfer chain users from old loads to the new load.
      for (unsigned i = 0; i < NumElem; ++i) {
        LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
        DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
                                      SDValue(NewLoad.getNode(), 1));
      }

      // Replace all of the stores with the new store. Recursively remove the
      // corresponding stored value if it is no longer used.
      for (unsigned i = 0; i < NumElem; ++i) {
        SDValue Val = StoreNodes[i].MemNode->getOperand(1);
        CombineTo(StoreNodes[i].MemNode, NewStore);
        if (Val.getNode()->use_empty())
          recursivelyDeleteUnusedNodes(Val.getNode());
      }

      RV = true;
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
      NumConsecutiveStores -= NumElem;
    }
  }
  return RV;
}

/// Re-chain \p ST onto \p BetterChain, keeping the old chain alive via a
/// TokenFactor so no memory ordering edge is lost.
SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
  SDLoc SL(ST);
  SDValue ReplStore;

  // Replace the chain to avoid dependency.
  if (ST->isTruncatingStore()) {
    ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
                                  ST->getBasePtr(), ST->getMemoryVT(),
                                  ST->getMemOperand());
  } else {
    ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
                             ST->getMemOperand());
  }

  // Create token to keep both nodes around.
  SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
                              MVT::Other, ST->getChain(), ReplStore);

  // Make sure the new and old chains are cleaned up.
  AddToWorklist(Token.getNode());

  // Don't add users to work list.
  return CombineTo(ST, Token, false);
}

/// Replace a store of an FP constant with a store of the equivalent integer
/// bit pattern, when the integer store is (or can be made) legal.
SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
  SDValue Value = ST->getValue();
  if (Value.getOpcode() == ISD::TargetConstantFP)
    return SDValue();

  SDLoc DL(ST);

  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();

  const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);

  // NOTE: If the original store is volatile, this transform must not increase
  // the number of stores. For example, on x86-32 an f64 can be stored in one
  // processor operation but an i64 (which is not legal) requires two. So the
  // transform should not be done in this case.

  SDValue Tmp;
  switch (CFP->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unknown FP type");
  case MVT::f16:    // We don't do this for these yet.
  case MVT::f80:
  case MVT::f128:
  case MVT::ppcf128:
    return SDValue();
  case MVT::f32:
    if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
      ;
      Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                                bitcastToAPInt().getZExtValue(), SDLoc(CFP),
                            MVT::i32);
      return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
    }

    return SDValue();
  case MVT::f64:
    if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
         !ST->isVolatile()) ||
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
      ;
      Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                getZExtValue(), SDLoc(CFP), MVT::i64);
      return DAG.getStore(Chain, DL, Tmp,
                          Ptr, ST->getMemOperand());
    }

    if (!ST->isVolatile() &&
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
      // Many FP stores are not made apparent until after legalize, e.g. for
      // argument passing. Since this is so common, custom legalize the
      // 64-bit integer store into two 32-bit stores.
      uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
      SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
      SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
      // Memory order of the two halves depends on endianness.
      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);

      unsigned Alignment = ST->getAlignment();
      MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
      AAMDNodes AAInfo = ST->getAAInfo();

      SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
                                 ST->getAlignment(), MMOFlags, AAInfo);
      Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                        DAG.getConstant(4, DL, Ptr.getValueType()));
      // The second half is only guaranteed alignment to the 4-byte offset.
      Alignment = MinAlign(Alignment, 4U);
      SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
                                 ST->getPointerInfo().getWithOffset(4),
                                 Alignment, MMOFlags, AAInfo);
      return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                         St0, St1);
    }

    return SDValue();
  }
}

/// Main combine entry point for ISD::STORE nodes.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // If the store is volatile, we only want to change the store type if the
    // resulting store is legal. Otherwise we might increase the number of
    // memory accesses. We don't care if the original type was legal or not
    // as we assume software couldn't rely on the number of accesses of an
    // illegal type.
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegal(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
      unsigned OrigAlign = ST->getAlignment();
      bool Fast = false;
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
          Fast) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                            ST->getPointerInfo(), OrigAlign,
                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
      }
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N as we are only refining the alignment
        assert(NewStore.getNode() == N);
        (void)NewStore;
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger() &&
      (!isa<ConstantSDNode>(Value) ||
       !cast<ConstantSDNode>(Value)->isOpaque())) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used. For example:
    // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
    SDValue Shorter = DAG.GetDemandedBits(
        Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                    ST->getMemoryVT().getScalarSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(
            Value,
            APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                 ST->getMemoryVT().getScalarSizeInBits()))) {
      // Re-visit the store if anything changed and the store hasn't been
      // merged with another node (N is deleted). SimplifyDemandedBits will
      // add Value's node back to the worklist if necessary, but we also need
      // to re-visit the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
        ST->getMemoryVT() == ST1->getMemoryVT()) {
      // If this is a store followed by a store with the same value to the same
      // location, then the store is dead/noop.
      if (ST1->getValue() == Value) {
        // The store is dead, remove it.
        return Chain;
      }

      // If this store fully overwrites its preceding store to the same
      // location, and no other node is chained to that store, we can
      // effectively drop the earlier store. Do not remove stores to undef as
      // they may be used as data sinks.
      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        // ST1 is fully overwritten and can be elided. Combine with its chain
        // value.
        CombineTo(ST1, ST1->getChain());
        return SDValue();
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store. We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Always perform this optimization before types are legal. If the target
  // prefers, also try this after legalization to catch stores that were
  // created by intrinsics or other nodes.
  if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
    while (true) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  // avoid changing the types of some subset of stores due to visit order,
  // preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  return ReduceLoadOpStoreWidth(N);
}

/// For the instruction sequence of store below, F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
/// which can remove the bitwise instructions or sink them to colder places.
///
/// (store (or (zext (bitcast F to i32) to i64),
///            (shl (zext I to i64), 32)), addr)  -->
/// (store F, addr) and (store I, addr+4)
///
/// Similarly, splitting for other merged store can also be beneficial, like:
/// For pair of {i32, i32}, i64 store --> two i32 stores.
/// For pair of {i32, i16}, i64 store --> two i32 stores.
/// For pair of {i16, i16}, i32 store --> two i16 stores.
/// For pair of {i16, i8},  i32 store --> two i16 stores.
/// For pair of {i8, i8},   i16 store --> two i8 stores.
///
/// We allow each target to determine specifically which kind of splitting is
/// supported.
///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
/// void goo(const std::pair<int, float> &);
/// hoo() {
///   ...
///   goo(std::make_pair(tmp, ftmp));
///   ...
/// }
///
/// Returns the chain of the second (high-half) store on success, or an empty
/// SDValue() if the pattern does not match or the target declines the split.
SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SDValue Val = ST->getValue();
  SDLoc DL(ST);

  // Match OR operand: the stored value must be a scalar-integer OR of the two
  // halves.
  if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
    return SDValue();

  // Match SHL operand and get Lower and Higher parts of Val.
  SDValue Op1 = Val.getOperand(0);
  SDValue Op2 = Val.getOperand(1);
  SDValue Lo, Hi;
  // OR is commutative, so the SHL may be on either side; canonicalize it into
  // Op1.
  if (Op1.getOpcode() != ISD::SHL) {
    std::swap(Op1, Op2);
    if (Op1.getOpcode() != ISD::SHL)
      return SDValue();
  }
  Lo = Op2;
  Hi = Op1.getOperand(0);
  if (!Op1.hasOneUse())
    return SDValue();

  // Match shift amount to HalfValBitSize: the high half must be shifted by
  // exactly half the width of the stored value.
  unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
  ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
  if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
    return SDValue();

  // Lo and Hi must each be a zero-extension of a scalar integer no wider than
  // half of Val's width (e.g. i32 -> i64 for an i64 store).
  if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
      !Lo.getOperand(0).getValueType().isScalarInteger() ||
      Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
      Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
      !Hi.getOperand(0).getValueType().isScalarInteger() ||
      Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
    return SDValue();

  // Use the EVT of low and high parts before bitcast as the input
  // of target query.
  EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
                  ? Lo.getOperand(0).getValueType()
                  : Lo.getValueType();
  EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
                   ? Hi.getOperand(0).getValueType()
                   : Hi.getValueType();
  if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
    return SDValue();

  // Start to split store.
  unsigned Alignment = ST->getAlignment();
  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
  AAMDNodes AAInfo = ST->getAAInfo();

  // Change the sizes of Lo and Hi's value types to HalfValBitSize.
  EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
  Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));

  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  // Lower value store.
  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
                             ST->getAlignment(), MMOFlags, AAInfo);
  Ptr =
      DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                  DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
  // Higher value store. Chained after St0. Only half of the original
  // alignment is guaranteed at offset HalfValBitSize/8.
  SDValue St1 =
      DAG.getStore(St0, DL, Hi, Ptr,
                   ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
                   Alignment / 2, MMOFlags, AAInfo);
  return St1;
}

/// Convert a disguised subvector insertion into a shuffle:
/// insert_vector_elt V, (bitcast X from vector type), IdxC -->
/// bitcast(shuffle (bitcast V), (extended X), Mask)
/// Note: We do not use an insert_subvector node because that requires a legal
/// subvector type.
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
  SDValue InsertVal = N->getOperand(1);
  // Only fire when the inserted scalar is itself a bitcast of a vector.
  if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
      !InsertVal.getOperand(0).getValueType().isVector())
    return SDValue();

  SDValue SubVec = InsertVal.getOperand(0);
  SDValue DestVec = N->getOperand(0);
  EVT SubVecVT = SubVec.getValueType();
  EVT VT = DestVec.getValueType();
  unsigned NumSrcElts = SubVecVT.getVectorNumElements();
  // How many copies of SubVecVT fit in the destination vector.
  unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
  unsigned NumMaskVals = ExtendRatio * NumSrcElts;

  // Step 1: Create a shuffle mask that implements this insert operation. The
  // vector that we are inserting into will be operand 0 of the shuffle, so
  // those elements are just 'i'. The inserted subvector is in the first
  // positions of operand 1 of the shuffle. Example:
  // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
  SmallVector<int, 16> Mask(NumMaskVals);
  for (unsigned i = 0; i != NumMaskVals; ++i) {
    if (i / NumSrcElts == InsIndex)
      Mask[i] = (i % NumSrcElts) + NumMaskVals;
    else
      Mask[i] = i;
  }

  // Bail out if the target can not handle the shuffle we want to create.
  EVT SubVecEltVT = SubVecVT.getVectorElementType();
  EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
  if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
    return SDValue();

  // Step 2: Create a wide vector from the inserted source vector by appending
  // undefined elements. This is the same size as our destination vector.
  SDLoc DL(N);
  SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
  ConcatOps[0] = SubVec;
  SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);

  // Step 3: Shuffle in the padded subvector.
  SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
  SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
  AddToWorklist(PaddedSubV.getNode());
  AddToWorklist(DestVecBC.getNode());
  AddToWorklist(Shuf.getNode());
  return DAG.getBitcast(VT, Shuf);
}

/// Combine INSERT_VECTOR_ELT nodes: fold away redundant/undef insertions,
/// canonicalize chains of insertions, and convert to BUILD_VECTOR or a
/// shuffle where profitable.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.isUndef())
    return InVec;

  EVT VT = InVec.getValueType();

  // Remove redundant insertions:
  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
    return InVec;

  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  if (!IndexC) {
    // If this is variable insert to undef vector, it might be better to splat:
    // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
    if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
      SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
      return DAG.getBuildVector(VT, DL, Ops);
    }
    // Variable index into a non-undef vector: nothing more we can do here.
    return SDValue();
  }

  // We must know which element is being inserted for folds below here.
  unsigned Elt = IndexC->getZExtValue();
  if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
    return Shuf;

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt = InVec.getConstantOperandVal(2);
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.isUndef()) {
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element. An out-of-range constant index leaves Ops untouched
  // and simply rebuilds the vector below.
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
  }

  // Return the new vector
  return DAG.getBuildVector(VT, DL, Ops);
}

SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  // Callers must have filtered out volatile loads already.
  assert(!OriginalLoad->isVolatile());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Bail if the scalar load would need more alignment than the vector load
  // provides, or if scalar loads of this type aren't supported.
  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
    ISD::NON_EXTLOAD : ISD::EXTLOAD;
  // NOTE(review): ExtTy above is used only as the argument to the
  // shouldReduceLoadWidth query; the actual extension type of the emitted
  // load is chosen independently below.
  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
    return SDValue();

  Align = NewAlign;

  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: fold the byte offset into the pointer info.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: compute index * store-size at run time.
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
                          OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
                          Align, OriginalLoad->getMemOperand()->getFlags(),
                          OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Result is same width or narrower: plain load, then truncate or bitcast
    // to the requested result type.
    Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
                       MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
                       OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  // Replace both the extract's value and the original load's chain in one
  // shot so chain users see the narrowed load's chain.
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}

SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);
  // Extracting from undef is undef.
  if (InVec.isUndef())
    return DAG.getUNDEF(NVT);

  // (vextract (scalar_to_vector val, 0) -> val
  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);

  // extract_vector_elt of out-of-bounds element -> UNDEF
  if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
    return DAG.getUNDEF(NVT);

  // extract_vector_elt (build_vector x, y), 1 -> y
  if (ConstEltNo &&
      InVec.getOpcode() == ISD::BUILD_VECTOR &&
      TLI.isTypeLegal(VT) &&
      (InVec.hasOneUse() ||
       TLI.aggressivelyPreferBuildVectorSources(VT))) {
    SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
    EVT InEltVT = Elt.getValueType();

    // Sometimes build_vector's scalar input types do not match result type.
    if (NVT == InEltVT)
      return Elt;

    // TODO: It may be useful to truncate if free if the build_vector implicitly
    // converts.
  }

  // TODO: These transforms should not require the 'hasOneUse' restriction, but
  // there are regressions on multiple targets without it. We can end up with a
  // mess of scalar and vector code if we reduce only part of the DAG to scalar.
  if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && VT.isInteger() &&
      InVec.hasOneUse()) {
    // The vector index of the LSBs of the source depend on the endian-ness.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    unsigned ExtractIndex = ConstEltNo->getZExtValue();
    // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
    // BCTruncElt is the element holding the scalar's low bits: 0 on
    // little-endian, the last element on big-endian.
    unsigned BCTruncElt = IsLE ? 0 : VT.getVectorNumElements() - 1;
    SDValue BCSrc = InVec.getOperand(0);
    if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);

    if (LegalTypes && BCSrc.getValueType().isInteger() &&
        BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
      // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
      // trunc i64 X to i32
      SDValue X = BCSrc.getOperand(0);
      assert(X.getValueType().isScalarInteger() && NVT.isScalarInteger() &&
             "Extract element and scalar to vector can't change element type "
             "from FP to integer.");
      unsigned XBitWidth = X.getValueSizeInBits();
      unsigned VecEltBitWidth = VT.getScalarSizeInBits();
      // Recompute the low-bits element index relative to X's width.
      BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;

      // An extract element return value type can be wider than its vector
      // operand element type. In that case, the high bits are undefined, so
      // it's possible that we may need to extend rather than truncate.
      if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
        assert(XBitWidth % VecEltBitWidth == 0 &&
               "Scalar bitwidth must be a multiple of vector element bitwidth");
        return DAG.getAnyExtOrTrunc(X, SDLoc(N), NVT);
      }
    }
  }

  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  //
  // This only really matters if the index is non-constant since other combines
  // on the constant elements already work.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
      EltNo == InVec.getOperand(2)) {
    SDValue Elt = InVec.getOperand(1);
    return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
  }

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from: mask values >= NumElem
    // refer to the second shuffle operand.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations ||
        // FIXME: Should really be just isOperationLegalOrCustom.
        TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
        TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                         DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
    }
  }

  // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
  // simplify it based on the (valid) extraction indices.
  if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
        return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
               Use->getOperand(0) == InVec &&
               isa<ConstantSDNode>(Use->getOperand(1));
      })) {
    APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
    for (SDNode *Use : InVec->uses()) {
      auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
      if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
        DemandedElts.setBit(CstElt->getZExtValue());
    }
    if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
      return SDValue(N, 0);
  }

  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    // Look through the bitcast; extract from the pre-bitcast vector instead.
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      // The index must not (transitively) depend on the load being replaced.
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
      if (!OrigLoad->isVolatile()) {
        return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                             OrigLoad);
      }
    }
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      // (Constant Elt was already range-checked near the top of this visitor,
      // so the guard here is effectively always false.)
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      // Mask values >= NumElems refer to the second shuffle operand.
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Rebase the element index into the selected input vector.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}

// Simplify (build_vec (ext )) to (bitcast (build_vec ))
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.isUndef()) continue;

    bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                    isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                    isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // Unused lanes are undef for any_extend inputs, zero for zero_extend.
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, DL, SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector: each original element occupies ElemRatio
  // slots, with the significant slot chosen by endianness.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.isUndef()) && "Invalid cast opcode");
    SDValue In;
    if (Cast.isUndef())
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT) ||
      (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
       TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
    return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getBitcast(VT, BV);
}

// Build a shuffle that selects the elements described by VectorMask (for the
// pair of input vectors starting at LeftIdx) out of VecIn1/VecIn2, coercing
// input types to match the output type where possible.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  unsigned Vec2Offset = 0;
  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // In case both the input vectors are extracted from same base
  // vector we do not need extra addend (Vec2Offset) while
  // computing shuffle mask.
  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
    Vec2Offset = InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      // VecIn1 already matches VT; widen VecIn2 to VT by concatenating undef.
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    // VectorMask entries <= 0 are undef (-1) or the zero vector (0).
    if (VectorMask[i] <= 0)
      continue;

    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  // If we shuffled at double width, extract the low half as the result.
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}

// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  // Slot 0 is reserved for the zero vector.
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
15962 if (isNullConstant(Op) || isNullFPConstant(Op)) { 15963 UsesZeroVector = true; 15964 VectorMask[i] = 0; 15965 continue; 15966 } 15967 15968 // Not an undef or zero. If the input is something other than an 15969 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out. 15970 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 15971 !isa<ConstantSDNode>(Op.getOperand(1))) 15972 return SDValue(); 15973 SDValue ExtractedFromVec = Op.getOperand(0); 15974 15975 APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue(); 15976 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements())) 15977 return SDValue(); 15978 15979 // All inputs must have the same element type as the output. 15980 if (VT.getVectorElementType() != 15981 ExtractedFromVec.getValueType().getVectorElementType()) 15982 return SDValue(); 15983 15984 // Have we seen this input vector before? 15985 // The vectors are expected to be tiny (usually 1 or 2 elements), so using 15986 // a map back from SDValues to numbers isn't worth it. 15987 unsigned Idx = std::distance( 15988 VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec)); 15989 if (Idx == VecIn.size()) 15990 VecIn.push_back(ExtractedFromVec); 15991 15992 VectorMask[i] = Idx; 15993 } 15994 15995 // If we didn't find at least one input vector, bail out. 15996 if (VecIn.size() < 2) 15997 return SDValue(); 15998 15999 // If all the Operands of BUILD_VECTOR extract from same 16000 // vector, then split the vector efficiently based on the maximum 16001 // vector access index and adjust the VectorMask and 16002 // VecIn accordingly. 
16003 if (VecIn.size() == 2) { 16004 unsigned MaxIndex = 0; 16005 unsigned NearestPow2 = 0; 16006 SDValue Vec = VecIn.back(); 16007 EVT InVT = Vec.getValueType(); 16008 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 16009 SmallVector<unsigned, 8> IndexVec(NumElems, 0); 16010 16011 for (unsigned i = 0; i < NumElems; i++) { 16012 if (VectorMask[i] <= 0) 16013 continue; 16014 unsigned Index = N->getOperand(i).getConstantOperandVal(1); 16015 IndexVec[i] = Index; 16016 MaxIndex = std::max(MaxIndex, Index); 16017 } 16018 16019 NearestPow2 = PowerOf2Ceil(MaxIndex); 16020 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 && 16021 NumElems * 2 < NearestPow2) { 16022 unsigned SplitSize = NearestPow2 / 2; 16023 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), 16024 InVT.getVectorElementType(), SplitSize); 16025 if (TLI.isTypeLegal(SplitVT)) { 16026 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, 16027 DAG.getConstant(SplitSize, DL, IdxTy)); 16028 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, 16029 DAG.getConstant(0, DL, IdxTy)); 16030 VecIn.pop_back(); 16031 VecIn.push_back(VecIn1); 16032 VecIn.push_back(VecIn2); 16033 16034 for (unsigned i = 0; i < NumElems; i++) { 16035 if (VectorMask[i] <= 0) 16036 continue; 16037 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2; 16038 } 16039 } 16040 } 16041 } 16042 16043 // TODO: We want to sort the vectors by descending length, so that adjacent 16044 // pairs have similar length, and the longer vector is always first in the 16045 // pair. 16046 16047 // TODO: Should this fire if some of the input vectors has illegal type (like 16048 // it does now), or should we let legalization run its course first? 16049 16050 // Shuffle phase: 16051 // Take pairs of vectors, and shuffle them so that the result has elements 16052 // from these vectors in the correct places. 
16053 // For example, given: 16054 // t10: i32 = extract_vector_elt t1, Constant:i64<0> 16055 // t11: i32 = extract_vector_elt t2, Constant:i64<0> 16056 // t12: i32 = extract_vector_elt t3, Constant:i64<0> 16057 // t13: i32 = extract_vector_elt t1, Constant:i64<1> 16058 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13 16059 // We will generate: 16060 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2 16061 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef 16062 SmallVector<SDValue, 4> Shuffles; 16063 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) { 16064 unsigned LeftIdx = 2 * In + 1; 16065 SDValue VecLeft = VecIn[LeftIdx]; 16066 SDValue VecRight = 16067 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue(); 16068 16069 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft, 16070 VecRight, LeftIdx)) 16071 Shuffles.push_back(Shuffle); 16072 else 16073 return SDValue(); 16074 } 16075 16076 // If we need the zero vector as an "ingredient" in the blend tree, add it 16077 // to the list of shuffles. 16078 if (UsesZeroVector) 16079 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT) 16080 : DAG.getConstantFP(0.0, DL, VT)); 16081 16082 // If we only have one shuffle, we're done. 16083 if (Shuffles.size() == 1) 16084 return Shuffles[0]; 16085 16086 // Update the vector mask to point to the post-shuffle vectors. 16087 for (int &Vec : VectorMask) 16088 if (Vec == 0) 16089 Vec = Shuffles.size() - 1; 16090 else 16091 Vec = (Vec - 1) / 2; 16092 16093 // More than one shuffle. Generate a binary tree of blends, e.g. 
if from 16094 // the previous step we got the set of shuffles t10, t11, t12, t13, we will 16095 // generate: 16096 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2 16097 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4 16098 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6 16099 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8 16100 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11 16101 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13 16102 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21 16103 16104 // Make sure the initial size of the shuffle list is even. 16105 if (Shuffles.size() % 2) 16106 Shuffles.push_back(DAG.getUNDEF(VT)); 16107 16108 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) { 16109 if (CurSize % 2) { 16110 Shuffles[CurSize] = DAG.getUNDEF(VT); 16111 CurSize++; 16112 } 16113 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) { 16114 int Left = 2 * In; 16115 int Right = 2 * In + 1; 16116 SmallVector<int, 8> Mask(NumElems, -1); 16117 for (unsigned i = 0; i != NumElems; ++i) { 16118 if (VectorMask[i] == Left) { 16119 Mask[i] = i; 16120 VectorMask[i] = In; 16121 } else if (VectorMask[i] == Right) { 16122 Mask[i] = i + NumElems; 16123 VectorMask[i] = In; 16124 } 16125 } 16126 16127 Shuffles[In] = 16128 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask); 16129 } 16130 } 16131 return Shuffles[0]; 16132 } 16133 16134 // Try to turn a build vector of zero extends of extract vector elts into a 16135 // a vector zero extend and possibly an extract subvector. 16136 // TODO: Support sign extend or any extend? 16137 // TODO: Allow undef elements? 16138 // TODO: Don't require the extracts to start at element 0. 
// Match (build_vector (zext (extract_vector_elt X, C)),
//                     (zext (extract_vector_elt X, C+1)), ...)
// and rewrite it as (zext (extract_subvector X, C)).
// Returns SDValue() if the pattern does not match.
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
  if (LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);

  SDValue Op0 = N->getOperand(0);
  // Returns the constant extract index if Op is
  // (zext (extract_vector_elt <same vector as Op0's>, C)), else -1.
  // Safe even on the first call (Op == Op0): Op0's operands are only touched
  // after Op's opcodes have been checked.
  auto checkElem = [&](SDValue Op) -> int64_t {
    if (Op.getOpcode() == ISD::ZERO_EXTEND &&
        Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
        return C->getZExtValue();
    return -1;
  };

  // Make sure the first element matches
  // (zext (extract_vector_elt X, C))
  int64_t Offset = checkElem(Op0);
  if (Offset < 0)
    return SDValue();

  unsigned NumElems = N->getNumOperands();
  SDValue In = Op0.getOperand(0).getOperand(0);
  EVT InSVT = In.getValueType().getScalarType();
  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);

  // Don't create an illegal input type after type legalization.
  if (LegalTypes && !TLI.isTypeLegal(InVT))
    return SDValue();

  // Ensure all the elements come from the same vector and are adjacent.
  for (unsigned i = 1; i != NumElems; ++i) {
    if ((Offset + i) != checkElem(N->getOperand(i)))
      return SDValue();
  }

  SDLoc DL(N);
  // Reuse the first extract's index operand as the subvector start index.
  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
                   Op0.getOperand(0).getOperand(1));
  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
}

SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // If this is a splat of a bitcast from another vector, change to a
  // concat_vector.
  // For example:
  // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
  // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
  //
  // If X is a build_vector itself, the concat can become a larger build_vector.
  // TODO: Maybe this is useful for non-splat too?
  if (!LegalOperations) {
    if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      Splat = peekThroughBitcasts(Splat);
      EVT SrcVT = Splat.getValueType();
      if (SrcVT.isVector()) {
        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
        EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
                                     SrcVT.getVectorElementType(), NumElts);
        if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
          SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
          SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
                                       NewVT, Ops);
          return DAG.getBitcast(VT, Concat);
        }
      }
    }
  }

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    // Returns the constant extract index if Op is an EXTRACT_VECTOR_ELT from
    // the same vector as Op0, else -1 (which wraps to UINT64_MAX).
    // NOTE(review): the lambda returns uint64_t, so `return -1;` yields
    // UINT64_MAX and the `int Offset` below relies on wrap-around to read it
    // back as -1 — confirm this is intended.
    // NOTE(review): Op0.getOperand(0) is evaluated whenever Op is an
    // EXTRACT_VECTOR_ELT, even when Op0 itself was never validated as one —
    // verify Op0 always has at least one operand for reachable inputs.
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      // All elements must extract consecutive lanes starting at Offset.
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    // Identity: extracting every lane of a same-typed vector is the vector.
    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  if (SDValue V = convertBuildVecZextToZext(N))
    return V;

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}

// Fold a CONCAT_VECTORS whose operands are all scalar-to-vector bitcasts (or
// undef) into a bitcast of a single BUILD_VECTOR of the scalars.
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT OpVT = N->getOperand(0).getValueType();

  // If the operands are legal vectors, leave them alone.
  if (TLI.isTypeLegal(OpVT))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SmallVector<SDValue, 8> Ops;

  // Start with an integer scalar type of the operand's width; may be switched
  // to floating point below if any FP scalar is seen.
  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);

  // Keep track of what we encounter.
  bool AnyInteger = false;
  bool AnyFP = false;
  for (const SDValue &Op : N->ops()) {
    if (ISD::BITCAST == Op.getOpcode() &&
        !Op.getOperand(0).getValueType().isVector())
      Ops.push_back(Op.getOperand(0));
    else if (ISD::UNDEF == Op.getOpcode())
      Ops.push_back(ScalarUndef);
    else
      return SDValue();

    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out, it could be something weird like x86mmx.
    EVT LastOpVT = Ops.back().getValueType();
    if (LastOpVT.isFloatingPoint())
      AnyFP = true;
    else if (LastOpVT.isInteger())
      AnyInteger = true;
    else
      return SDValue();
  }

  // If any of the operands is a floating point scalar bitcast to a vector,
  // use floating point types throughout, and bitcast everything.
  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
  if (AnyFP) {
    SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
    ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
    if (AnyInteger) {
      // Mixed int/FP scalars: rewrite the integer ones to the FP type.
      for (SDValue &Op : Ops) {
        if (Op.getValueType() == SVT)
          continue;
        if (Op.isUndef())
          Op = ScalarUndef;
        else
          Op = DAG.getBitcast(SVT, Op);
      }
    }
  }

  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
                               VT.getSizeInBits() / SVT.getSizeInBits());
  return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}

// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 collect the (at most two) distinct source vectors.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    Op = peekThroughBitcasts(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();
    ExtVec = peekThroughBitcasts(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = Op.getConstantOperandVal(1);

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        // Second input's lanes are offset by NumElts in a shuffle mask.
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}

SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0).getValueType().isVector()) {
      SDValue Scalar = In->getOperand(0);

      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar->getValueType(0);

      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Bail out if the vector size is not a multiple of the scalar size.
      if (VT.getSizeInBits() % SclTy.getSizeInBits())
        return SDValue();

      unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
      if (VNTNumElms < 2)
        return SDValue();

      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0).getValueType();
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          // Integer operands may be wider than MinVT; normalize by truncating.
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    unsigned IdentityIndex = i * PartNumElem;
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    if (CS->getZExtValue() != IdentityIndex)
      return SDValue();
  }

  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}

/// If we are extracting a subvector produced by a wide binary operator try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
  // some of these bailouts with other transforms.

  // The extract index must be a constant, so we can map it to a concat operand.
  auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!ExtractIndexC)
    return SDValue();

  // We are looking for an optionally bitcasted wide vector binary operator
  // feeding an extract subvector.
  SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
  if (!ISD::isBinaryOp(BinOp.getNode()))
    return SDValue();

  // The binop must be a vector type, so we can chop it in half.
  EVT WideBVT = BinOp.getValueType();
  if (!WideBVT.isVector())
    return SDValue();

  EVT VT = Extract->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  unsigned ExtractIndex = ExtractIndexC->getZExtValue();
  assert(ExtractIndex % NumElems == 0 &&
         "Extract index is not a multiple of the vector length.");
  EVT SrcVT = Extract->getOperand(0).getValueType();

  // Bail out if this is not a proper multiple width extraction.
  unsigned NumSrcElems = SrcVT.getVectorNumElements();
  if (NumSrcElems % NumElems != 0)
    return SDValue();

  // Bail out if the target does not support a narrower version of the binop.
  unsigned NarrowingRatio = NumSrcElems / NumElems;
  unsigned BOpcode = BinOp.getOpcode();
  unsigned WideNumElts = WideBVT.getVectorNumElements();
  EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
                                   WideNumElts / NarrowingRatio);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
    return SDValue();

  // If extraction is cheap, we don't need to look at the binop operands
  // for concat ops. The narrow binop alone makes this transform profitable.
  // We can't just reuse the original extract index operand because we may have
  // bitcasted.
  unsigned ConcatOpNum = ExtractIndex / NumElems;
  unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
  EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
  if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
      BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
    // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
    SDLoc DL(Extract);
    SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
    SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(0), NewExtIndex);
    SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                            BinOp.getOperand(1), NewExtIndex);
    // Preserve the original binop's flags (e.g. fast-math) on the narrow op.
    SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
                                      BinOp.getNode()->getFlags());
    return DAG.getBitcast(VT, NarrowBinOp);
  }

  // Only handle the case where we are doubling and then halving. A larger ratio
  // may require more than two narrow binops to replace the wide binop.
  if (NarrowingRatio != 2)
    return SDValue();

  // TODO: The motivating case for this transform is an x86 AVX1 target. That
  // target has temptingly almost legal versions of bitwise logic ops in 256-bit
  // flavors, but no other 256-bit integer support. This could be extended to
  // handle any binop, but that may require fixing/adding other folds to avoid
  // codegen regressions.
  if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
    return SDValue();

  // We need at least one concatenation operation of a binop operand to make
  // this transform worthwhile. The concat must double the input vector sizes.
  // TODO: Should we also handle INSERT_SUBVECTOR patterns?
  SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
  SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
  bool ConcatL =
      LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
  bool ConcatR =
      RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
  if (!ConcatL && !ConcatR)
    return SDValue();

  // If one of the binop operands was not the result of a concat, we must
  // extract a half-sized operand for our new narrow binop.
  SDLoc DL(Extract);

  // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
  // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
  // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
  SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                    BinOp.getOperand(0),
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));

  SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
                      : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
                                    BinOp.getOperand(1),
                                    DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));

  SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
  return DAG.getBitcast(VT, NarrowBinOp);
}

/// If we are extracting a subvector from a wide vector load, convert to a
/// narrow load to eliminate the extraction:
/// (extract_subvector (load wide vector)) --> (load narrow vector)
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
  // TODO: Add support for big-endian. The offset calculation must be adjusted.
  if (DAG.getDataLayout().isBigEndian())
    return SDValue();

  // Only plain, non-volatile, non-extending loads with a constant extract
  // index are handled.
  auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
  auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
    return SDValue();

  // Allow targets to opt-out.
  EVT VT = Extract->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
    return SDValue();

  // The narrow load will be offset from the base address of the old load if
  // we are extracting from something besides index 0 (little-endian).
  SDLoc DL(Extract);
  SDValue BaseAddr = Ld->getOperand(1);
  unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();

  // TODO: Use "BaseIndexOffset" to make this more effective.
  SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
  MachineFunction &MF = DAG.getMachineFunction();
  // Derive a narrowed memory operand at the adjusted offset so alias info and
  // the recorded store size stay accurate for the new load.
  MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
                                                   VT.getStoreSize());
  SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
  // Keep chain users of the old load ordered relative to the new one.
  DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
  return NewLd;
}

SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
  EVT NVT = N->getValueType(0);
  SDValue V = N->getOperand(0);

  // Extract from UNDEF is UNDEF.
  if (V.isUndef())
    return DAG.getUNDEF(NVT);

  if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
    if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
      return NarrowLoad;

  // Combine:
  //    (extract_subvec (concat V1, V2, ...), i)
  // Into:
  //    Vi if possible
  // Only operand 0 is checked as 'concat' assumes all inputs of the same
  // type.
  if (V->getOpcode() == ISD::CONCAT_VECTORS &&
      isa<ConstantSDNode>(N->getOperand(1)) &&
      V->getOperand(0).getValueType() == NVT) {
    unsigned Idx = N->getConstantOperandVal(1);
    unsigned NumElems = NVT.getVectorNumElements();
    assert((Idx % NumElems) == 0 &&
           "IDX in concat is not a multiple of the result vector length.");
    return V->getOperand(Idx / NumElems);
  }

  V = peekThroughBitcasts(V);

  // If the input is a build vector. Try to make a smaller build vector.
  if (V->getOpcode() == ISD::BUILD_VECTOR) {
    if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      EVT InVT = V->getValueType(0);
      unsigned ExtractSize = NVT.getSizeInBits();
      unsigned EltSize = InVT.getScalarSizeInBits();
      // Only do this if we won't split any elements.
16736 if (ExtractSize % EltSize == 0) { 16737 unsigned NumElems = ExtractSize / EltSize; 16738 EVT EltVT = InVT.getVectorElementType(); 16739 EVT ExtractVT = NumElems == 1 ? EltVT : 16740 EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems); 16741 if ((Level < AfterLegalizeDAG || 16742 (NumElems == 1 || 16743 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) && 16744 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) { 16745 unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) / 16746 EltSize; 16747 if (NumElems == 1) { 16748 SDValue Src = V->getOperand(IdxVal); 16749 if (EltVT != Src.getValueType()) 16750 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src); 16751 16752 return DAG.getBitcast(NVT, Src); 16753 } 16754 16755 // Extract the pieces from the original build_vector. 16756 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), 16757 makeArrayRef(V->op_begin() + IdxVal, 16758 NumElems)); 16759 return DAG.getBitcast(NVT, BuildVec); 16760 } 16761 } 16762 } 16763 } 16764 16765 if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { 16766 // Handle only simple case where vector being inserted and vector 16767 // being extracted are of same size. 16768 EVT SmallVT = V->getOperand(1).getValueType(); 16769 if (!NVT.bitsEq(SmallVT)) 16770 return SDValue(); 16771 16772 // Only handle cases where both indexes are constants. 
16773 ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); 16774 ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); 16775 16776 if (InsIdx && ExtIdx) { 16777 // Combine: 16778 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) 16779 // Into: 16780 // indices are equal or bit offsets are equal => V1 16781 // otherwise => (extract_subvec V1, ExtIdx) 16782 if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() == 16783 ExtIdx->getZExtValue() * NVT.getScalarSizeInBits()) 16784 return DAG.getBitcast(NVT, V->getOperand(1)); 16785 return DAG.getNode( 16786 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, 16787 DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)), 16788 N->getOperand(1)); 16789 } 16790 } 16791 16792 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG)) 16793 return NarrowBOp; 16794 16795 if (SimplifyDemandedVectorElts(SDValue(N, 0))) 16796 return SDValue(N, 0); 16797 16798 return SDValue(); 16799 } 16800 16801 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, 16802 // or turn a shuffle of a single concat into simpler shuffle then concat. 16803 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { 16804 EVT VT = N->getValueType(0); 16805 unsigned NumElts = VT.getVectorNumElements(); 16806 16807 SDValue N0 = N->getOperand(0); 16808 SDValue N1 = N->getOperand(1); 16809 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 16810 16811 SmallVector<SDValue, 4> Ops; 16812 EVT ConcatVT = N0.getOperand(0).getValueType(); 16813 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); 16814 unsigned NumConcats = NumElts / NumElemsPerConcat; 16815 16816 // Special case: shuffle(concat(A,B)) can be more efficiently represented 16817 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high 16818 // half vector elements. 
16819 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() && 16820 std::all_of(SVN->getMask().begin() + NumElemsPerConcat, 16821 SVN->getMask().end(), [](int i) { return i == -1; })) { 16822 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), 16823 makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat)); 16824 N1 = DAG.getUNDEF(ConcatVT); 16825 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); 16826 } 16827 16828 // Look at every vector that's inserted. We're looking for exact 16829 // subvector-sized copies from a concatenated vector 16830 for (unsigned I = 0; I != NumConcats; ++I) { 16831 // Make sure we're dealing with a copy. 16832 unsigned Begin = I * NumElemsPerConcat; 16833 bool AllUndef = true, NoUndef = true; 16834 for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) { 16835 if (SVN->getMaskElt(J) >= 0) 16836 AllUndef = false; 16837 else 16838 NoUndef = false; 16839 } 16840 16841 if (NoUndef) { 16842 if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) 16843 return SDValue(); 16844 16845 for (unsigned J = 1; J != NumElemsPerConcat; ++J) 16846 if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) 16847 return SDValue(); 16848 16849 unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; 16850 if (FirstElt < N0.getNumOperands()) 16851 Ops.push_back(N0.getOperand(FirstElt)); 16852 else 16853 Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); 16854 16855 } else if (AllUndef) { 16856 Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType())); 16857 } else { // Mixed with general masks and undefs, can't do optimization. 16858 return SDValue(); 16859 } 16860 } 16861 16862 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); 16863 } 16864 16865 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - 16866 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. 
16867 // 16868 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always 16869 // a simplification in some sense, but it isn't appropriate in general: some 16870 // BUILD_VECTORs are substantially cheaper than others. The general case 16871 // of a BUILD_VECTOR requires inserting each element individually (or 16872 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of 16873 // all constants is a single constant pool load. A BUILD_VECTOR where each 16874 // element is identical is a splat. A BUILD_VECTOR where most of the operands 16875 // are undef lowers to a small number of element insertions. 16876 // 16877 // To deal with this, we currently use a bunch of mostly arbitrary heuristics. 16878 // We don't fold shuffles where one side is a non-zero constant, and we don't 16879 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate 16880 // non-constant operands. This seems to work out reasonably well in practice. 16881 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, 16882 SelectionDAG &DAG, 16883 const TargetLowering &TLI) { 16884 EVT VT = SVN->getValueType(0); 16885 unsigned NumElts = VT.getVectorNumElements(); 16886 SDValue N0 = SVN->getOperand(0); 16887 SDValue N1 = SVN->getOperand(1); 16888 16889 if (!N0->hasOneUse()) 16890 return SDValue(); 16891 16892 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as 16893 // discussed above. 
16894 if (!N1.isUndef()) { 16895 if (!N1->hasOneUse()) 16896 return SDValue(); 16897 16898 bool N0AnyConst = isAnyConstantBuildVector(N0.getNode()); 16899 bool N1AnyConst = isAnyConstantBuildVector(N1.getNode()); 16900 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode())) 16901 return SDValue(); 16902 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode())) 16903 return SDValue(); 16904 } 16905 16906 // If both inputs are splats of the same value then we can safely merge this 16907 // to a single BUILD_VECTOR with undef elements based on the shuffle mask. 16908 bool IsSplat = false; 16909 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0); 16910 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); 16911 if (BV0 && BV1) 16912 if (SDValue Splat0 = BV0->getSplatValue()) 16913 IsSplat = (Splat0 == BV1->getSplatValue()); 16914 16915 SmallVector<SDValue, 8> Ops; 16916 SmallSet<SDValue, 16> DuplicateOps; 16917 for (int M : SVN->getMask()) { 16918 SDValue Op = DAG.getUNDEF(VT.getScalarType()); 16919 if (M >= 0) { 16920 int Idx = M < (int)NumElts ? M : M - NumElts; 16921 SDValue &S = (M < (int)NumElts ? N0 : N1); 16922 if (S.getOpcode() == ISD::BUILD_VECTOR) { 16923 Op = S.getOperand(Idx); 16924 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) { 16925 assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index."); 16926 Op = S.getOperand(0); 16927 } else { 16928 // Operand can't be combined - bail out. 16929 return SDValue(); 16930 } 16931 } 16932 16933 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're 16934 // generating a splat; semantically, this is fine, but it's likely to 16935 // generate low-quality code if the target can't reconstruct an appropriate 16936 // shuffle. 
16937 if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op)) 16938 if (!IsSplat && !DuplicateOps.insert(Op).second) 16939 return SDValue(); 16940 16941 Ops.push_back(Op); 16942 } 16943 16944 // BUILD_VECTOR requires all inputs to be of the same type, find the 16945 // maximum type and extend them all. 16946 EVT SVT = VT.getScalarType(); 16947 if (SVT.isInteger()) 16948 for (SDValue &Op : Ops) 16949 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); 16950 if (SVT != VT.getScalarType()) 16951 for (SDValue &Op : Ops) 16952 Op = TLI.isZExtFree(Op.getValueType(), SVT) 16953 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT) 16954 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT); 16955 return DAG.getBuildVector(VT, SDLoc(SVN), Ops); 16956 } 16957 16958 // Match shuffles that can be converted to any_vector_extend_in_reg. 16959 // This is often generated during legalization. 16960 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)) 16961 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case. 16962 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, 16963 SelectionDAG &DAG, 16964 const TargetLowering &TLI, 16965 bool LegalOperations, 16966 bool LegalTypes) { 16967 EVT VT = SVN->getValueType(0); 16968 bool IsBigEndian = DAG.getDataLayout().isBigEndian(); 16969 16970 // TODO Add support for big-endian when we have a test case. 
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  ArrayRef<int> Mask = SVN->getMask();
  SDValue N0 = SVN->getOperand(0);

  // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
  // Every defined mask element must place source element i/Scale at a
  // Scale-aligned position; all other lanes must be undef.
  auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
        continue;
      return false;
    }
    return true;
  };

  // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
  // power-of-2 extensions as they are the most likely.
  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
    // Check for non power of 2 vector sizes
    if (NumElts % Scale != 0)
      continue;
    if (!isAnyExtend(Scale))
      continue;

    EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
    EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
    if (!LegalTypes || TLI.isTypeLegal(OutVT))
      if (!LegalOperations ||
          TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
        return DAG.getBitcast(VT,
                              DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
                                          SDLoc(SVN), OutVT, N0));
  }

  return SDValue();
}

// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
// each source element of a large type into the lowest elements of a smaller
// destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// then be able to remove it.
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
                                        SelectionDAG &DAG) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));

  // Only fire when the source is itself a *_extend_vector_inreg node that the
  // truncation can cancel out.
  unsigned Opcode = N0.getOpcode();
  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  ArrayRef<int> Mask = SVN->getMask();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();

  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
    return SDValue();
  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;

  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
        continue;
      return false;
    }
    return true;
  };

  // At the moment we just handle the case where we've truncated back to the
  // same size as before the extension.
  // TODO: handle more extension/truncation cases as cases arise.
  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
                                       ArrayRef<int> SplatMask) {
    // Reject only when the user demands lane i, the splat leaves lane i
    // undef, and the composed element would have been defined - i.e. when
    // returning the splat would expose a new undef to the user.
    for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
      if (UserMask[i] != -1 && SplatMask[i] == -1 &&
          SplatMask[UserMask[i]] != -1)
        return false;
    return true;
  };
  if (CanSimplifyToExistingSplat(UserMask, SplatMask))
    return SDValue(Splat, 0);

  // Create a new shuffle with a mask that is composed of the two shuffles'
  // masks.
  SmallVector<int, 32> NewMask;
  for (int Idx : UserMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);

  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
                              Splat->getOperand(0), Splat->getOperand(1),
                              NewMask);
}

/// If the shuffle mask is taking exactly one element from the first vector
/// operand and passing through all other elements from the second vector
/// operand, return the index of the mask element that is choosing an element
/// from the first operand. Otherwise, return -1.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
  int MaskSize = Mask.size();
  int EltFromOp0 = -1;
  // TODO: This does not match if there are undef elements in the shuffle mask.
  // Should we ignore undefs in the shuffle mask instead? The trade-off is
  // removing an instruction (a shuffle), but losing the knowledge that some
  // vector lanes are not needed.
  for (int i = 0; i != MaskSize; ++i) {
    if (Mask[i] >= 0 && Mask[i] < MaskSize) {
      // We're looking for a shuffle of exactly one element from operand 0.
      if (EltFromOp0 != -1)
        return -1;
      EltFromOp0 = i;
    } else if (Mask[i] != i + MaskSize) {
      // Nothing from operand 1 can change lanes.
      return -1;
    }
  }
  return EltFromOp0;
}

/// If a shuffle inserts exactly one element from a source vector operand into
/// another vector operand and we can access the specified element as a scalar,
/// then we can eliminate the shuffle.
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
                                      SelectionDAG &DAG) {
  // First, check if we are taking one element of a vector and shuffling that
  // element into another vector.
  ArrayRef<int> Mask = Shuf->getMask();
  SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
  SDValue Op0 = Shuf->getOperand(0);
  SDValue Op1 = Shuf->getOperand(1);
  int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
  if (ShufOp0Index == -1) {
    // Commute mask and check again.
    ShuffleVectorSDNode::commuteMask(CommutedMask);
    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
    if (ShufOp0Index == -1)
      return SDValue();
    // Commute operands to match the commuted shuffle mask.
    std::swap(Op0, Op1);
    Mask = CommutedMask;
  }

  // The shuffle inserts exactly one element from operand 0 into operand 1.
  // Now see if we can access that element as a scalar via a real insert element
  // instruction.
  // TODO: We can try harder to locate the element as a scalar. Examples: it
  // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
         "Shuffle mask value must be from operand 0");
  if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
    return SDValue();

  // The insert must use a constant index that matches the lane the shuffle
  // reads from operand 0.
  auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
  if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
    return SDValue();

  // There's an existing insertelement with constant insertion index, so we
  // don't need to check the legality/profitability of a replacement operation
  // that differs at most in the constant value. The target should be able to
  // lower any of those in a similar way. If not, legalization will expand this
  // to a scalar-to-vector plus shuffle.
  //
  // Note that the shuffle may move the scalar from the position that the insert
  // element used. Therefore, our new insert element occurs at the shuffle's
  // mask index value, not the insert's index value.
  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
  SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
                                        Op0.getOperand(2).getValueType());
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
                     Op1, Op0.getOperand(1), NewInsIndex);
}

SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    // Remap all mask indices into the first operand's range.
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        // Lanes taken from an undef operand are themselves undef.
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
    return InsElt;

  // A shuffle of a single vector that is a splat can always be folded.
  if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
    if (N1->isUndef() && N0Shuf->isSplat())
      return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      // Find the first defined operand of the build_vector.
      SDValue Base;
      bool AllSame = true;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // Simplify source operands based on shuffle mask.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {
    // Expand each mask element into Scale consecutive sub-element indices.
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };

    SDValue BC0 = peekThroughOneUseBitcasts(N0);
    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
  }

  return SDValue();
}

SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE and possible truncate.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);
    auto InVecT = InVec.getValueType();
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
      // Build a mask that moves the extracted element into lane 0.
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;
      SDValue Val;
      // If we have an implicit truncate do truncate here as long as it's legal.
17549 // if it's not legal, this should 17550 if (VT.getScalarType() != InVal.getValueType() && 17551 InVal.getValueType().isScalarInteger() && 17552 isTypeLegal(VT.getScalarType())) { 17553 Val = 17554 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal); 17555 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); 17556 } 17557 if (VT.getScalarType() == InVecT.getScalarType() && 17558 VT.getVectorNumElements() <= InVecT.getVectorNumElements() && 17559 TLI.isShuffleMaskLegal(NewMask, VT)) { 17560 Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec, 17561 DAG.getUNDEF(InVecT), NewMask); 17562 // If the initial vector is the correct size this shuffle is a 17563 // valid result. 17564 if (VT == InVecT) 17565 return Val; 17566 // If not we must truncate the vector. 17567 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { 17568 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 17569 SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); 17570 EVT SubVT = 17571 EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), 17572 VT.getVectorNumElements()); 17573 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val, 17574 ZeroIdx); 17575 return Val; 17576 } 17577 } 17578 } 17579 } 17580 17581 return SDValue(); 17582 } 17583 17584 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { 17585 EVT VT = N->getValueType(0); 17586 SDValue N0 = N->getOperand(0); 17587 SDValue N1 = N->getOperand(1); 17588 SDValue N2 = N->getOperand(2); 17589 17590 // If inserting an UNDEF, just return the original vector. 17591 if (N1.isUndef()) 17592 return N0; 17593 17594 // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow 17595 // us to pull BITCASTs from input to output. 
17596 if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR) 17597 if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode())) 17598 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2); 17599 17600 // If this is an insert of an extracted vector into an undef vector, we can 17601 // just use the input to the extract. 17602 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && 17603 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT) 17604 return N1.getOperand(0); 17605 17606 // If we are inserting a bitcast value into an undef, with the same 17607 // number of elements, just use the bitcast input of the extract. 17608 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 -> 17609 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2) 17610 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST && 17611 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR && 17612 N1.getOperand(0).getOperand(1) == N2 && 17613 N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() == 17614 VT.getVectorNumElements() && 17615 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() == 17616 VT.getSizeInBits()) { 17617 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0)); 17618 } 17619 17620 // If both N1 and N2 are bitcast values on which insert_subvector 17621 // would makes sense, pull the bitcast through. 17622 // i.e. 
INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 -> 17623 // BITCAST (INSERT_SUBVECTOR N0 N1 N2) 17624 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) { 17625 SDValue CN0 = N0.getOperand(0); 17626 SDValue CN1 = N1.getOperand(0); 17627 EVT CN0VT = CN0.getValueType(); 17628 EVT CN1VT = CN1.getValueType(); 17629 if (CN0VT.isVector() && CN1VT.isVector() && 17630 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() && 17631 CN0VT.getVectorNumElements() == VT.getVectorNumElements()) { 17632 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), 17633 CN0.getValueType(), CN0, CN1, N2); 17634 return DAG.getBitcast(VT, NewINSERT); 17635 } 17636 } 17637 17638 // Combine INSERT_SUBVECTORs where we are inserting to the same index. 17639 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx ) 17640 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ) 17641 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && 17642 N0.getOperand(1).getValueType() == N1.getValueType() && 17643 N0.getOperand(2) == N2) 17644 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), 17645 N1, N2); 17646 17647 if (!isa<ConstantSDNode>(N2)) 17648 return SDValue(); 17649 17650 unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue(); 17651 17652 // Canonicalize insert_subvector dag nodes. 17653 // Example: 17654 // (insert_subvector (insert_subvector A, Idx0), Idx1) 17655 // -> (insert_subvector (insert_subvector A, Idx1), Idx0) 17656 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() && 17657 N1.getValueType() == N0.getOperand(1).getValueType() && 17658 isa<ConstantSDNode>(N0.getOperand(2))) { 17659 unsigned OtherIdx = N0.getConstantOperandVal(2); 17660 if (InsIdx < OtherIdx) { 17661 // Swap nodes. 
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                                  N0.getOperand(0), N1, N2);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
    }
  }

  // If the input vector is a concatenation, and the insert replaces
  // one of the pieces, we can optimize into a single concat_vectors.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
      N0.getOperand(0).getValueType() == N1.getValueType()) {
    // Every operand of the CONCAT_VECTORS has N1's type, so the insertion
    // replaces exactly one of them (selected by the element index below).
    unsigned Factor = N1.getValueType().getVectorNumElements();

    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
    Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;

    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }

  // Simplify source operands based on insertion.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Combine an FP_TO_FP16 node: a conversion immediately undone by the
/// matching FP16_TO_FP collapses to the original half value.
SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
  if (N0->getOpcode() == ISD::FP16_TO_FP)
    return N0->getOperand(0);

  return SDValue();
}

/// Combine an FP16_TO_FP node by stripping a redundant mask of the source.
SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
  // The fold relies on the conversion only consuming the low 16 bits, which
  // makes masking with 0xffff a no-op.
  if (N0->getOpcode() == ISD::AND) {
    ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
    if (AndConst && AndConst->getAPIntValue() == 0xffff) {
      return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
                         N0.getOperand(0));
    }
  }

  return SDValue();
}

/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>.
==> 17717 /// vector_shuffle V, Zero, <0, 4, 2, 4> 17718 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { 17719 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!"); 17720 17721 EVT VT = N->getValueType(0); 17722 SDValue LHS = N->getOperand(0); 17723 SDValue RHS = peekThroughBitcasts(N->getOperand(1)); 17724 SDLoc DL(N); 17725 17726 // Make sure we're not running after operation legalization where it 17727 // may have custom lowered the vector shuffles. 17728 if (LegalOperations) 17729 return SDValue(); 17730 17731 if (RHS.getOpcode() != ISD::BUILD_VECTOR) 17732 return SDValue(); 17733 17734 EVT RVT = RHS.getValueType(); 17735 unsigned NumElts = RHS.getNumOperands(); 17736 17737 // Attempt to create a valid clear mask, splitting the mask into 17738 // sub elements and checking to see if each is 17739 // all zeros or all ones - suitable for shuffle masking. 17740 auto BuildClearMask = [&](int Split) { 17741 int NumSubElts = NumElts * Split; 17742 int NumSubBits = RVT.getScalarSizeInBits() / Split; 17743 17744 SmallVector<int, 8> Indices; 17745 for (int i = 0; i != NumSubElts; ++i) { 17746 int EltIdx = i / Split; 17747 int SubIdx = i % Split; 17748 SDValue Elt = RHS.getOperand(EltIdx); 17749 if (Elt.isUndef()) { 17750 Indices.push_back(-1); 17751 continue; 17752 } 17753 17754 APInt Bits; 17755 if (isa<ConstantSDNode>(Elt)) 17756 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue(); 17757 else if (isa<ConstantFPSDNode>(Elt)) 17758 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt(); 17759 else 17760 return SDValue(); 17761 17762 // Extract the sub element from the constant bit mask. 
17763 if (DAG.getDataLayout().isBigEndian()) { 17764 Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits); 17765 } else { 17766 Bits.lshrInPlace(SubIdx * NumSubBits); 17767 } 17768 17769 if (Split > 1) 17770 Bits = Bits.trunc(NumSubBits); 17771 17772 if (Bits.isAllOnesValue()) 17773 Indices.push_back(i); 17774 else if (Bits == 0) 17775 Indices.push_back(i + NumSubElts); 17776 else 17777 return SDValue(); 17778 } 17779 17780 // Let's see if the target supports this vector_shuffle. 17781 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits); 17782 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts); 17783 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) 17784 return SDValue(); 17785 17786 SDValue Zero = DAG.getConstant(0, DL, ClearVT); 17787 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL, 17788 DAG.getBitcast(ClearVT, LHS), 17789 Zero, Indices)); 17790 }; 17791 17792 // Determine maximum split level (byte level masking). 17793 int MaxSplit = 1; 17794 if (RVT.getScalarSizeInBits() % 8 == 0) 17795 MaxSplit = RVT.getScalarSizeInBits() / 8; 17796 17797 for (int Split = 1; Split <= MaxSplit; ++Split) 17798 if (RVT.getScalarSizeInBits() % Split == 0) 17799 if (SDValue S = BuildClearMask(Split)) 17800 return S; 17801 17802 return SDValue(); 17803 } 17804 17805 /// Visit a binary vector operation, like ADD. 17806 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { 17807 assert(N->getValueType(0).isVector() && 17808 "SimplifyVBinOp only works on vectors!"); 17809 17810 SDValue LHS = N->getOperand(0); 17811 SDValue RHS = N->getOperand(1); 17812 SDValue Ops[] = {LHS, RHS}; 17813 17814 // See if we can constant fold the vector operation. 17815 if (SDValue Fold = DAG.FoldConstantVectorArithmetic( 17816 N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) 17817 return Fold; 17818 17819 // Type legalization might introduce new shuffles in the DAG. 
17820 // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) 17821 // -> (shuffle (VBinOp (A, B)), Undef, Mask). 17822 if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) && 17823 isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() && 17824 LHS.getOperand(1).isUndef() && 17825 RHS.getOperand(1).isUndef()) { 17826 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); 17827 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); 17828 17829 if (SVN0->getMask().equals(SVN1->getMask())) { 17830 EVT VT = N->getValueType(0); 17831 SDValue UndefVector = LHS.getOperand(1); 17832 SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 17833 LHS.getOperand(0), RHS.getOperand(0), 17834 N->getFlags()); 17835 AddUsersToWorklist(N); 17836 return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, 17837 SVN0->getMask()); 17838 } 17839 } 17840 17841 return SDValue(); 17842 } 17843 17844 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, 17845 SDValue N2) { 17846 assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); 17847 17848 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, 17849 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 17850 17851 // If we got a simplified select_cc node back from SimplifySelectCC, then 17852 // break it down into a new SETCC node, and a new SELECT node, and then return 17853 // the SELECT node, since we were called with a SELECT node. 17854 if (SCC.getNode()) { 17855 // Check to see if we got a select_cc back (to turn into setcc/select). 17856 // Otherwise, just return whatever node we got back, like fabs. 
17857 if (SCC.getOpcode() == ISD::SELECT_CC) { 17858 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), 17859 N0.getValueType(), 17860 SCC.getOperand(0), SCC.getOperand(1), 17861 SCC.getOperand(4)); 17862 AddToWorklist(SETCC.getNode()); 17863 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, 17864 SCC.getOperand(2), SCC.getOperand(3)); 17865 } 17866 17867 return SCC; 17868 } 17869 return SDValue(); 17870 } 17871 17872 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values 17873 /// being selected between, see if we can simplify the select. Callers of this 17874 /// should assume that TheSelect is deleted if this returns true. As such, they 17875 /// should return the appropriate thing (e.g. the node) back to the top-level of 17876 /// the DAG combiner loop to avoid it being looked at. 17877 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, 17878 SDValue RHS) { 17879 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) 17880 // The select + setcc is redundant, because fsqrt returns NaN for X < 0. 
17881 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { 17882 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) { 17883 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) 17884 SDValue Sqrt = RHS; 17885 ISD::CondCode CC; 17886 SDValue CmpLHS; 17887 const ConstantFPSDNode *Zero = nullptr; 17888 17889 if (TheSelect->getOpcode() == ISD::SELECT_CC) { 17890 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); 17891 CmpLHS = TheSelect->getOperand(0); 17892 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1)); 17893 } else { 17894 // SELECT or VSELECT 17895 SDValue Cmp = TheSelect->getOperand(0); 17896 if (Cmp.getOpcode() == ISD::SETCC) { 17897 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); 17898 CmpLHS = Cmp.getOperand(0); 17899 Zero = isConstOrConstSplatFP(Cmp.getOperand(1)); 17900 } 17901 } 17902 if (Zero && Zero->isZero() && 17903 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT || 17904 CC == ISD::SETULT || CC == ISD::SETLT)) { 17905 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) 17906 CombineTo(TheSelect, Sqrt); 17907 return true; 17908 } 17909 } 17910 } 17911 // Cannot simplify select with vector condition 17912 if (TheSelect->getOperand(0).getValueType().isVector()) return false; 17913 17914 // If this is a select from two identical things, try to pull the operation 17915 // through the select. 17916 if (LHS.getOpcode() != RHS.getOpcode() || 17917 !LHS.hasOneUse() || !RHS.hasOneUse()) 17918 return false; 17919 17920 // If this is a load and the token chain is identical, replace the select 17921 // of two loads with a load through a select of the address to load from. 17922 // This triggers in things like "select bool X, 10.0, 123.0" after the FP 17923 // constants have been dropped into the constant pool. 17924 if (LHS.getOpcode() == ISD::LOAD) { 17925 LoadSDNode *LLD = cast<LoadSDNode>(LHS); 17926 LoadSDNode *RLD = cast<LoadSDNode>(RHS); 17927 17928 // Token chains must be identical. 
17929 if (LHS.getOperand(0) != RHS.getOperand(0) || 17930 // Do not let this transformation reduce the number of volatile loads. 17931 LLD->isVolatile() || RLD->isVolatile() || 17932 // FIXME: If either is a pre/post inc/dec load, 17933 // we'd need to split out the address adjustment. 17934 LLD->isIndexed() || RLD->isIndexed() || 17935 // If this is an EXTLOAD, the VT's must match. 17936 LLD->getMemoryVT() != RLD->getMemoryVT() || 17937 // If this is an EXTLOAD, the kind of extension must match. 17938 (LLD->getExtensionType() != RLD->getExtensionType() && 17939 // The only exception is if one of the extensions is anyext. 17940 LLD->getExtensionType() != ISD::EXTLOAD && 17941 RLD->getExtensionType() != ISD::EXTLOAD) || 17942 // FIXME: this discards src value information. This is 17943 // over-conservative. It would be beneficial to be able to remember 17944 // both potential memory locations. Since we are discarding 17945 // src value info, don't do the transformation if the memory 17946 // locations are not in the default address space. 17947 LLD->getPointerInfo().getAddrSpace() != 0 || 17948 RLD->getPointerInfo().getAddrSpace() != 0 || 17949 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), 17950 LLD->getBasePtr().getValueType())) 17951 return false; 17952 17953 // The loads must not depend on one another. 17954 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD)) 17955 return false; 17956 17957 // Check that the select condition doesn't reach either load. If so, 17958 // folding this will induce a cycle into the DAG. If not, this is safe to 17959 // xform, so create a select of the addresses. 17960 17961 SmallPtrSet<const SDNode *, 32> Visited; 17962 SmallVector<const SDNode *, 16> Worklist; 17963 17964 // Always fail if LLD and RLD are not independent. TheSelect is a 17965 // predecessor to all Nodes in question so we need not search past it. 
17966 17967 Visited.insert(TheSelect); 17968 Worklist.push_back(LLD); 17969 Worklist.push_back(RLD); 17970 17971 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) || 17972 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)) 17973 return false; 17974 17975 SDValue Addr; 17976 if (TheSelect->getOpcode() == ISD::SELECT) { 17977 // We cannot do this optimization if any pair of {RLD, LLD} is a 17978 // predecessor to {RLD, LLD, CondNode}. As we've already compared the 17979 // Loads, we only need to check if CondNode is a successor to one of the 17980 // loads. We can further avoid this if there's no use of their chain 17981 // value. 17982 SDNode *CondNode = TheSelect->getOperand(0).getNode(); 17983 Worklist.push_back(CondNode); 17984 17985 if ((LLD->hasAnyUseOfValue(1) && 17986 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) || 17987 (RLD->hasAnyUseOfValue(1) && 17988 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))) 17989 return false; 17990 17991 Addr = DAG.getSelect(SDLoc(TheSelect), 17992 LLD->getBasePtr().getValueType(), 17993 TheSelect->getOperand(0), LLD->getBasePtr(), 17994 RLD->getBasePtr()); 17995 } else { // Otherwise SELECT_CC 17996 // We cannot do this optimization if any pair of {RLD, LLD} is a 17997 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared 17998 // the Loads, we only need to check if CondLHS/CondRHS is a successor to 17999 // one of the loads. We can further avoid this if there's no use of their 18000 // chain value. 
18001 18002 SDNode *CondLHS = TheSelect->getOperand(0).getNode(); 18003 SDNode *CondRHS = TheSelect->getOperand(1).getNode(); 18004 Worklist.push_back(CondLHS); 18005 Worklist.push_back(CondRHS); 18006 18007 if ((LLD->hasAnyUseOfValue(1) && 18008 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) || 18009 (RLD->hasAnyUseOfValue(1) && 18010 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))) 18011 return false; 18012 18013 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), 18014 LLD->getBasePtr().getValueType(), 18015 TheSelect->getOperand(0), 18016 TheSelect->getOperand(1), 18017 LLD->getBasePtr(), RLD->getBasePtr(), 18018 TheSelect->getOperand(4)); 18019 } 18020 18021 SDValue Load; 18022 // It is safe to replace the two loads if they have different alignments, 18023 // but the new load must be the minimum (most restrictive) alignment of the 18024 // inputs. 18025 unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); 18026 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); 18027 if (!RLD->isInvariant()) 18028 MMOFlags &= ~MachineMemOperand::MOInvariant; 18029 if (!RLD->isDereferenceable()) 18030 MMOFlags &= ~MachineMemOperand::MODereferenceable; 18031 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { 18032 // FIXME: Discards pointer and AA info. 18033 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), 18034 LLD->getChain(), Addr, MachinePointerInfo(), Alignment, 18035 MMOFlags); 18036 } else { 18037 // FIXME: Discards pointer and AA info. 18038 Load = DAG.getExtLoad( 18039 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() 18040 : LLD->getExtensionType(), 18041 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr, 18042 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags); 18043 } 18044 18045 // Users of the select now use the result of the load. 18046 CombineTo(TheSelect, Load); 18047 18048 // Users of the old loads now use the new load's chain. 
We know the 18049 // old-load value is dead now. 18050 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); 18051 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); 18052 return true; 18053 } 18054 18055 return false; 18056 } 18057 18058 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and 18059 /// bitwise 'and'. 18060 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, 18061 SDValue N1, SDValue N2, SDValue N3, 18062 ISD::CondCode CC) { 18063 // If this is a select where the false operand is zero and the compare is a 18064 // check of the sign bit, see if we can perform the "gzip trick": 18065 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A 18066 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A 18067 EVT XType = N0.getValueType(); 18068 EVT AType = N2.getValueType(); 18069 if (!isNullConstant(N3) || !XType.bitsGE(AType)) 18070 return SDValue(); 18071 18072 // If the comparison is testing for a positive value, we have to invert 18073 // the sign bit mask, so only do that transform if the target has a bitwise 18074 // 'and not' instruction (the invert is free). 18075 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) { 18076 // (X > -1) ? A : 0 18077 // (X > 0) ? X : 0 <-- This is canonical signed max. 18078 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2))) 18079 return SDValue(); 18080 } else if (CC == ISD::SETLT) { 18081 // (X < 0) ? A : 0 18082 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min. 18083 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2))) 18084 return SDValue(); 18085 } else { 18086 return SDValue(); 18087 } 18088 18089 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit 18090 // constant. 
18091 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); 18092 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); 18093 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { 18094 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1; 18095 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); 18096 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); 18097 AddToWorklist(Shift.getNode()); 18098 18099 if (XType.bitsGT(AType)) { 18100 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 18101 AddToWorklist(Shift.getNode()); 18102 } 18103 18104 if (CC == ISD::SETGT) 18105 Shift = DAG.getNOT(DL, Shift, AType); 18106 18107 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 18108 } 18109 18110 SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy); 18111 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt); 18112 AddToWorklist(Shift.getNode()); 18113 18114 if (XType.bitsGT(AType)) { 18115 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 18116 AddToWorklist(Shift.getNode()); 18117 } 18118 18119 if (CC == ISD::SETGT) 18120 Shift = DAG.getNOT(DL, Shift, AType); 18121 18122 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 18123 } 18124 18125 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" 18126 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 18127 /// in it. This may be a win when the constant is not otherwise available 18128 /// because it replaces two constant pool loads with one. 18129 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( 18130 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, 18131 ISD::CondCode CC) { 18132 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint())) 18133 return SDValue(); 18134 18135 // If we are before legalize types, we want the other legalization to happen 18136 // first (for example, to avoid messing with soft float). 
18137 auto *TV = dyn_cast<ConstantFPSDNode>(N2); 18138 auto *FV = dyn_cast<ConstantFPSDNode>(N3); 18139 EVT VT = N2.getValueType(); 18140 if (!TV || !FV || !TLI.isTypeLegal(VT)) 18141 return SDValue(); 18142 18143 // If a constant can be materialized without loads, this does not make sense. 18144 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal || 18145 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) || 18146 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) 18147 return SDValue(); 18148 18149 // If both constants have multiple uses, then we won't need to do an extra 18150 // load. The values are likely around in registers for other users. 18151 if (!TV->hasOneUse() && !FV->hasOneUse()) 18152 return SDValue(); 18153 18154 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()), 18155 const_cast<ConstantFP*>(TV->getConstantFPValue()) }; 18156 Type *FPTy = Elts[0]->getType(); 18157 const DataLayout &TD = DAG.getDataLayout(); 18158 18159 // Create a ConstantArray of the two constants. 18160 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); 18161 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), 18162 TD.getPrefTypeAlignment(FPTy)); 18163 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); 18164 18165 // Get offsets to the 0 and 1 elements of the array, so we can select between 18166 // them. 
18167 SDValue Zero = DAG.getIntPtrConstant(0, DL); 18168 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); 18169 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); 18170 SDValue Cond = 18171 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC); 18172 AddToWorklist(Cond.getNode()); 18173 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero); 18174 AddToWorklist(CstOffset.getNode()); 18175 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); 18176 AddToWorklist(CPIdx.getNode()); 18177 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, 18178 MachinePointerInfo::getConstantPool( 18179 DAG.getMachineFunction()), Alignment); 18180 } 18181 18182 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 18183 /// where 'cond' is the comparison specified by CC. 18184 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, 18185 SDValue N2, SDValue N3, ISD::CondCode CC, 18186 bool NotExtCompare) { 18187 // (x ? y : y) -> y. 18188 if (N2 == N3) return N2; 18189 18190 EVT CmpOpVT = N0.getValueType(); 18191 EVT VT = N2.getValueType(); 18192 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 18193 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); 18194 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); 18195 18196 // Determine if the condition we're dealing with is constant. 18197 SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL, 18198 false); 18199 if (SCC.getNode()) AddToWorklist(SCC.getNode()); 18200 18201 if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { 18202 // fold select_cc true, x, y -> x 18203 // fold select_cc false, x, y -> y 18204 return !SCCC->isNullValue() ? 
N2 : N3;
  }

  if (SDValue V =
          convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
    return V;

  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
    return V;

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be: we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
          DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                          getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is either
      // all-ones, or zero.
18234 SDValue ShrAmt = 18235 DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), 18236 getShiftAmountTy(Shl.getValueType())); 18237 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); 18238 18239 return DAG.getNode(ISD::AND, DL, VT, Shr, N3); 18240 } 18241 } 18242 18243 // fold select C, 16, 0 -> shl C, 4 18244 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2(); 18245 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2(); 18246 18247 if ((Fold || Swap) && 18248 TLI.getBooleanContents(CmpOpVT) == 18249 TargetLowering::ZeroOrOneBooleanContent && 18250 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) { 18251 18252 if (Swap) { 18253 CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger()); 18254 std::swap(N2C, N3C); 18255 } 18256 18257 // If the caller doesn't want us to simplify this into a zext of a compare, 18258 // don't do it. 18259 if (NotExtCompare && N2C->isOne()) 18260 return SDValue(); 18261 18262 SDValue Temp, SCC; 18263 // zext (setcc n0, n1) 18264 if (LegalTypes) { 18265 SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC); 18266 if (VT.bitsLT(SCC.getValueType())) 18267 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT); 18268 else 18269 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC); 18270 } else { 18271 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); 18272 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC); 18273 } 18274 18275 AddToWorklist(SCC.getNode()); 18276 AddToWorklist(Temp.getNode()); 18277 18278 if (N2C->isOne()) 18279 return Temp; 18280 18281 // shl setcc result by log2 n2c 18282 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, 18283 DAG.getConstant(N2C->getAPIntValue().logBase2(), 18284 SDLoc(Temp), 18285 getShiftAmountTy(Temp.getValueType()))); 18286 } 18287 18288 // Check to see if this is an integer abs. 
18289 // select_cc setg[te] X, 0, X, -X -> 18290 // select_cc setgt X, -1, X, -X -> 18291 // select_cc setl[te] X, 0, -X, X -> 18292 // select_cc setlt X, 1, -X, X -> 18293 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 18294 if (N1C) { 18295 ConstantSDNode *SubC = nullptr; 18296 if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || 18297 (N1C->isAllOnesValue() && CC == ISD::SETGT)) && 18298 N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) 18299 SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); 18300 else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || 18301 (N1C->isOne() && CC == ISD::SETLT)) && 18302 N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) 18303 SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); 18304 18305 if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) { 18306 SDLoc DL(N0); 18307 SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0, 18308 DAG.getConstant(CmpOpVT.getSizeInBits() - 1, 18309 DL, 18310 getShiftAmountTy(CmpOpVT))); 18311 SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift); 18312 AddToWorklist(Shift.getNode()); 18313 AddToWorklist(Add.getNode()); 18314 return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift); 18315 } 18316 } 18317 18318 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X) 18319 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X) 18320 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X) 18321 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X) 18322 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X) 18323 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X) 18324 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X) 18325 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) 18326 if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 18327 SDValue ValueOnZero = N2; 18328 SDValue Count = N3; 18329 // If the condition is NE instead of E, swap the operands. 
18330 if (CC == ISD::SETNE) 18331 std::swap(ValueOnZero, Count); 18332 // Check if the value on zero is a constant equal to the bits in the type. 18333 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) { 18334 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) { 18335 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is 18336 // legal, combine to just cttz. 18337 if ((Count.getOpcode() == ISD::CTTZ || 18338 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) && 18339 N0 == Count.getOperand(0) && 18340 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT))) 18341 return DAG.getNode(ISD::CTTZ, DL, VT, N0); 18342 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is 18343 // legal, combine to just ctlz. 18344 if ((Count.getOpcode() == ISD::CTLZ || 18345 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) && 18346 N0 == Count.getOperand(0) && 18347 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT))) 18348 return DAG.getNode(ISD::CTLZ, DL, VT, N0); 18349 } 18350 } 18351 } 18352 18353 return SDValue(); 18354 } 18355 18356 /// This is a stub for TargetLowering::SimplifySetCC. 18357 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, 18358 ISD::CondCode Cond, const SDLoc &DL, 18359 bool foldBooleans) { 18360 TargetLowering::DAGCombinerInfo 18361 DagCombineInfo(DAG, Level, false, this); 18362 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); 18363 } 18364 18365 /// Given an ISD::SDIV node expressing a divide by constant, return 18366 /// a DAG expression to select that will generate the same value by multiplying 18367 /// by a magic number. 18368 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 18369 SDValue DAGCombiner::BuildSDIV(SDNode *N) { 18370 // when optimising for minimum size, we don't want to expand a div to a mul 18371 // and a shift. 
18372 if (DAG.getMachineFunction().getFunction().optForMinSize()) 18373 return SDValue(); 18374 18375 SmallVector<SDNode *, 8> Built; 18376 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) { 18377 for (SDNode *N : Built) 18378 AddToWorklist(N); 18379 return S; 18380 } 18381 18382 return SDValue(); 18383 } 18384 18385 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a 18386 /// DAG expression that will generate the same value by right shifting. 18387 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { 18388 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); 18389 if (!C) 18390 return SDValue(); 18391 18392 // Avoid division by zero. 18393 if (C->isNullValue()) 18394 return SDValue(); 18395 18396 SmallVector<SDNode *, 8> Built; 18397 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) { 18398 for (SDNode *N : Built) 18399 AddToWorklist(N); 18400 return S; 18401 } 18402 18403 return SDValue(); 18404 } 18405 18406 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG 18407 /// expression that will generate the same value by multiplying by a magic 18408 /// number. 18409 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 18410 SDValue DAGCombiner::BuildUDIV(SDNode *N) { 18411 // when optimising for minimum size, we don't want to expand a div to a mul 18412 // and a shift. 18413 if (DAG.getMachineFunction().getFunction().optForMinSize()) 18414 return SDValue(); 18415 18416 SmallVector<SDNode *, 8> Built; 18417 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) { 18418 for (SDNode *N : Built) 18419 AddToWorklist(N); 18420 return S; 18421 } 18422 18423 return SDValue(); 18424 } 18425 18426 /// Determines the LogBase2 value for a non-null input value using the 18427 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). 
18428 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { 18429 EVT VT = V.getValueType(); 18430 unsigned EltBits = VT.getScalarSizeInBits(); 18431 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V); 18432 SDValue Base = DAG.getConstant(EltBits - 1, DL, VT); 18433 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz); 18434 return LogBase2; 18435 } 18436 18437 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 18438 /// For the reciprocal, we need to find the zero of the function: 18439 /// F(X) = A X - 1 [which has a zero at X = 1/A] 18440 /// => 18441 /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form 18442 /// does not require additional intermediate precision] 18443 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { 18444 if (Level >= AfterLegalizeDAG) 18445 return SDValue(); 18446 18447 // TODO: Handle half and/or extended types? 18448 EVT VT = Op.getValueType(); 18449 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) 18450 return SDValue(); 18451 18452 // If estimates are explicitly disabled for this function, we're done. 18453 MachineFunction &MF = DAG.getMachineFunction(); 18454 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF); 18455 if (Enabled == TLI.ReciprocalEstimate::Disabled) 18456 return SDValue(); 18457 18458 // Estimates may be explicitly enabled for this type with a custom number of 18459 // refinement steps. 
18460 int Iterations = TLI.getDivRefinementSteps(VT, MF); 18461 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { 18462 AddToWorklist(Est.getNode()); 18463 18464 if (Iterations) { 18465 EVT VT = Op.getValueType(); 18466 SDLoc DL(Op); 18467 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); 18468 18469 // Newton iterations: Est = Est + Est (1 - Arg * Est) 18470 for (int i = 0; i < Iterations; ++i) { 18471 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); 18472 AddToWorklist(NewEst.getNode()); 18473 18474 NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); 18475 AddToWorklist(NewEst.getNode()); 18476 18477 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); 18478 AddToWorklist(NewEst.getNode()); 18479 18480 Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); 18481 AddToWorklist(Est.getNode()); 18482 } 18483 } 18484 return Est; 18485 } 18486 18487 return SDValue(); 18488 } 18489 18490 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 18491 /// For the reciprocal sqrt, we need to find the zero of the function: 18492 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] 18493 /// => 18494 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) 18495 /// As a result, we precompute A/2 prior to the iteration loop. 18496 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, 18497 unsigned Iterations, 18498 SDNodeFlags Flags, bool Reciprocal) { 18499 EVT VT = Arg.getValueType(); 18500 SDLoc DL(Arg); 18501 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); 18502 18503 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that 18504 // this entire sequence requires only one FP constant. 
18505 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); 18506 AddToWorklist(HalfArg.getNode()); 18507 18508 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); 18509 AddToWorklist(HalfArg.getNode()); 18510 18511 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) 18512 for (unsigned i = 0; i < Iterations; ++i) { 18513 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); 18514 AddToWorklist(NewEst.getNode()); 18515 18516 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); 18517 AddToWorklist(NewEst.getNode()); 18518 18519 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); 18520 AddToWorklist(NewEst.getNode()); 18521 18522 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); 18523 AddToWorklist(Est.getNode()); 18524 } 18525 18526 // If non-reciprocal square root is requested, multiply the result by Arg. 18527 if (!Reciprocal) { 18528 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); 18529 AddToWorklist(Est.getNode()); 18530 } 18531 18532 return Est; 18533 } 18534 18535 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 18536 /// For the reciprocal sqrt, we need to find the zero of the function: 18537 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] 18538 /// => 18539 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) 18540 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, 18541 unsigned Iterations, 18542 SDNodeFlags Flags, bool Reciprocal) { 18543 EVT VT = Arg.getValueType(); 18544 SDLoc DL(Arg); 18545 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); 18546 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); 18547 18548 // This routine must enter the loop below to work correctly 18549 // when (Reciprocal == false). 
18550 assert(Iterations > 0); 18551 18552 // Newton iterations for reciprocal square root: 18553 // E = (E * -0.5) * ((A * E) * E + -3.0) 18554 for (unsigned i = 0; i < Iterations; ++i) { 18555 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); 18556 AddToWorklist(AE.getNode()); 18557 18558 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); 18559 AddToWorklist(AEE.getNode()); 18560 18561 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); 18562 AddToWorklist(RHS.getNode()); 18563 18564 // When calculating a square root at the last iteration build: 18565 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) 18566 // (notice a common subexpression) 18567 SDValue LHS; 18568 if (Reciprocal || (i + 1) < Iterations) { 18569 // RSQRT: LHS = (E * -0.5) 18570 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); 18571 } else { 18572 // SQRT: LHS = (A * E) * -0.5 18573 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); 18574 } 18575 AddToWorklist(LHS.getNode()); 18576 18577 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); 18578 AddToWorklist(Est.getNode()); 18579 } 18580 18581 return Est; 18582 } 18583 18584 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case 18585 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if 18586 /// Op can be zero. 18587 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, 18588 bool Reciprocal) { 18589 if (Level >= AfterLegalizeDAG) 18590 return SDValue(); 18591 18592 // TODO: Handle half and/or extended types? 18593 EVT VT = Op.getValueType(); 18594 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) 18595 return SDValue(); 18596 18597 // If estimates are explicitly disabled for this function, we're done. 
18598 MachineFunction &MF = DAG.getMachineFunction(); 18599 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF); 18600 if (Enabled == TLI.ReciprocalEstimate::Disabled) 18601 return SDValue(); 18602 18603 // Estimates may be explicitly enabled for this type with a custom number of 18604 // refinement steps. 18605 int Iterations = TLI.getSqrtRefinementSteps(VT, MF); 18606 18607 bool UseOneConstNR = false; 18608 if (SDValue Est = 18609 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR, 18610 Reciprocal)) { 18611 AddToWorklist(Est.getNode()); 18612 18613 if (Iterations) { 18614 Est = UseOneConstNR 18615 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) 18616 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); 18617 18618 if (!Reciprocal) { 18619 // The estimate is now completely wrong if the input was exactly 0.0 or 18620 // possibly a denormal. Force the answer to 0.0 for those cases. 18621 EVT VT = Op.getValueType(); 18622 SDLoc DL(Op); 18623 EVT CCVT = getSetCCResultType(VT); 18624 ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; 18625 const Function &F = DAG.getMachineFunction().getFunction(); 18626 Attribute Denorms = F.getFnAttribute("denormal-fp-math"); 18627 if (Denorms.getValueAsString().equals("ieee")) { 18628 // fabs(X) < SmallestNormal ? 0.0 : Est 18629 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); 18630 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); 18631 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); 18632 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); 18633 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); 18634 SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); 18635 Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); 18636 AddToWorklist(Fabs.getNode()); 18637 AddToWorklist(IsDenorm.getNode()); 18638 AddToWorklist(Est.getNode()); 18639 } else { 18640 // X == 0.0 ? 
0.0 : Est 18641 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); 18642 SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); 18643 Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); 18644 AddToWorklist(IsZero.getNode()); 18645 AddToWorklist(Est.getNode()); 18646 } 18647 } 18648 } 18649 return Est; 18650 } 18651 18652 return SDValue(); 18653 } 18654 18655 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { 18656 return buildSqrtEstimateImpl(Op, Flags, true); 18657 } 18658 18659 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { 18660 return buildSqrtEstimateImpl(Op, Flags, false); 18661 } 18662 18663 /// Return true if there is any possibility that the two addresses overlap. 18664 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { 18665 // If they are the same then they must be aliases. 18666 if (Op0->getBasePtr() == Op1->getBasePtr()) return true; 18667 18668 // If they are both volatile then they cannot be reordered. 18669 if (Op0->isVolatile() && Op1->isVolatile()) return true; 18670 18671 // If one operation reads from invariant memory, and the other may store, they 18672 // cannot alias. These should really be checking the equivalent of mayWrite, 18673 // but it only matters for memory nodes other than load /store. 18674 if (Op0->isInvariant() && Op1->writeMem()) 18675 return false; 18676 18677 if (Op1->isInvariant() && Op0->writeMem()) 18678 return false; 18679 18680 unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize(); 18681 unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize(); 18682 18683 // Check for BaseIndexOffset matching. 
18684 BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG); 18685 BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG); 18686 int64_t PtrDiff; 18687 if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) { 18688 if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) 18689 return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0)); 18690 18691 // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be 18692 // able to calculate their relative offset if at least one arises 18693 // from an alloca. However, these allocas cannot overlap and we 18694 // can infer there is no alias. 18695 if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase())) 18696 if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) { 18697 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); 18698 // If the base are the same frame index but the we couldn't find a 18699 // constant offset, (indices are different) be conservative. 18700 if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) || 18701 !MFI.isFixedObjectIndex(B->getIndex()))) 18702 return false; 18703 } 18704 18705 bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase()); 18706 bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase()); 18707 bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase()); 18708 bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase()); 18709 bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase()); 18710 bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase()); 18711 18712 // If of mismatched base types or checkable indices we can check 18713 // they do not alias. 
18714 if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) || 18715 (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) && 18716 (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) 18717 return false; 18718 } 18719 18720 // If we know required SrcValue1 and SrcValue2 have relatively large 18721 // alignment compared to the size and offset of the access, we may be able 18722 // to prove they do not alias. This check is conservative for now to catch 18723 // cases created by splitting vector types. 18724 int64_t SrcValOffset0 = Op0->getSrcValueOffset(); 18725 int64_t SrcValOffset1 = Op1->getSrcValueOffset(); 18726 unsigned OrigAlignment0 = Op0->getOriginalAlignment(); 18727 unsigned OrigAlignment1 = Op1->getOriginalAlignment(); 18728 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && 18729 NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) { 18730 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; 18731 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; 18732 18733 // There is no overlap between these relatively aligned accesses of 18734 // similar size. Return no alias. 18735 if ((OffAlign0 + NumBytes0) <= OffAlign1 || 18736 (OffAlign1 + NumBytes1) <= OffAlign0) 18737 return false; 18738 } 18739 18740 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 18741 ? CombinerGlobalAA 18742 : DAG.getSubtarget().useAA(); 18743 #ifndef NDEBUG 18744 if (CombinerAAOnlyFunc.getNumOccurrences() && 18745 CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) 18746 UseAA = false; 18747 #endif 18748 18749 if (UseAA && AA && 18750 Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { 18751 // Use alias analysis information. 
18752 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); 18753 int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset; 18754 int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset; 18755 AliasResult AAResult = 18756 AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0, 18757 UseTBAA ? Op0->getAAInfo() : AAMDNodes()), 18758 MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1, 18759 UseTBAA ? Op1->getAAInfo() : AAMDNodes()) ); 18760 if (AAResult == NoAlias) 18761 return false; 18762 } 18763 18764 // Otherwise we have to assume they alias. 18765 return true; 18766 } 18767 18768 /// Walk up chain skipping non-aliasing memory nodes, 18769 /// looking for aliasing nodes and adding them to the Aliases vector. 18770 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, 18771 SmallVectorImpl<SDValue> &Aliases) { 18772 SmallVector<SDValue, 8> Chains; // List of chains to visit. 18773 SmallPtrSet<SDNode *, 16> Visited; // Visited node set. 18774 18775 // Get alias information for node. 18776 bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile(); 18777 18778 // Starting off. 18779 Chains.push_back(OriginalChain); 18780 unsigned Depth = 0; 18781 18782 // Look at each chain and determine if it is an alias. If so, add it to the 18783 // aliases list. If not, then continue up the chain looking for the next 18784 // candidate. 18785 while (!Chains.empty()) { 18786 SDValue Chain = Chains.pop_back_val(); 18787 18788 // For TokenFactor nodes, look at each operand and only continue up the 18789 // chain until we reach the depth limit. 18790 // 18791 // FIXME: The depth check could be made to return the last non-aliasing 18792 // chain we found before we hit a tokenfactor rather than the original 18793 // chain. 18794 if (Depth > TLI.getGatherAllAliasesMaxDepth()) { 18795 Aliases.clear(); 18796 Aliases.push_back(OriginalChain); 18797 return; 18798 } 18799 18800 // Don't bother if we've been before. 
18801 if (!Visited.insert(Chain.getNode()).second) 18802 continue; 18803 18804 switch (Chain.getOpcode()) { 18805 case ISD::EntryToken: 18806 // Entry token is ideal chain operand, but handled in FindBetterChain. 18807 break; 18808 18809 case ISD::LOAD: 18810 case ISD::STORE: { 18811 // Get alias information for Chain. 18812 bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) && 18813 !cast<LSBaseSDNode>(Chain.getNode())->isVolatile(); 18814 18815 // If chain is alias then stop here. 18816 if (!(IsLoad && IsOpLoad) && 18817 isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) { 18818 Aliases.push_back(Chain); 18819 } else { 18820 // Look further up the chain. 18821 Chains.push_back(Chain.getOperand(0)); 18822 ++Depth; 18823 } 18824 break; 18825 } 18826 18827 case ISD::TokenFactor: 18828 // We have to check each of the operands of the token factor for "small" 18829 // token factors, so we queue them up. Adding the operands to the queue 18830 // (stack) in reverse order maintains the original order and increases the 18831 // likelihood that getNode will find a matching token factor (CSE.) 18832 if (Chain.getNumOperands() > 16) { 18833 Aliases.push_back(Chain); 18834 break; 18835 } 18836 for (unsigned n = Chain.getNumOperands(); n;) 18837 Chains.push_back(Chain.getOperand(--n)); 18838 ++Depth; 18839 break; 18840 18841 case ISD::CopyFromReg: 18842 // Forward past CopyFromReg. 18843 Chains.push_back(Chain.getOperand(0)); 18844 ++Depth; 18845 break; 18846 18847 default: 18848 // For all other instructions we will just have to take what we can get. 18849 Aliases.push_back(Chain); 18850 break; 18851 } 18852 } 18853 } 18854 18855 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain 18856 /// (aliasing node.) 18857 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { 18858 if (OptLevel == CodeGenOpt::None) 18859 return OldChain; 18860 18861 // Ops for replacing token factor. 
18862 SmallVector<SDValue, 8> Aliases; 18863 18864 // Accumulate all the aliases to this node. 18865 GatherAllAliases(N, OldChain, Aliases); 18866 18867 // If no operands then chain to entry token. 18868 if (Aliases.size() == 0) 18869 return DAG.getEntryNode(); 18870 18871 // If a single operand then chain to it. We don't need to revisit it. 18872 if (Aliases.size() == 1) 18873 return Aliases[0]; 18874 18875 // Construct a custom tailored token factor. 18876 return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); 18877 } 18878 18879 // TODO: Replace with with std::monostate when we move to C++17. 18880 struct UnitT { } Unit; 18881 bool operator==(const UnitT &, const UnitT &) { return true; } 18882 bool operator!=(const UnitT &, const UnitT &) { return false; } 18883 18884 // This function tries to collect a bunch of potentially interesting 18885 // nodes to improve the chains of, all at once. This might seem 18886 // redundant, as this function gets called when visiting every store 18887 // node, so why not let the work be done on each store as it's visited? 18888 // 18889 // I believe this is mainly important because MergeConsecutiveStores 18890 // is unable to deal with merging stores of different sizes, so unless 18891 // we improve the chains of all the potential candidates up-front 18892 // before running MergeConsecutiveStores, it might only see some of 18893 // the nodes that will eventually be candidates, and then not be able 18894 // to go from a partially-merged state to the desired final 18895 // fully-merged state. 18896 18897 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { 18898 SmallVector<StoreSDNode *, 8> ChainedStores; 18899 StoreSDNode *STChain = St; 18900 // Intervals records which offsets from BaseIndex have been covered. In 18901 // the common case, every store writes to the immediately previous address 18902 // space and thus merged with the previous interval at insertion time. 
18903 18904 using IMap = 18905 llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>; 18906 IMap::Allocator A; 18907 IMap Intervals(A); 18908 18909 // This holds the base pointer, index, and the offset in bytes from the base 18910 // pointer. 18911 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); 18912 18913 // We must have a base and an offset. 18914 if (!BasePtr.getBase().getNode()) 18915 return false; 18916 18917 // Do not handle stores to undef base pointers. 18918 if (BasePtr.getBase().isUndef()) 18919 return false; 18920 18921 // Add ST's interval. 18922 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit); 18923 18924 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) { 18925 // If the chain has more than one use, then we can't reorder the mem ops. 18926 if (!SDValue(Chain, 0)->hasOneUse()) 18927 break; 18928 if (Chain->isVolatile() || Chain->isIndexed()) 18929 break; 18930 18931 // Find the base pointer and offset for this memory node. 18932 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG); 18933 // Check that the base pointer is the same as the original one. 18934 int64_t Offset; 18935 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset)) 18936 break; 18937 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8; 18938 // Make sure we don't overlap with other intervals by checking the ones to 18939 // the left or right before inserting. 18940 auto I = Intervals.find(Offset); 18941 // If there's a next interval, we should end before it. 18942 if (I != Intervals.end() && I.start() < (Offset + Length)) 18943 break; 18944 // If there's a previous interval, we should start after it. 18945 if (I != Intervals.begin() && (--I).stop() <= Offset) 18946 break; 18947 Intervals.insert(Offset, Offset + Length, Unit); 18948 18949 ChainedStores.push_back(Chain); 18950 STChain = Chain; 18951 } 18952 18953 // If we didn't find a chained store, exit. 
18954 if (ChainedStores.size() == 0) 18955 return false; 18956 18957 // Improve all chained stores (St and ChainedStores members) starting from 18958 // where the store chain ended and return single TokenFactor. 18959 SDValue NewChain = STChain->getChain(); 18960 SmallVector<SDValue, 8> TFOps; 18961 for (unsigned I = ChainedStores.size(); I;) { 18962 StoreSDNode *S = ChainedStores[--I]; 18963 SDValue BetterChain = FindBetterChain(S, NewChain); 18964 S = cast<StoreSDNode>(DAG.UpdateNodeOperands( 18965 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3))); 18966 TFOps.push_back(SDValue(S, 0)); 18967 ChainedStores[I] = S; 18968 } 18969 18970 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo. 18971 SDValue BetterChain = FindBetterChain(St, NewChain); 18972 SDValue NewST; 18973 if (St->isTruncatingStore()) 18974 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(), 18975 St->getBasePtr(), St->getMemoryVT(), 18976 St->getMemOperand()); 18977 else 18978 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(), 18979 St->getBasePtr(), St->getMemOperand()); 18980 18981 TFOps.push_back(NewST); 18982 18983 // If we improved every element of TFOps, then we've lost the dependence on 18984 // NewChain to successors of St and we need to add it back to TFOps. Do so at 18985 // the beginning to keep relative order consistent with FindBetterChains. 18986 auto hasImprovedChain = [&](SDValue ST) -> bool { 18987 return ST->getOperand(0) != NewChain; 18988 }; 18989 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain); 18990 if (AddNewChain) 18991 TFOps.insert(TFOps.begin(), NewChain); 18992 18993 SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps); 18994 CombineTo(St, TF); 18995 18996 AddToWorklist(STChain); 18997 // Add TF operands worklist in reverse order. 
18998 for (auto I = TF->getNumOperands(); I;) 18999 AddToWorklist(TF->getOperand(--I).getNode()); 19000 AddToWorklist(TF.getNode()); 19001 return true; 19002 } 19003 19004 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { 19005 if (OptLevel == CodeGenOpt::None) 19006 return false; 19007 19008 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); 19009 19010 // We must have a base and an offset. 19011 if (!BasePtr.getBase().getNode()) 19012 return false; 19013 19014 // Do not handle stores to undef base pointers. 19015 if (BasePtr.getBase().isUndef()) 19016 return false; 19017 19018 // Directly improve a chain of disjoint stores starting at St. 19019 if (parallelizeChainedStores(St)) 19020 return true; 19021 19022 // Improve St's Chain.. 19023 SDValue BetterChain = FindBetterChain(St, St->getChain()); 19024 if (St->getChain() != BetterChain) { 19025 replaceStoreChain(St, BetterChain); 19026 return true; 19027 } 19028 return false; 19029 } 19030 19031 /// This is the entry point for the file. 19032 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA, 19033 CodeGenOpt::Level OptLevel) { 19034 /// This is the main entry point to this class. 19035 DAGCombiner(*this, AA, OptLevel).Run(Level); 19036 } 19037