1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run 11 // both before and after the DAG is legalized. 12 // 13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is 14 // primarily intended to handle simplification opportunities that are implicit 15 // in the LLVM IR and exposed by the various codegen lowering phases. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/ADT/None.h" 24 #include "llvm/ADT/Optional.h" 25 #include "llvm/ADT/STLExtras.h" 26 #include "llvm/ADT/SetVector.h" 27 #include "llvm/ADT/SmallBitVector.h" 28 #include "llvm/ADT/SmallPtrSet.h" 29 #include "llvm/ADT/SmallSet.h" 30 #include "llvm/ADT/SmallVector.h" 31 #include "llvm/ADT/Statistic.h" 32 #include "llvm/Analysis/AliasAnalysis.h" 33 #include "llvm/Analysis/MemoryLocation.h" 34 #include "llvm/CodeGen/DAGCombine.h" 35 #include "llvm/CodeGen/ISDOpcodes.h" 36 #include "llvm/CodeGen/MachineFrameInfo.h" 37 #include "llvm/CodeGen/MachineFunction.h" 38 #include "llvm/CodeGen/MachineMemOperand.h" 39 #include "llvm/CodeGen/RuntimeLibcalls.h" 40 #include "llvm/CodeGen/SelectionDAG.h" 41 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" 42 #include "llvm/CodeGen/SelectionDAGNodes.h" 43 #include "llvm/CodeGen/SelectionDAGTargetInfo.h" 44 #include "llvm/CodeGen/TargetLowering.h" 45 #include "llvm/CodeGen/TargetRegisterInfo.h" 46 #include "llvm/CodeGen/TargetSubtargetInfo.h" 47 #include "llvm/CodeGen/ValueTypes.h" 48 
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

namespace {

class DAGCombiner {
  SelectionDAG &DAG;
  const TargetLowering &TLI;
  CombineLevel Level;
  CodeGenOpt::Level OptLevel;
  bool LegalOperations = false;
  bool LegalTypes = false;
  // True when the current function is compiled for size (set from the IR
  // function's optForSize() in the constructor).
  bool ForCodeSize;

  /// Worklist of all of the nodes that need to be simplified.
  ///
  /// This must behave as a stack -- new nodes to process are pushed onto the
  /// back and when processing we pop off of the back.
  ///
  /// The worklist will not contain duplicates but may contain null entries
  /// due to nodes being deleted from the underlying DAG.
  SmallVector<SDNode *, 64> Worklist;

  /// Mapping from an SDNode to its position on the worklist.
  ///
  /// This is used to find and remove nodes from the worklist (by nulling
  /// them) when they are deleted from the underlying DAG. It relies on
  /// stable indices of nodes within the worklist.
  DenseMap<SDNode *, unsigned> WorklistMap;

  /// Set of nodes which have been combined (at least once).
  ///
  /// This is used to allow us to reliably add any operands of a DAG node
  /// which have not yet been combined to the worklist.
  SmallPtrSet<SDNode *, 32> CombinedNodes;

  // AA - Used for DAG load/store alias analysis.
  AliasAnalysis *AA;

  /// When an instruction is simplified, add all users of the instruction to
  /// the work lists because they might get more simplified now.
  void AddUsersToWorklist(SDNode *N) {
    for (SDNode *Node : N->uses())
      AddToWorklist(Node);
  }

  /// Call the node-specific routine that folds each particular type of node.
  SDValue visit(SDNode *N);

public:
  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
        OptLevel(OL), AA(AA) {
    ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();

    // Record the widest legal simple type; used to bound store merging.
    MaximumLegalStoreInBits = 0;
    for (MVT VT : MVT::all_valuetypes())
      if (EVT(VT).isSimple() && VT != MVT::Other &&
          TLI.isTypeLegal(EVT(VT)) &&
          VT.getSizeInBits() >= MaximumLegalStoreInBits)
        MaximumLegalStoreInBits = VT.getSizeInBits();
  }

  /// Add to the worklist making sure its instance is at the back (next to be
  /// processed.)
  void AddToWorklist(SDNode *N) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Deleted Node added to Worklist");

    // Skip handle nodes as they can't usefully be combined and confuse the
    // zero-use deletion strategy.
    if (N->getOpcode() == ISD::HANDLENODE)
      return;

    // Insert only if not already present; the map stores the node's index
    // into the Worklist vector.
    if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
      Worklist.push_back(N);
  }

  /// Remove all instances of N from the worklist.
  void removeFromWorklist(SDNode *N) {
    CombinedNodes.erase(N);

    auto It = WorklistMap.find(N);
    if (It == WorklistMap.end())
      return; // Not in the worklist.

    // Null out the entry rather than erasing it to avoid a linear operation.
    Worklist[It->second] = nullptr;
    WorklistMap.erase(It);
  }

  void deleteAndRecombine(SDNode *N);
  bool recursivelyDeleteUnusedNodes(SDNode *N);

  /// Replaces all uses of the results of one DAG node with new values.
  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                    bool AddTo = true);

  /// Replaces all uses of the results of one DAG node with new values.
  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
    return CombineTo(N, &Res, 1, AddTo);
  }

  /// Replaces all uses of the results of one DAG node with new values.
  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                    bool AddTo = true) {
    SDValue To[] = { Res0, Res1 };
    return CombineTo(N, To, 2, AddTo);
  }

  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

private:
  // Widest store size (in bits) whose type is simple and legal; computed in
  // the constructor.
  unsigned MaximumLegalStoreInBits;

  /// Check the specified integer node value to see if it can be simplified or
  /// if things it uses can be simplified by bit propagation.
  /// If so, return true.
  bool SimplifyDemandedBits(SDValue Op) {
    unsigned BitWidth = Op.getScalarValueSizeInBits();
    APInt Demanded = APInt::getAllOnesValue(BitWidth);
    return SimplifyDemandedBits(Op, Demanded);
  }

  /// Check the specified vector node value to see if it can be simplified or
  /// if things it uses can be simplified as it only uses some of the
  /// elements. If so, return true.
  bool SimplifyDemandedVectorElts(SDValue Op) {
    unsigned NumElts = Op.getValueType().getVectorNumElements();
    APInt Demanded = APInt::getAllOnesValue(NumElts);
    return SimplifyDemandedVectorElts(Op, Demanded);
  }

  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded);

  bool CombineToPreIndexedLoadStore(SDNode *N);
  bool CombineToPostIndexedLoadStore(SDNode *N);
  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
  bool SliceUpLoad(SDNode *N);

  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
  /// load.
  ///
  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
  /// \param EltNo index of the vector element to load.
  /// \param OriginalLoad load that EVE came from to be replaced.
  /// \returns EVE on success SDValue() on failure.
  SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
      SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue PromoteIntBinOp(SDValue Op);
  SDValue PromoteIntShiftOp(SDValue Op);
  SDValue PromoteExtend(SDValue Op);
  bool PromoteLoad(SDValue Op);

  /// Call the node-specific routine that knows how to fold each
  /// particular type of node. If that doesn't do anything, try the
  /// target-specific DAG combines.
  SDValue combine(SDNode *N);

  // Visitation implementation - Implement dag node combining for different
  // node types. The semantics are as follows:
  // Return Value:
  //   SDValue.getNode() == 0 - No change was made
  //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
  //   otherwise              - N should be replaced by the returned Operand.
  //
  SDValue visitTokenFactor(SDNode *N);
  SDValue visitMERGE_VALUES(SDNode *N);
  SDValue visitADD(SDNode *N);
  SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
  SDValue visitSUB(SDNode *N);
  SDValue visitADDC(SDNode *N);
  SDValue visitUADDO(SDNode *N);
  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitSUBC(SDNode *N);
  SDValue visitUSUBO(SDNode *N);
  SDValue visitADDE(SDNode *N);
  SDValue visitADDCARRY(SDNode *N);
  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
  SDValue visitSUBE(SDNode *N);
  SDValue visitSUBCARRY(SDNode *N);
  SDValue visitMUL(SDNode *N);
  SDValue useDivRem(SDNode *N);
  SDValue visitSDIV(SDNode *N);
  SDValue visitUDIV(SDNode *N);
  SDValue visitREM(SDNode *N);
  SDValue visitMULHU(SDNode *N);
  SDValue visitMULHS(SDNode *N);
  SDValue visitSMUL_LOHI(SDNode *N);
  SDValue visitUMUL_LOHI(SDNode *N);
  SDValue visitSMULO(SDNode *N);
  SDValue visitUMULO(SDNode *N);
  SDValue visitIMINMAX(SDNode *N);
  SDValue visitAND(SDNode *N);
  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
  SDValue visitOR(SDNode *N);
  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
  SDValue visitXOR(SDNode *N);
  SDValue SimplifyVBinOp(SDNode *N);
  SDValue visitSHL(SDNode *N);
  SDValue visitSRA(SDNode *N);
  SDValue visitSRL(SDNode *N);
  SDValue visitRotate(SDNode *N);
  SDValue visitABS(SDNode *N);
  SDValue visitBSWAP(SDNode *N);
  SDValue visitBITREVERSE(SDNode *N);
  SDValue visitCTLZ(SDNode *N);
  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTTZ(SDNode *N);
  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTPOP(SDNode *N);
  SDValue visitSELECT(SDNode *N);
  SDValue visitVSELECT(SDNode *N);
  SDValue visitSELECT_CC(SDNode *N);
  SDValue visitSETCC(SDNode *N);
  SDValue visitSETCCCARRY(SDNode *N);
  SDValue visitSIGN_EXTEND(SDNode *N);
  SDValue visitZERO_EXTEND(SDNode *N);
  SDValue visitANY_EXTEND(SDNode *N);
  SDValue visitAssertExt(SDNode *N);
  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitTRUNCATE(SDNode *N);
  SDValue visitBITCAST(SDNode *N);
  SDValue visitBUILD_PAIR(SDNode *N);
  SDValue visitFADD(SDNode *N);
  SDValue visitFSUB(SDNode *N);
  SDValue visitFMUL(SDNode *N);
  SDValue visitFMA(SDNode *N);
  SDValue visitFDIV(SDNode *N);
  SDValue visitFREM(SDNode *N);
  SDValue visitFSQRT(SDNode *N);
  SDValue visitFCOPYSIGN(SDNode *N);
  SDValue visitSINT_TO_FP(SDNode *N);
  SDValue visitUINT_TO_FP(SDNode *N);
  SDValue visitFP_TO_SINT(SDNode *N);
  SDValue visitFP_TO_UINT(SDNode *N);
  SDValue visitFP_ROUND(SDNode *N);
  SDValue visitFP_ROUND_INREG(SDNode *N);
  SDValue visitFP_EXTEND(SDNode *N);
  SDValue visitFNEG(SDNode *N);
  SDValue visitFABS(SDNode *N);
  SDValue visitFCEIL(SDNode *N);
  SDValue visitFTRUNC(SDNode *N);
  SDValue visitFFLOOR(SDNode *N);
  SDValue visitFMINNUM(SDNode *N);
  SDValue visitFMAXNUM(SDNode *N);
  SDValue visitBRCOND(SDNode *N);
  SDValue visitBR_CC(SDNode *N);
  SDValue visitLOAD(SDNode *N);

  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

  SDValue visitSTORE(SDNode *N);
  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
  SDValue visitBUILD_VECTOR(SDNode *N);
  SDValue visitCONCAT_VECTORS(SDNode *N);
  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
  SDValue visitVECTOR_SHUFFLE(SDNode *N);
  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
  SDValue visitINSERT_SUBVECTOR(SDNode *N);
  SDValue visitMLOAD(SDNode *N);
  SDValue visitMSTORE(SDNode *N);
  SDValue visitMGATHER(SDNode *N);
  SDValue visitMSCATTER(SDNode *N);
  SDValue visitFP_TO_FP16(SDNode *N);
  SDValue visitFP16_TO_FP(SDNode *N);

  SDValue visitFADDForFMACombine(SDNode *N);
  SDValue visitFSUBForFMACombine(SDNode *N);
  SDValue visitFMULForFMADistributiveCombine(SDNode *N);

  SDValue XformToShuffleWithZero(SDNode *N);
  SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
                         SDValue RHS);

  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

  SDValue foldSelectOfConstants(SDNode *N);
  SDValue foldVSelectOfConstants(SDNode *N);
  SDValue foldBinOpIntoSelect(SDNode *BO);
  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
  SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                           SDValue N2, SDValue N3, ISD::CondCode CC,
                           bool NotExtCompare = false);
  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                 SDValue N2, SDValue N3, ISD::CondCode CC);
  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                            const SDLoc &DL);
  SDValue unfoldMaskedMerge(SDNode *N);
  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                        const SDLoc &DL, bool foldBooleans);
  SDValue rebuildSetCC(SDValue N);

  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                         SDValue &CC) const;
  bool isOneUseSetCC(SDValue N) const;

  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                     unsigned HiOp);
  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
  SDValue CombineExtLoad(SDNode *N);
  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
  SDValue combineRepeatedFPDivisors(SDNode *N);
  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
  SDValue BuildSDIV(SDNode *N);
  SDValue BuildSDIVPow2(SDNode *N);
  SDValue BuildUDIV(SDNode *N);
  SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
  SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
                              SDNodeFlags Flags, bool Reciprocal);
  SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
                              SDNodeFlags Flags, bool Reciprocal);
  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                             bool DemandHighBits = true);
  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                            SDValue InnerPos, SDValue InnerNeg,
                            unsigned PosOpcode, unsigned NegOpcode,
                            const SDLoc &DL);
  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
  SDValue MatchLoadCombine(SDNode *N);
  SDValue ReduceLoadWidth(SDNode *N);
  SDValue ReduceLoadOpStoreWidth(SDNode *N);
  SDValue splitMergedValStore(StoreSDNode *ST);
  SDValue TransformFPLoadStorePair(SDNode *N);
  SDValue convertBuildVecZextToZext(SDNode *N);
  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
  SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
  SDValue reduceBuildVecToShuffle(SDNode *N);
  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                ArrayRef<int> VectorMask, SDValue VecIn1,
                                SDValue VecIn2, unsigned LeftIdx);
  SDValue matchVSelectOpSizesWithSetCC(SDNode *N);

  /// Walk up chain skipping non-aliasing memory nodes,
  /// looking for aliasing nodes and adding them to the Aliases vector.
  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                        SmallVectorImpl<SDValue> &Aliases);

  /// Return true if there is any possibility that the two addresses overlap.
  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
  /// chain (aliasing node.)
  SDValue FindBetterChain(SDNode *N, SDValue Chain);

  /// Try to replace a store and any possibly adjacent stores on
  /// consecutive chains with better chains. Return true only if St is
  /// replaced.
  ///
  /// Notice that other chains may still be replaced even if the function
  /// returns false.
  bool findBetterNeighborChains(StoreSDNode *St);

  /// Match "(X shl/srl V1) & V2" where V2 may not be present.
  bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);

  /// Holds a pointer to an LSBaseSDNode as well as information on where it
  /// is located in a sequence of memory operations connected by a chain.
  struct MemOpLink {
    // Ptr to the mem node.
    LSBaseSDNode *MemNode;

    // Offset from the base ptr.
    int64_t OffsetFromBase;

    MemOpLink(LSBaseSDNode *N, int64_t Offset)
        : MemNode(N), OffsetFromBase(Offset) {}
  };

  /// This is a helper function for visitMUL to check the profitability
  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
  /// MulNode is the original multiply, AddNode is (add x, c1),
  /// and ConstNode is c2.
  bool isMulAddWithConstProfitable(SDNode *MulNode,
                                  SDValue &AddNode,
                                  SDValue &ConstNode);

  /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
  /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
  /// the type of the loaded value to be extended.
  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                        EVT LoadResultTy, EVT &ExtVT);

  /// Helper function to calculate whether the given Load can have its
  /// width reduced to ExtVT.
  bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
                         EVT &ExtVT, unsigned ShAmt = 0);

  /// Used by BackwardsPropagateMask to find suitable loads.
  bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
                         SmallPtrSetImpl<SDNode*> &NodeWithConsts,
                         ConstantSDNode *Mask, SDNode *&UncombinedNode);

  /// Attempt to propagate a given AND node back to load leaves so that they
  /// can be combined into narrow loads.
  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

  /// Helper function for MergeConsecutiveStores which merges the
  /// component store chains.
  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumStores);

  /// This is a helper function for MergeConsecutiveStores. When the
  /// source elements of the consecutive stores are all constants or
  /// all extracted vector elements, try to merge them into one
  /// larger store introducing bitcasts if necessary.  \return True
  /// if a merged store was created.
  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                       EVT MemVT, unsigned NumStores,
                                       bool IsConstantSrc, bool UseVector,
                                       bool UseTrunc);

  /// This is a helper function for MergeConsecutiveStores. Stores
  /// that potentially may be merged with St are placed in
  /// StoreNodes. RootNode is a chain predecessor to all store
  /// candidates.
  void getStoreMergeCandidates(StoreSDNode *St,
                               SmallVectorImpl<MemOpLink> &StoreNodes,
                               SDNode *&Root);

  /// Helper function for MergeConsecutiveStores. Checks if
  /// candidate stores have indirect dependency through their
  /// operands. RootNode is the predecessor to all stores calculated
  /// by getStoreMergeCandidates and is used to prune the dependency check.
  /// \return True if safe to merge.
  bool checkMergeStoreCandidatesForDependencies(
      SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
      SDNode *RootNode);

  /// Merge consecutive store operations into a wide store.
  /// This optimization uses wide integers or vectors when possible.
  /// \return number of stores that were merged into a merged store (the
  /// affected nodes are stored as a prefix in \p StoreNodes).
  bool MergeConsecutiveStores(StoreSDNode *N);

  /// Try to transform a truncation where C is a constant:
  ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
  ///
  /// \p N needs to be a truncation and its first operand an AND. Other
  /// requirements are checked by the function (e.g. that trunc is
  /// single-use) and if missed an empty SDValue is returned.
  SDValue distributeTruncateThroughAnd(SDNode *N);

  /// Helper function to determine whether the target supports operation
  /// given by \p Opcode for type \p VT, that is, whether the operation
  /// is legal or custom before legalizing operations, and whether is
  /// legal (but not custom) after legalization.
  bool hasOperation(unsigned Opcode, EVT VT) {
    if (LegalOperations)
      return TLI.isOperationLegal(Opcode, VT);
    return TLI.isOperationLegalOrCustom(Opcode, VT);
  }

public:
  /// Runs the dag combiner on all nodes in the work list
  void Run(CombineLevel AtLevel);

  SelectionDAG &getDAG() const { return DAG; }

  /// Returns a type large enough to hold any valid shift amount - before type
  /// legalization these can be huge.
  EVT getShiftAmountTy(EVT LHSTy) {
    assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
    return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
  }

  /// This method returns true if we are running before type legalization or
  /// if the specified VT is legal.
  bool isTypeLegal(const EVT &VT) {
    if (!LegalTypes) return true;
    return TLI.isTypeLegal(VT);
  }

  /// Convenience wrapper around TargetLowering::getSetCCResultType
  EVT getSetCCResultType(EVT VT) const {
    return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  }

  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                       SDValue OrigLoad, SDValue ExtLoad,
                       ISD::NodeType ExtType);
};

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===// 656 // Helper Functions 657 //===----------------------------------------------------------------------===// 658 659 void DAGCombiner::deleteAndRecombine(SDNode *N) { 660 removeFromWorklist(N); 661 662 // If the operands of this node are only used by the node, they will now be 663 // dead. Make sure to re-visit them and recursively delete dead nodes. 664 for (const SDValue &Op : N->ops()) 665 // For an operand generating multiple values, one of the values may 666 // become dead allowing further simplification (e.g. split index 667 // arithmetic from an indexed load). 668 if (Op->hasOneUse() || Op->getNumValues() > 1) 669 AddToWorklist(Op.getNode()); 670 671 DAG.DeleteNode(N); 672 } 673 674 /// Return 1 if we can compute the negated form of the specified expression for 675 /// the same cost as the expression itself, or 2 if we can compute the negated 676 /// form more cheaply than the expression itself. 677 static char isNegatibleForFree(SDValue Op, bool LegalOperations, 678 const TargetLowering &TLI, 679 const TargetOptions *Options, 680 unsigned Depth = 0) { 681 // fneg is removable even if it has multiple uses. 682 if (Op.getOpcode() == ISD::FNEG) return 2; 683 684 // Don't allow anything with multiple uses unless we know it is free. 685 EVT VT = Op.getValueType(); 686 const SDNodeFlags Flags = Op->getFlags(); 687 if (!Op.hasOneUse()) 688 if (!(Op.getOpcode() == ISD::FP_EXTEND && 689 TLI.isFPExtFree(VT, Op.getOperand(0).getValueType()))) 690 return 0; 691 692 // Don't recurse exponentially. 693 if (Depth > 6) return 0; 694 695 switch (Op.getOpcode()) { 696 default: return false; 697 case ISD::ConstantFP: { 698 if (!LegalOperations) 699 return 1; 700 701 // Don't invert constant FP values after legalization unless the target says 702 // the negated constant is legal. 
703 return TLI.isOperationLegal(ISD::ConstantFP, VT) || 704 TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT); 705 } 706 case ISD::FADD: 707 if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros()) 708 return 0; 709 710 // After operation legalization, it might not be legal to create new FSUBs. 711 if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) 712 return 0; 713 714 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 715 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 716 Options, Depth + 1)) 717 return V; 718 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 719 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 720 Depth + 1); 721 case ISD::FSUB: 722 // We can't turn -(A-B) into B-A when we honor signed zeros. 723 if (!Options->NoSignedZerosFPMath && 724 !Flags.hasNoSignedZeros()) 725 return 0; 726 727 // fold (fneg (fsub A, B)) -> (fsub B, A) 728 return 1; 729 730 case ISD::FMUL: 731 case ISD::FDIV: 732 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 733 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 734 Options, Depth + 1)) 735 return V; 736 737 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 738 Depth + 1); 739 740 case ISD::FP_EXTEND: 741 case ISD::FP_ROUND: 742 case ISD::FSIN: 743 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, 744 Depth + 1); 745 } 746 } 747 748 /// If isNegatibleForFree returns true, return the newly negated expression. 749 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 750 bool LegalOperations, unsigned Depth = 0) { 751 const TargetOptions &Options = DAG.getTarget().Options; 752 // fneg is removable even if it has multiple uses. 
753 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 754 755 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 756 757 const SDNodeFlags Flags = Op.getNode()->getFlags(); 758 759 switch (Op.getOpcode()) { 760 default: llvm_unreachable("Unknown code"); 761 case ISD::ConstantFP: { 762 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 763 V.changeSign(); 764 return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); 765 } 766 case ISD::FADD: 767 assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros()); 768 769 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 770 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 771 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 772 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 773 GetNegatedExpression(Op.getOperand(0), DAG, 774 LegalOperations, Depth+1), 775 Op.getOperand(1), Flags); 776 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 777 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 778 GetNegatedExpression(Op.getOperand(1), DAG, 779 LegalOperations, Depth+1), 780 Op.getOperand(0), Flags); 781 case ISD::FSUB: 782 // fold (fneg (fsub 0, B)) -> B 783 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 784 if (N0CFP->isZero()) 785 return Op.getOperand(1); 786 787 // fold (fneg (fsub A, B)) -> (fsub B, A) 788 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 789 Op.getOperand(1), Op.getOperand(0), Flags); 790 791 case ISD::FMUL: 792 case ISD::FDIV: 793 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 794 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 795 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 796 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 797 GetNegatedExpression(Op.getOperand(0), DAG, 798 LegalOperations, Depth+1), 799 Op.getOperand(1), Flags); 800 801 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 802 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 803 
Op.getOperand(0), 804 GetNegatedExpression(Op.getOperand(1), DAG, 805 LegalOperations, Depth+1), Flags); 806 807 case ISD::FP_EXTEND: 808 case ISD::FSIN: 809 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 810 GetNegatedExpression(Op.getOperand(0), DAG, 811 LegalOperations, Depth+1)); 812 case ISD::FP_ROUND: 813 return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), 814 GetNegatedExpression(Op.getOperand(0), DAG, 815 LegalOperations, Depth+1), 816 Op.getOperand(1)); 817 } 818 } 819 820 // APInts must be the same size for most operations, this helper 821 // function zero extends the shorter of the pair so that they match. 822 // We provide an Offset so that we can create bitwidths that won't overflow. 823 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { 824 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth()); 825 LHS = LHS.zextOrSelf(Bits); 826 RHS = RHS.zextOrSelf(Bits); 827 } 828 829 // Return true if this node is a setcc, or is a select_cc 830 // that selects between the target values used for true and false, making it 831 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to 832 // the appropriate nodes based on the type of node we are checking. This 833 // simplifies life a bit for the callers. 
834 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 835 SDValue &CC) const { 836 if (N.getOpcode() == ISD::SETCC) { 837 LHS = N.getOperand(0); 838 RHS = N.getOperand(1); 839 CC = N.getOperand(2); 840 return true; 841 } 842 843 if (N.getOpcode() != ISD::SELECT_CC || 844 !TLI.isConstTrueVal(N.getOperand(2).getNode()) || 845 !TLI.isConstFalseVal(N.getOperand(3).getNode())) 846 return false; 847 848 if (TLI.getBooleanContents(N.getValueType()) == 849 TargetLowering::UndefinedBooleanContent) 850 return false; 851 852 LHS = N.getOperand(0); 853 RHS = N.getOperand(1); 854 CC = N.getOperand(4); 855 return true; 856 } 857 858 /// Return true if this is a SetCC-equivalent operation with only one use. 859 /// If this is true, it allows the users to invert the operation for free when 860 /// it is profitable to do so. 861 bool DAGCombiner::isOneUseSetCC(SDValue N) const { 862 SDValue N0, N1, N2; 863 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 864 return true; 865 return false; 866 } 867 868 static SDValue peekThroughBitcast(SDValue V) { 869 while (V.getOpcode() == ISD::BITCAST) 870 V = V.getOperand(0); 871 return V; 872 } 873 874 // Returns the SDNode if it is a constant float BuildVector 875 // or constant float. 876 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { 877 if (isa<ConstantFPSDNode>(N)) 878 return N.getNode(); 879 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) 880 return N.getNode(); 881 return nullptr; 882 } 883 884 // Determines if it is a constant integer or a build vector of constant 885 // integers (and undefs). 886 // Do not permit build vector implicit truncation. 
887 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { 888 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N)) 889 return !(Const->isOpaque() && NoOpaques); 890 if (N.getOpcode() != ISD::BUILD_VECTOR) 891 return false; 892 unsigned BitWidth = N.getScalarValueSizeInBits(); 893 for (const SDValue &Op : N->op_values()) { 894 if (Op.isUndef()) 895 continue; 896 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op); 897 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth || 898 (Const->isOpaque() && NoOpaques)) 899 return false; 900 } 901 return true; 902 } 903 904 // Determines if it is a constant null integer or a splatted vector of a 905 // constant null integer (with no undefs). 906 // Build vector implicit truncation is not an issue for null values. 907 static bool isNullConstantOrNullSplatConstant(SDValue N) { 908 // TODO: may want to use peekThroughBitcast() here. 909 if (ConstantSDNode *Splat = isConstOrConstSplat(N)) 910 return Splat->isNullValue(); 911 return false; 912 } 913 914 // Determines if it is a constant integer of one or a splatted vector of a 915 // constant integer of one (with no undefs). 916 // Do not permit build vector implicit truncation. 917 static bool isOneConstantOrOneSplatConstant(SDValue N) { 918 // TODO: may want to use peekThroughBitcast() here. 919 unsigned BitWidth = N.getScalarValueSizeInBits(); 920 if (ConstantSDNode *Splat = isConstOrConstSplat(N)) 921 return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth; 922 return false; 923 } 924 925 // Determines if it is a constant integer of all ones or a splatted vector of a 926 // constant integer of all ones (with no undefs). 927 // Do not permit build vector implicit truncation. 
static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
  // All-ones survives reinterpretation, so look through bitcasts first.
  N = peekThroughBitcast(N);
  unsigned BitWidth = N.getScalarValueSizeInBits();
  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
    // The bit-width check rejects implicitly-truncated build-vector elements.
    return Splat->isAllOnesValue() &&
           Splat->getAPIntValue().getBitWidth() == BitWidth;
  return false;
}

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
static bool isAnyConstantBuildVector(const SDNode *N) {
  return ISD::isBuildVectorOfConstantSDNodes(N) ||
         ISD::isBuildVectorOfConstantFPSDNodes(N);
}

/// Reassociate a commutative binary operation \p Opc to expose constant
/// folding opportunities. Returns the reassociated value, or a null SDValue
/// if no reassociation applies.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        // Constant folding failed; don't try the one-use transform below.
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  // Mirror of the above with the constant-carrying operation on the RHS.
  if (N1.getOpcode() == Opc) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}

/// Replace all uses of the NumTo values of N with the values in To, update the
/// worklist, and delete N if it became dead. Returns SDValue(N, 0) so visit
/// routines can signal "CombineTo was used".
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // The listener keeps the worklist consistent while RAUW deletes nodes.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

/// Apply a TargetLoweringOpt replacement (Old -> New) produced by the target
/// lowering simplification helpers, keeping the worklist in sync.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation.  If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
                                             const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Replace \p Load with TRUNCATE(ExtLoad): value uses get the truncate, chain
/// uses get ExtLoad's chain, then the old load is deleted.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

/// Produce a value of type \p PVT that computes \p Op extended. Sets
/// \p Replace when the caller must later replace a load with the new
/// promoted (extending) load.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // A plain (non-extending) load becomes an EXTLOAD of the wider type;
    // an extending load keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}

/// Promote \p Op to type \p PVT and sign-extend-in-reg from the original
/// type, so the result's low bits carry the sign-extended value.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

/// Promote \p Op to type \p PVT and zero-extend-in-reg from the original
/// type, so the result's high bits are cleared.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}

/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    // The promotion of the value being shifted must match the shift kind:
    // arithmetic right shifts need the sign bits, logical right shifts need
    // zeroed high bits, and left shifts don't care (any-extend).
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}

/// Promote an extend node to a wider desirable type when the target asks for
/// it; the folded extends listed below make this a single wider extend.
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

/// Replace an undesirable-typed unindexed load with an extending load of a
/// target-preferred wider type, truncating back for the value users.
/// Returns true if the replacement was made.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // A plain load becomes an EXTLOAD of the wider type; an extending load
    // keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}

/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      // Dead: queue its operands (they may become dead too) and delete it.
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still live: it lost a user, so revisit it for further combines.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}

//===----------------------------------------------------------------------===//
//  Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

/// Run the combiner over the whole DAG at the given legalization level until
/// the worklist is exhausted.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node.  This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

/// Dispatch a node to its opcode-specific visit routine. Returns a null
/// SDValue when no combine applies.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}

SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
1622 if (!RV.getNode()) { 1623 switch (N->getOpcode()) { 1624 default: break; 1625 case ISD::ADD: 1626 case ISD::SUB: 1627 case ISD::MUL: 1628 case ISD::AND: 1629 case ISD::OR: 1630 case ISD::XOR: 1631 RV = PromoteIntBinOp(SDValue(N, 0)); 1632 break; 1633 case ISD::SHL: 1634 case ISD::SRA: 1635 case ISD::SRL: 1636 RV = PromoteIntShiftOp(SDValue(N, 0)); 1637 break; 1638 case ISD::SIGN_EXTEND: 1639 case ISD::ZERO_EXTEND: 1640 case ISD::ANY_EXTEND: 1641 RV = PromoteExtend(SDValue(N, 0)); 1642 break; 1643 case ISD::LOAD: 1644 if (PromoteLoad(SDValue(N, 0))) 1645 RV = SDValue(N, 0); 1646 break; 1647 } 1648 } 1649 1650 // If N is a commutative binary node, try eliminate it if the commuted 1651 // version is already present in the DAG. 1652 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) && 1653 N->getNumValues() == 1) { 1654 SDValue N0 = N->getOperand(0); 1655 SDValue N1 = N->getOperand(1); 1656 1657 // Constant operands are canonicalized to RHS. 1658 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) { 1659 SDValue Ops[] = {N1, N0}; 1660 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, 1661 N->getFlags()); 1662 if (CSENode) 1663 return SDValue(CSENode, 0); 1664 } 1665 } 1666 1667 return RV; 1668 } 1669 1670 /// Given a node, return its input chain if it has one, otherwise return a null 1671 /// sd operand. 
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    // Check the common first/last positions before scanning the middle.
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

/// Simplify a TokenFactor: drop redundant operands (entry tokens, duplicates,
/// single-use nested TokenFactors, and chains already covered by another
/// operand) and rebuild a smaller TokenFactor if anything changed.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;  // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;         // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors. The TFs list grows when new token factors
  // are encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // NOTE: this local lambda shadows DAGCombiner::AddToWorklist for the
  // remainder of this function.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // The 1024 bound caps how many chain nodes we will visit overall.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to prune.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the ops that were not reached while walking up another
        // op's chain (those in SeenChains are covered transitively).
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
// (visitMERGE_VALUES, continued from above.)
  AddUsersToWorklist(N);
  do {
    // Forward each result to the corresponding operand of the MERGE_VALUES.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
/// ConstantSDNode pointer else nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}

/// Try to eliminate a binop whose only non-constant operand is a single-use
/// select-of-constants by applying the binop to both arms of the select:
/// binop (select Cond, CT, CF), CBO --> select Cond, CT binop CBO, CF binop CBO
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  auto BinOpcode = BO->getOpcode();
  assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
          BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
          BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
          BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
          BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
          BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
          BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
          BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
          BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
         "Unexpected binary operator");

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
                         (isNullConstantOrNullSplatConstant(CT) ||
                          isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
                         (isNullConstantOrNullSplatConstant(CF) ||
                          isAllOnesConstantOrAllOnesSplatConstant(CF));

  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  EVT VT = Sel.getValueType();

  // In case of shift value and shift amount may have different VT. For instance
  // on x86 shift amount is i8 regardless of LHS type. Bail out if we have
  // swapped operands and value types do not match. NB: x86 is fine if operands
  // are not swapped with shift amount VT being not bigger than shifted value.
  // TODO: that is possible to check for a shift operation, correct VTs and
  // still perform optimization on x86 if needed.
  if (SelOpNo && VT != CBO.getValueType())
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  SDLoc DL(Sel);
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
}

/// Fold add/sub of a constant with (zext i1 (seteq (X & 1), 0)) into a
/// sub/add of the plain low bit with an adjusted constant.
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // Match a constant operand and a zext operand for the math instruction:
  // add Z, C
  // sub C, Z
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
  auto *CN = dyn_cast<ConstantSDNode>(C);
  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  // Match the zext operand as a setcc of a boolean.
  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
      Z.getOperand(0).getValueType() != MVT::i1)
    return SDValue();

  // Match the compare as: setcc (X & 1), 0, eq.
// (foldAddSubBoolOfMaskedVal, continued from above.)
  SDValue SetCC = Z.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
      SetCC.getOperand(0).getOpcode() != ISD::AND ||
      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
    return SDValue();

  // We are adding/subtracting a constant and an inverted low bit. Turn that
  // into a subtract/add of the low bit with incremented/decremented constant:
  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
  EVT VT = C.getValueType();
  SDLoc DL(N);
  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}

/// Combine an ISD::ADD node: constant folding, canonicalization of constants
/// to the RHS, and a sequence of algebraic add/sub folds, ending with the
/// shared visitADDLike folds applied in both operand orders.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneConstantOrOneSplatConstant(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
    return RADD;

  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // fold (add (xor a, -1), 1) -> (sub 0, a)
  if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                       N0.getOperand(0));

  // Try the shared add-like folds in both operand orders.
  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}

/// If \p V (possibly wrapped in TRUNCATE/ZERO_EXTEND/AND-with-1 nodes) is the
/// carry result of an ADDCARRY/SUBCARRY/UADDO/USUBO node, return that carry
/// value; otherwise return a null SDValue.
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
  bool Masked = false;

  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
  while (true) {
    if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
      V = V.getOperand(0);
      continue;
    }

    if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
      Masked = true;
      V = V.getOperand(0);
      continue;
    }

    break;
  }

  // If this is not a carry, return.
// (getAsCarry, continued: the carry is always result number 1.)
  if (V.getResNo() != 1)
    return SDValue();

  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
      V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
    return SDValue();

  // If the result is masked, then no matter what kind of bool it is we can
  // return. If it isn't, then we need to make sure the bool type is either 0 or
  // 1 and not other values.
  if (Masked ||
      TLI.getBooleanContents(V.getValueType()) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
    return V;

  return SDValue();
}

/// Add-like folds shared by visitADD; callers invoke this with both operand
/// orders, so patterns here only need to match one orientation.
SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    if (NumSignBits == DestBits &&
        isOneConstantOrOneSplatConstant(N1->getOperand(1)))
      return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}

/// Combine an ISD::ADDC node (add producing a glue carry flag).
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        DL, MVT::Glue));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

/// Build the logical negation of boolean \p V by XOR'ing it with the
/// "true" constant implied by the target's boolean-contents for \p VT.
static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
                           SelectionDAG &DAG, const TargetLowering &TLI) {
  SDValue Cst;
  switch (TLI.getBooleanContents(VT)) {
  case TargetLowering::ZeroOrOneBooleanContent:
  case TargetLowering::UndefinedBooleanContent:
    Cst = DAG.getConstant(1, DL, VT);
    break;
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    Cst = DAG.getConstant(-1, DL, VT);
    break;
  }

  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
}

/// Return true if \p V is an XOR of a boolean with the "true" constant for
/// the target's boolean-contents of \p VT, i.e. a boolean negation.
static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
  if (V.getOpcode() != ISD::XOR) return false;
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!Const) return false;

  switch(TLI.getBooleanContents(VT)) {
  case TargetLowering::ZeroOrOneBooleanContent:
    return Const->isOne();
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    return Const->isAllOnesValue();
  case TargetLowering::UndefinedBooleanContent:
    // Only the low bit is meaningful; it must be set.
    return (Const->getAPIntValue() & 0x01) == 1;
  }
  llvm_unreachable("Unsupported boolean content");
}

/// Combine an ISD::UADDO node (unsigned add with overflow result).
SDValue DAGCombiner::visitUADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  // These combines are scalar-only.
  if (VT.isVector())
    return SDValue();
2316 EVT CarryVT = N->getValueType(1); 2317 SDLoc DL(N); 2318 2319 // If the flag result is dead, turn this into an ADD. 2320 if (!N->hasAnyUseOfValue(1)) 2321 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), 2322 DAG.getUNDEF(CarryVT)); 2323 2324 // canonicalize constant to RHS. 2325 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2326 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2327 if (N0C && !N1C) 2328 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0); 2329 2330 // fold (uaddo x, 0) -> x + no carry out 2331 if (isNullConstant(N1)) 2332 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT)); 2333 2334 // If it cannot overflow, transform into an add. 2335 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never) 2336 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), 2337 DAG.getConstant(0, DL, CarryVT)); 2338 2339 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry. 2340 if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) { 2341 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(), 2342 DAG.getConstant(0, DL, VT), 2343 N0.getOperand(0)); 2344 return CombineTo(N, Sub, 2345 flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI)); 2346 } 2347 2348 if (SDValue Combined = visitUADDOLike(N0, N1, N)) 2349 return Combined; 2350 2351 if (SDValue Combined = visitUADDOLike(N1, N0, N)) 2352 return Combined; 2353 2354 return SDValue(); 2355 } 2356 2357 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { 2358 auto VT = N0.getValueType(); 2359 2360 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) 2361 // If Y + 1 cannot overflow. 
2362 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) { 2363 SDValue Y = N1.getOperand(0); 2364 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType()); 2365 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never) 2366 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y, 2367 N1.getOperand(2)); 2368 } 2369 2370 // (uaddo X, Carry) -> (addcarry X, 0, Carry) 2371 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) 2372 if (SDValue Carry = getAsCarry(TLI, N1)) 2373 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, 2374 DAG.getConstant(0, SDLoc(N), VT), Carry); 2375 2376 return SDValue(); 2377 } 2378 2379 SDValue DAGCombiner::visitADDE(SDNode *N) { 2380 SDValue N0 = N->getOperand(0); 2381 SDValue N1 = N->getOperand(1); 2382 SDValue CarryIn = N->getOperand(2); 2383 2384 // canonicalize constant to RHS 2385 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2386 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2387 if (N0C && !N1C) 2388 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), 2389 N1, N0, CarryIn); 2390 2391 // fold (adde x, y, false) -> (addc x, y) 2392 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 2393 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1); 2394 2395 return SDValue(); 2396 } 2397 2398 SDValue DAGCombiner::visitADDCARRY(SDNode *N) { 2399 SDValue N0 = N->getOperand(0); 2400 SDValue N1 = N->getOperand(1); 2401 SDValue CarryIn = N->getOperand(2); 2402 SDLoc DL(N); 2403 2404 // canonicalize constant to RHS 2405 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2406 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2407 if (N0C && !N1C) 2408 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn); 2409 2410 // fold (addcarry x, y, false) -> (uaddo x, y) 2411 if (isNullConstant(CarryIn)) { 2412 if (!LegalOperations || 2413 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0))) 2414 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1); 
  }

  EVT CarryVT = CarryIn.getValueType();

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
  if (isBitwiseNot(N0) && isNullConstant(N1) &&
      isBooleanFlip(CarryIn, CarryVT, TLI)) {
    SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
                              DAG.getConstant(0, DL, N0.getValueType()),
                              N0.getOperand(0), CarryIn.getOperand(0));
    return CombineTo(N, Sub,
                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
  }

  // The remaining folds are symmetric in N0/N1, so try both orders.
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}

/// Helper for visitADDCARRY: tries folds of (addcarry N0, N1, CarryIn) for
/// one operand ordering; the caller invokes it with both orderings.
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry argument is itself a carry, we may be facing
   * a diamond carry propagation. In which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   *
   * We are trying to get:
   * (addcarry X, 0, (addcarry A, B, Z):Carry)
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    /**
     *         (uaddo A, B)
     *          /       \
     *       Carry      Sum
     *         |          \
     *         |    (addcarry *, 0, Z)
     *         |          /
     *          \      Carry
     *           |      /
     *       (addcarry X, *, *)
     */
    if (Y.getOpcode() == ISD::UADDO &&
        CarryIn.getResNo() == 1 &&
        CarryIn.getOpcode() == ISD::ADDCARRY &&
        isNullConstant(CarryIn.getOperand(1)) &&
        CarryIn.getOperand(0) == Y.getValue(0)) {
      // Replace the inner uaddo with an addcarry that consumes Z, then feed
      // its carry-out into the outer node, linearizing the carry chain.
      auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
                              Y.getOperand(0), Y.getOperand(1),
                              CarryIn.getOperand(2));
      AddToWorklist(NewY.getNode());
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), N0.getValueType()),
                         NewY.getValue(1));
    }
  }

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG, bool LegalOperations,
                             bool LegalTypes) {
  if (!VT.isVector())
    return DAG.getConstant(0, DL, VT);
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, DL, VT);
  return SDValue();
}

/// Combine an ISD::SUB node: constant folding, algebraic identities,
/// negation-of-shift tricks, symbol-offset folding, and abs recognition.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Opaque constants must not be negated, so only grab a non-opaque one.
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  if (isNullConstantOrNullSplatConstant(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstantOrAllOnesSplatConstant(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
      SDValue S0 = N1.getOperand(0);
      // The xor must combine the value with its own sign-splat (the sra),
      // in either operand order.
      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
      }
    }
  }

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      // An i1 sext-in-reg yields 0 or -1, so subtracting it is the same as
      // adding the zero-extended bit.
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Combine an ISD::SUBC node (sub producing a glue-based borrow output).
SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (isAllOnesConstant(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

/// Combine an ISD::USUBO node (unsigned sub with boolean overflow output).
/// Vector types are not handled here.
SDValue DAGCombiner::visitUSUBO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // fold (usubo x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getConstant(0, DL, CarryVT));

  // fold (usubo x, 0) -> x + no borrow
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (isAllOnesConstant(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getConstant(0, DL, CarryVT));

  return SDValue();
}

/// Combine an ISD::SUBE node (sub with glue carry-in): a known-false borrow
/// lowers it to ISD::SUBC.
SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

/// Combine an ISD::SUBCARRY node (sub with boolean borrow-in/out): a
/// constant-zero borrow lowers it to ISD::USUBO when that is available.
SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (subcarry x, y, false) -> (usubo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
      return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
  }

  return SDValue();
}

/// Combine an ISD::MUL node: constant folding, canonicalization, and
/// strength reduction of multiplies by (negated) powers of two to shifts.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // For vectors, only splat constants participate in the scalar-style
    // folds below; opaqueness does not apply to splats.
    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N0IsConst ||
            ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                                   DAG.getConstant(Log2Val, DL,
                                                   getShiftAmountTy(N0.getValueType()))));
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    // Only fold if the shift constant-folded; otherwise we would duplicate
    // work instead of removing it.
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

  return SDValue();
}

/// Return true if divmod libcall is available.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
                                     const TargetLowering &TLI) {
  RTLIB::Libcall LC;
  EVT NodeType = Node->getValueType(0);
  if (!NodeType.isSimple())
    return false;
  switch (NodeType.getSimpleVT().SimpleTy) {
  default: return false; // No libcall for vector types.
  case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
  case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
  case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
  case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
  }

  return TLI.getLibcallName(LC) != nullptr;
}

/// Issue divrem if both quotient and remainder are needed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Scan all users of the dividend for matching div/rem nodes on the same
  // operands, and fold them all into a single DIVREM.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;   // Lazily-created (or found) DIVREM node.
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
        User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Value 0 of DIVREM is the quotient, value 1 the remainder.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}

/// Folds of div/rem common to signed and unsigned: undef operands.
static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (DAG.isUndef(N->getOpcode(), {N0, N1}))
    return DAG.getUNDEF(VT);

  // undef / X -> 0
  // undef % X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);

  return SDValue();
}

/// Combine an ISD::SDIV node: constant folding, strength reduction to udiv
/// or shifts, and target/DAG-level division expansions.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned BitWidth = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
  if (N1C && N1C->getAPIntValue().isMinSignedValue())
    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, VT, N0, N1, ISD::SETEQ),
                         DAG.getConstant(1, DL, VT),
                         DAG.getConstant(0, DL, VT));

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

  // Helper for determining whether a value is a power-2 constant scalar or a
  // vector of such elements.
  // NOTE(review): KnownNegatives does not appear to be used in the visible
  // body of this function — confirm whether it is dead.
  SmallBitVector KnownNegatives(
      (N1C || !VT.isVector()) ? 1 : VT.getVectorNumElements(), false);
  auto IsPowerOfTwo = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    if (C->getAPIntValue().isAllOnesValue())
      return false;
    if (C->getAPIntValue().isMinSignedValue())
      return false;

    if (C->getAPIntValue().isPowerOf2())
      return true;
    if ((-C->getAPIntValue()).isPowerOf2())
      return true;
    return false;
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (!N->getFlags().hasExact() &&
      ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Create constants that are functions of the shift amount value.
    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    if (!isConstantOrConstantVector(Inexact))
      return SDValue();

    // Splat the sign bit into the register
    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    AddToWorklist(Sign.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    AddToWorklist(Srl.getNode());
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    AddToWorklist(Add.getNode());
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    AddToWorklist(Sra.getNode());

    // If dividing by a positive value, we're done.  Otherwise, the result must
    // be negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue Res = DAG.getSelect(
        DL, VT, DAG.getSetCC(DL, VT, N1, Zero, ISD::SETLT), Sub, Sra);
    // Special case: (sdiv X, 1) -> X
    SDValue One = DAG.getConstant(1, DL, VT);
    Res = DAG.getSelect(DL, VT, DAG.getSetCC(DL, VT, N1, One, ISD::SETEQ), N0,
                        Res);
    return Res;
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}

/// Combine an ISD::UDIV node: constant folding and strength reduction of
/// divides by (shifted) powers of two to right shifts.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (udiv x, (1 << c)) -> x >>u c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    AddToWorklist(LogBase2.getNode());

    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    AddToWorklist(Trunc.getNode());
    return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
  }

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    SDValue N10 = N1.getOperand(0);
    if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
        DAG.isKnownToBeAPowerOfTwo(N10)) {
      SDValue LogBase2 = BuildLogBase2(N10, DL);
      AddToWorklist(LogBase2.getNode());

      EVT ADDVT = N1.getOperand(1).getValueType();
      SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
      AddToWorklist(Trunc.getNode());
      SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  return SDValue();
}

// handles ISD::SREM and ISD::UREM
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;

  if (SDValue V = simplifyDivRem(N, DAG))
    return V;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
  } else {
    SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
    if (DAG.isKnownToBeAPowerOfTwo(N1)) {
      // fold (urem x, pow2) -> (and x, pow2-1)
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
    if (N1.getOpcode() == ISD::SHL &&
        DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
      // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::AND, DL, VT, N0, Add);
    }
  }

  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // To avoid mangling nodes, this simplification requires that the combine()
  // call for the speculative DIV must not cause a DIVREM conversion.  We guard
  // against this by skipping the simplification if isIntDivCheap().  When
  // div is not cheap, combine will not return a DIVREM.  Regardless,
  // checking cheapness here makes sense since the simplification results in
  // fatter code.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode() &&
        OptimizedDiv.getOpcode() != ISD::UDIVREM &&
        OptimizedDiv.getOpcode() != ISD::SDIVREM) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem (and the unsigned equivalents)
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  return SDValue();
}

/// Combine an ISD::MULHS node (signed multiply returning the high half).
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (VT.isVector()) {
    // fold (mulhs x, 0) -> 0
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N0;
  }

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  if (isOneConstant(N1))
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (mulhs x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
3308 if (VT.isSimple() && !VT.isVector()) { 3309 MVT Simple = VT.getSimpleVT(); 3310 unsigned SimpleSize = Simple.getSizeInBits(); 3311 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 3312 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 3313 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); 3314 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); 3315 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 3316 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 3317 DAG.getConstant(SimpleSize, DL, 3318 getShiftAmountTy(N1.getValueType()))); 3319 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 3320 } 3321 } 3322 3323 return SDValue(); 3324 } 3325 3326 SDValue DAGCombiner::visitMULHU(SDNode *N) { 3327 SDValue N0 = N->getOperand(0); 3328 SDValue N1 = N->getOperand(1); 3329 EVT VT = N->getValueType(0); 3330 SDLoc DL(N); 3331 3332 if (VT.isVector()) { 3333 // fold (mulhu x, 0) -> 0 3334 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3335 return N1; 3336 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3337 return N0; 3338 } 3339 3340 // fold (mulhu x, 0) -> 0 3341 if (isNullConstant(N1)) 3342 return N1; 3343 // fold (mulhu x, 1) -> 0 3344 if (isOneConstant(N1)) 3345 return DAG.getConstant(0, DL, N0.getValueType()); 3346 // fold (mulhu x, undef) -> 0 3347 if (N0.isUndef() || N1.isUndef()) 3348 return DAG.getConstant(0, DL, VT); 3349 3350 // If the type twice as wide is legal, transform the mulhu to a wider multiply 3351 // plus a shift. 
3352 if (VT.isSimple() && !VT.isVector()) { 3353 MVT Simple = VT.getSimpleVT(); 3354 unsigned SimpleSize = Simple.getSizeInBits(); 3355 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 3356 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 3357 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); 3358 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); 3359 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 3360 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 3361 DAG.getConstant(SimpleSize, DL, 3362 getShiftAmountTy(N1.getValueType()))); 3363 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 3364 } 3365 } 3366 3367 return SDValue(); 3368 } 3369 3370 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp 3371 /// give the opcodes for the two computations that are being performed. Return 3372 /// true if a simplification was made. 3373 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 3374 unsigned HiOp) { 3375 // If the high half is not needed, just compute the low half. 3376 bool HiExists = N->hasAnyUseOfValue(1); 3377 if (!HiExists && 3378 (!LegalOperations || 3379 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { 3380 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 3381 return CombineTo(N, Res, Res); 3382 } 3383 3384 // If the low half is not needed, just compute the high half. 3385 bool LoExists = N->hasAnyUseOfValue(0); 3386 if (!LoExists && 3387 (!LegalOperations || 3388 TLI.isOperationLegal(HiOp, N->getValueType(1)))) { 3389 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 3390 return CombineTo(N, Res, Res); 3391 } 3392 3393 // If both halves are used, return as it is. 3394 if (LoExists && HiExists) 3395 return SDValue(); 3396 3397 // If the two computed results can be simplified separately, separate them. 
  // Speculatively build the single-result LoOp node and see if the combiner
  // can simplify it; if so, use the simplified form for both results.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  // Same for the high half.
  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}

/// Combine an ISD::SMUL_LOHI node (signed multiply producing both the low and
/// high halves of the double-width product).
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the smul_lohi to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

/// Combine an ISD::UMUL_LOHI node (unsigned multiply producing both the low
/// and high halves of the double-width product).
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the umul_lohi to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

/// Combine an ISD::SMULO node (signed multiply with overflow flag).
SDValue DAGCombiner::visitSMULO(SDNode *N) {
  // (smulo x, 2) -> (saddo x, x)
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

/// Combine an ISD::UMULO node (unsigned multiply with overflow flag).
SDValue DAGCombiner::visitUMULO(SDNode *N) {
  // (umulo x, 2) -> (uaddo x, x)
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

/// Combine the integer min/max nodes: ISD::SMIN/SMAX/UMIN/UMAX.
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold operation with constant operands.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
  // Only do this if the current op isn't legal and the flipped is.
  unsigned Opcode = N->getOpcode();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegal(Opcode, VT) &&
      (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
      (N1.isUndef() || DAG.SignBitIsZero(N1))) {
    // With both sign bits known zero, the signed and unsigned forms agree.
    unsigned AltOpcode;
    switch (Opcode) {
    case ISD::SMIN: AltOpcode = ISD::UMIN; break;
    case ISD::SMAX: AltOpcode = ISD::UMAX; break;
    case ISD::UMIN: AltOpcode = ISD::SMIN; break;
    case ISD::UMAX: AltOpcode = ISD::SMAX; break;
    default: llvm_unreachable("Unknown MINMAX opcode");
    }
    if (TLI.isOperationLegal(AltOpcode, VT))
      return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
  }

  return SDValue();
}

/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  // fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
      Level <= AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      // (XOR needs the second shuffle operand replaced by zero, see below.)
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C))) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    SVN0->getMask());
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B))) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    SVN0->getMask());
      }
    }
  }

  return SDValue();
}

/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT.getScalarType() != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Both compares use the same RHS constant and the same predicate: many
  // pairs can be merged into a single compare of a combined LHS.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullConstantOrNullSplatConstant(LR);
    bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
    // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
    // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  // The width check excludes i1, where 0 and -1 are the only values and the
  // combined test would be trivially false.
  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
      IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}

/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N1.getValueType();
  SDLoc DL(N);

  // fold (and x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
    return V;

  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
      VT.getSizeInBits() <= 64) {
    if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
        // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
        // immediate for an add, but it is legal if its top c2 bits are set,
        // transform the ADD so the immediate doesn't need to be materialized
        // in a register.
        APInt ADDC = ADDI->getAPIntValue();
        APInt SRLC = SRLI->getAPIntValue();
        if (ADDC.getMinSignedBits() <= 64 &&
            SRLC.ult(VT.getSizeInBits()) &&
            !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                             SRLC.getZExtValue());
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
            ADDC |= Mask;
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
              SDLoc DL0(N0);
              SDValue NewAdd =
                DAG.getNode(ISD::ADD, DL0, VT,
                            N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
              CombineTo(N0.getNode(), NewAdd);
              // Return N so it doesn't get rechecked!
              return SDValue(N, 0);
            }
          }
        }
      }
    }
  }

  // Reduce bit extract of low half of an integer to the narrower type.
  // (and (srl i64:x, K), KMask) ->
  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
      if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        unsigned Size = VT.getSizeInBits();
        const APInt &AndMask = CAnd->getAPIntValue();
        unsigned ShiftBits = CShift->getZExtValue();

        // Bail out, this node will probably disappear anyway.
        if (ShiftBits == 0)
          return SDValue();

        unsigned MaskBits = AndMask.countTrailingOnes();
        EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);

        if (AndMask.isMask() &&
            // Required bits must not span the two halves of the integer and
            // must fit in the half size type.
            (ShiftBits + MaskBits <= Size / 2) &&
            TLI.isNarrowingProfitable(VT, HalfVT) &&
            TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
            TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
            TLI.isTruncateFree(VT, HalfVT) &&
            TLI.isZExtFree(HalfVT, VT)) {
          // The isNarrowingProfitable is to avoid regressions on PPC and
          // AArch64 which match a few 64-bit bit insert / bit extract patterns
          // on downstream users of this. Those patterns could probably be
          // extended to handle extensions mixed in.

          // SL converts implicitly to SDLoc at the getNode calls below.
          SDValue SL(N0);
          assert(MaskBits <= Size);

          // Extracting the highest bit of the low half.
          EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
                                      N0.getOperand(0));

          SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
          SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
          SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
          SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
          return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
        }
      }
    }
  }

  return SDValue();
}

/// Return true if masking LoadN's result with AndC (a contiguous low-bit
/// mask) is equivalent to a zext-load of the masked width. On success, ExtVT
/// is set to the narrow integer type covering the mask's active bits.
bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                                   EVT LoadResultTy, EVT &ExtVT) {
  if (!AndC->getAPIntValue().isMask())
    return false;

  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();

  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  EVT LoadedVT = LoadN->getMemoryVT();

  if (ExtVT == LoadedVT &&
      (!LegalOperations ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
    // ZEXTLOAD will match without needing to change the size of the value being
    // loaded.
    return true;
  }

  // Do not change the width of a volatile load.
  if (LoadN->isVolatile())
    return false;

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
    return false;

  if (LegalOperations &&
      !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
    return false;

  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
    return false;

  return true;
}

/// Return true if it is legal to shrink LoadN to a load of ExtVT starting
/// ShAmt bits into the loaded value (ShAmt must be a multiple of 8).
bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
                                    EVT &ExtVT, unsigned ShAmt) {
  // Don't transform one with multiple uses, this would require adding a new
  // load.
  if (!SDValue(LoadN, 0).hasOneUse())
    return false;

  if (LegalOperations &&
      !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT))
    return false;

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return false;

  // Don't change the width of a volatile load.
  if (LoadN->isVolatile())
    return false;

  // Verify that we are actually reducing a load width here.
  if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits())
    return false;

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain). Don't transform a pre-increment
  // load, for example, which produces an extra value. Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LoadN->getNumValues() > 2)
    return false;

  // Only allow byte offsets.
  if (ShAmt % 8)
    return false;

  // Ensure that this isn't going to produce an unsupported unaligned access.
  if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                       ExtVT, LoadN->getAddressSpace(),
                                       ShAmt / 8))
    return false;


  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LoadN->getExtensionType() != ISD::NON_EXTLOAD &&
      LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return false;

  if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT))
    return false;

  // It's not possible to generate a constant of extended or untyped type.
  EVT PtrType = LoadN->getOperand(1).getValueType();
  if (PtrType == MVT::Untyped || PtrType.isExtended())
    return false;

  return true;
}

/// Recursively walk the operand tree below N (through OR/XOR/AND) collecting
/// loads that can be narrowed under Mask into Loads, and logic nodes whose
/// constant operands will need shrinking into NodesWithConsts. At most one
/// other node is tolerated; it is reported via NodeToMask and must be
/// explicitly AND-masked by the caller. Returns false if any operand rules
/// out the transformation.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallPtrSetImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    if (!Op.hasOneUse())
      return false;

    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.insert(Load);

        continue;
      }
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will be masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}

/// Given an AND node N whose RHS is a contiguous low-bit mask, push the mask
/// backwards through the tree of logic ops feeding it so the loads at the
/// leaves can be narrowed, then bypass the AND. Returns true if the DAG was
/// changed.
bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Mask)
    return false;

  if (!Mask->getAPIntValue().isMask())
    return false;

  // No need to do anything if the and directly uses a load.
  if (isa<LoadSDNode>(N->getOperand(0)))
    return false;

  SmallPtrSet<LoadSDNode*, 8> Loads;
  SmallPtrSet<SDNode*, 2> NodesWithConsts;
  SDNode *FixupNode = nullptr;
  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    if (Loads.size() == 0)
      return false;

    LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
    SDValue MaskOp = N->getOperand(1);

    // If it exists, fixup the single node we allow in the tree that needs
    // masking.
    if (FixupNode) {
      LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
                                FixupNode->getValueType(0),
                                SDValue(FixupNode, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      // RAUW also rewired the new And's own operand to itself; point it back
      // at FixupNode.
      DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
                             MaskOp);
    }

    // Narrow any constants that need it.
    for (auto *LogicN : NodesWithConsts) {
      SDValue Op0 = LogicN->getOperand(0);
      SDValue Op1 = LogicN->getOperand(1);

      // Canonicalize so the constant operand (if any) is Op1.
      if (isa<ConstantSDNode>(Op0))
        std::swap(Op0, Op1);

      SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
                                Op1, MaskOp);

      DAG.UpdateNodeOperands(LogicN, Op0, And);
    }

    // Create narrow loads.
    for (auto *Load : Loads) {
      LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
      SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
                                SDValue(Load, 0), MaskOp);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      // As above: restore the And's first operand after the RAUW.
      DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
      SDValue NewLoad = ReduceLoadWidth(And.getNode());
      assert(NewLoad &&
             "Shouldn't be masking the load if it can't be narrowed");
      CombineTo(Load, NewLoad, NewLoad.getValue(1));
    }
    // The mask is now applied at the leaves; the AND itself is redundant.
    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
    return true;
  }
  return false;
}

/// Combiner entry point for ISD::AND.  Applies the full battery of AND folds
/// (identity, constant, demanded-bits, load-narrowing, bswap matching) and
/// returns the replacement value, or an empty SDValue if nothing changed.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
                             SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
                             SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
    return RAND;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (VT.isVector())
    if (SDValue Shuffle = XformToShuffleWithZero(N))
      return Shuffle;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things. This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                          N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          // AND all the lanes of the splat together so Constant reflects the
          // bits common to every lane.
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    // B records whether eliminating the AND preserves semantics for this
    // extension kind.
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    if (SDValue Res = ReduceLoadWidth(N)) {
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);

      AddToWorklist(N);
      CombineTo(LN0, Res, Res.getValue(1));
      return SDValue(N, 0);
    }
  }

  if (Level >= AfterLegalizeTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N, DAG)) {
      return SDValue(N, 0);
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
      SDValue SubRHS = N0.getOperand(1);
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return SubRHS;
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  return SDValue();
}

/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// N0/N1 are the two OR (or AND-of-shift) operands; when DemandHighBits is
/// false the caller has already masked away the high bits, so the high-bit
/// zeroing checks are skipped.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize: the (and (shl ...)) operand to N0, (and (srl ...)) to N1.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
    // This is needed for X86.
    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
                  N01C->getZExtValue() != 0xFFFF))
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  // Both shifts must be by exactly 8 bits.
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    // Also allow 0xFFFF since the bits will be shifted out. This is needed
    // for X86.
    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
                   N101C->getZExtValue() != 0xFFFF))
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must originate from the same value 'a'.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    // For types wider than i16, shift the swapped halfword back down into
    // the low 16 bits.
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}

/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// On success, records the source node for the matched byte in
/// Parts[MaskByteOffset]; fails if that slot has already been claimed.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
  if (!N.getNode()->hasOneUse())
    return false;

  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  SDValue N0 = N.getOperand(0);
  unsigned Opc0 = N0.getOpcode();
  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
    return false;

  ConstantSDNode *N1C = nullptr;
  // SHL or SRL: look upstream for AND mask operand
  if (Opc == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  else if (Opc0 == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (!N1C)
    return false;

  // Map the mask constant to the byte index it selects.
  unsigned MaskByteOffset;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       MaskByteOffset = 0; break;
  case 0xFF00:     MaskByteOffset = 1; break;
  case 0xFFFF:
    // In case demanded bits didn't clear the bits that will be shifted out.
    // This is needed for X86.
    if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
      MaskByteOffset = 1;
      break;
    }
    return false;
  case 0xFF0000:   MaskByteOffset = 2; break;
  case 0xFF000000: MaskByteOffset = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  if (Opc == ISD::AND) {
    if (MaskByteOffset == 0 || MaskByteOffset == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (Opc0 != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (Opc0 != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (MaskByteOffset != 0 && MaskByteOffset != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (MaskByteOffset != 1 && MaskByteOffset != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  // Each byte slot may only be matched once.
  if (Parts[MaskByteOffset])
    return false;

  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
  return true;
}

/// Match a 32-bit packed halfword bswap.
/// That is:
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  // Parts[i] collects the source node supplying byte i; filled in by
  // isBSwapHWordElement.
  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordElement(N00, Parts))
      return SDValue();

    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    SDValue N10 = N1.getOperand(0);
    if (!isBSwapHWordElement(N10, Parts))
      return SDValue();
    SDValue N11 = N1.getOperand(1);
    if (!isBSwapHWordElement(N11, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}

/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N1.getValueType();
  SDLoc DL(N);

  // fold (or x, undef) -> -1
  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
    return DAG.getAllOnesConstant(DL, VT);

  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
    return V;

  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero. Likewise for Y.
    if (const ConstantSDNode *N0O1C =
        getAsNonOpaqueConstant(N0.getOperand(1))) {
      if (const ConstantSDNode *N1O1C =
          getAsNonOpaqueConstant(N1.getOperand(1))) {
        // We can only do this xform if we know that bits from X that are set in
        // C2 but not in C1 are already zero. Likewise for Y.
        const APInt &LHSMask = N0O1C->getAPIntValue();
        const APInt &RHSMask = N1O1C->getAPIntValue();

        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                  N0.getOperand(0), N1.getOperand(0));
          return DAG.getNode(ISD::AND, DL, VT, X,
                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
        }
      }
    }
  }

  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(0) == N1.getOperand(0) &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                            N0.getOperand(1), N1.getOperand(1));
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
  }

  return SDValue();
}

/// Combiner entry point for ISD::OR.  Applies identity/constant folds,
/// shuffle merging, bswap and rotate pattern matching, and demanded-bits
/// simplification; returns the replacement value or an empty SDValue.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the other side is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          if (!LegalMask) {
            // Try the commuted form before giving up.
            std::swap(NewLHS, NewRHS);
            ShuffleVectorSDNode::commuteMask(Mask);
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          }

          if (LegalMask)
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0.
  auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
    if (SDValue COR = DAG.FoldConstantArithmetic(
            ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Match "(X shl/srl V1) & V2" where V2 may not be present.
/// On success, Shift is set to the shift node and Mask to the AND mask
/// operand (Mask is left untouched when no AND is present).
bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
  if (Op.getOpcode() == ISD::AND) {
    if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
      Mask = Op.getOperand(1);
      Op = Op.getOperand(0);
    } else {
      return false;
    }
  }

  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    Shift = Op;
    return true;
  }

  return false;
}

// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).
// This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                      [B]
  //
  // for all Neg and Pos. Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time. E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      KnownBits Known;
      DAG.computeKnownBits(Neg.getOperand(0), Known);
      unsigned Bits = Log2_64(EltSize);
      // The AND must be a no-op on the low log2(EltSize) bits: the constant's
      // set bits together with known-zero bits of the input must cover them.
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'. The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
      KnownBits Known;
      DAG.computeKnownBits(Pos.getOperand(0), Known);
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
           MaskLoBits))
        Pos = Pos.getOperand(0);
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //     EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
5080 // Then the condition we want to prove becomes: 5081 // 5082 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask 5083 // 5084 // which, again because "x & Mask" is a truncation, becomes: 5085 // 5086 // NegC & Mask == (EltSize - PosC) & Mask 5087 // EltSize & Mask == (NegC + PosC) & Mask 5088 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) { 5089 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) 5090 Width = PosC->getAPIntValue() + NegC->getAPIntValue(); 5091 else 5092 return false; 5093 } else 5094 return false; 5095 5096 // Now we just need to check that EltSize & Mask == Width & Mask. 5097 if (MaskLoBits) 5098 // EltSize & Mask is 0 since Mask is EltSize - 1. 5099 return Width.getLoBits(MaskLoBits) == 0; 5100 return Width == EltSize; 5101 } 5102 5103 // A subroutine of MatchRotate used once we have found an OR of two opposite 5104 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces 5105 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the 5106 // former being preferred if supported. InnerPos and InnerNeg are Pos and 5107 // Neg with outer conversions stripped away. 5108 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, 5109 SDValue Neg, SDValue InnerPos, 5110 SDValue InnerNeg, unsigned PosOpcode, 5111 unsigned NegOpcode, const SDLoc &DL) { 5112 // fold (or (shl x, (*ext y)), 5113 // (srl x, (*ext (sub 32, y)))) -> 5114 // (rotl x, y) or (rotr x, (sub 32, y)) 5115 // 5116 // fold (or (shl x, (*ext (sub 32, y))), 5117 // (srl x, (*ext y))) -> 5118 // (rotr x, y) or (rotl x, (sub 32, y)) 5119 EVT VT = Shifted.getValueType(); 5120 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) { 5121 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); 5122 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, 5123 HasPos ? 
Pos : Neg).getNode(); 5124 } 5125 5126 return nullptr; 5127 } 5128 5129 // MatchRotate - Handle an 'or' of two operands. If this is one of the many 5130 // idioms for rotate, and if the target supports rotation instructions, generate 5131 // a rot[lr]. 5132 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { 5133 // Must be a legal type. Expanded 'n promoted things won't work with rotates. 5134 EVT VT = LHS.getValueType(); 5135 if (!TLI.isTypeLegal(VT)) return nullptr; 5136 5137 // The target must have at least one rotate flavor. 5138 bool HasROTL = hasOperation(ISD::ROTL, VT); 5139 bool HasROTR = hasOperation(ISD::ROTR, VT); 5140 if (!HasROTL && !HasROTR) return nullptr; 5141 5142 // Check for truncated rotate. 5143 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE && 5144 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) { 5145 assert(LHS.getValueType() == RHS.getValueType()); 5146 if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { 5147 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), 5148 SDValue(Rot, 0)).getNode(); 5149 } 5150 } 5151 5152 // Match "(X shl/srl V1) & V2" where V2 may not be present. 5153 SDValue LHSShift; // The shift. 5154 SDValue LHSMask; // AND value if any. 5155 if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) 5156 return nullptr; // Not part of a rotate. 5157 5158 SDValue RHSShift; // The shift. 5159 SDValue RHSMask; // AND value if any. 5160 if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) 5161 return nullptr; // Not part of a rotate. 5162 5163 if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) 5164 return nullptr; // Not shifting the same value. 5165 5166 if (LHSShift.getOpcode() == RHSShift.getOpcode()) 5167 return nullptr; // Shifts must disagree. 5168 5169 // Canonicalize shl to left side in a shl/srl pair. 
5170 if (RHSShift.getOpcode() == ISD::SHL) { 5171 std::swap(LHS, RHS); 5172 std::swap(LHSShift, RHSShift); 5173 std::swap(LHSMask, RHSMask); 5174 } 5175 5176 unsigned EltSizeInBits = VT.getScalarSizeInBits(); 5177 SDValue LHSShiftArg = LHSShift.getOperand(0); 5178 SDValue LHSShiftAmt = LHSShift.getOperand(1); 5179 SDValue RHSShiftArg = RHSShift.getOperand(0); 5180 SDValue RHSShiftAmt = RHSShift.getOperand(1); 5181 5182 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) 5183 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) 5184 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS, 5185 ConstantSDNode *RHS) { 5186 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; 5187 }; 5188 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { 5189 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, 5190 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); 5191 5192 // If there is an AND of either shifted operand, apply it to the result. 5193 if (LHSMask.getNode() || RHSMask.getNode()) { 5194 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); 5195 SDValue Mask = AllOnes; 5196 5197 if (LHSMask.getNode()) { 5198 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt); 5199 Mask = DAG.getNode(ISD::AND, DL, VT, Mask, 5200 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits)); 5201 } 5202 if (RHSMask.getNode()) { 5203 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt); 5204 Mask = DAG.getNode(ISD::AND, DL, VT, Mask, 5205 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits)); 5206 } 5207 5208 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); 5209 } 5210 5211 return Rot.getNode(); 5212 } 5213 5214 // If there is a mask here, and we have a variable shift, we can't be sure 5215 // that we're masking out the right stuff. 5216 if (LHSMask.getNode() || RHSMask.getNode()) 5217 return nullptr; 5218 5219 // If the shift amount is sign/zext/any-extended just peel it off. 
5220 SDValue LExtOp0 = LHSShiftAmt; 5221 SDValue RExtOp0 = RHSShiftAmt; 5222 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 5223 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 5224 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 5225 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && 5226 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 5227 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 5228 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 5229 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { 5230 LExtOp0 = LHSShiftAmt.getOperand(0); 5231 RExtOp0 = RHSShiftAmt.getOperand(0); 5232 } 5233 5234 SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, 5235 LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); 5236 if (TryL) 5237 return TryL; 5238 5239 SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, 5240 RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); 5241 if (TryR) 5242 return TryR; 5243 5244 return nullptr; 5245 } 5246 5247 namespace { 5248 5249 /// Represents known origin of an individual byte in load combine pattern. The 5250 /// value of the byte is either constant zero or comes from memory. 5251 struct ByteProvider { 5252 // For constant zero providers Load is set to nullptr. For memory providers 5253 // Load represents the node which loads the byte from memory. 5254 // ByteOffset is the offset of the byte in the value produced by the load. 
  LoadSDNode *Load = nullptr;
  unsigned ByteOffset = 0;

  ByteProvider() = default;

  static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
    return ByteProvider(Load, ByteOffset);
  }

  static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }

  bool isConstantZero() const { return !Load; }
  bool isMemory() const { return Load; }

  bool operator==(const ByteProvider &Other) const {
    return Other.Load == Load && Other.ByteOffset == ByteOffset;
  }

private:
  // Private: use the getMemory/getConstantZero factories instead.
  ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
      : Load(Load), ByteOffset(ByteOffset) {}
};

} // end anonymous namespace

/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
/// For all the values except the root of the expression verifies that the value
/// has exactly one use and if it's not true return None. This way if the origin
/// of the byte is returned it's guaranteed that the values which contribute to
/// the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
static const Optional<ByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
                      bool Root = false) {
  // Typical i64 by i8 pattern requires recursion up to 8 calls depth; cap a
  // little above that to bound the traversal.
  if (Depth == 10)
    return None;

  // Only the root may have multiple uses; see the function comment.
  if (!Root && !Op.hasOneUse())
    return None;

  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return None;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    // A byte of an OR is known only if exactly one side contributes it and
    // the other side provides constant zero for that byte.
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    if (!LHS)
      return None;
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    if (!RHS)
      return None;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return None;
  }
  case ISD::SHL: {
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return None;

    // Only whole-byte shifts can be expressed as byte movement.
    uint64_t BitShift = ShiftOp->getZExtValue();
    if (BitShift % 8 != 0)
      return None;
    uint64_t ByteShift = BitShift / 8;

    // Bytes below the shift amount are zeros shifted in; the rest come from
    // the shifted operand at a reduced index.
    return Index < ByteShift
               ? ByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Bytes beyond the narrow value are known zero only for zext; for sext
    // and anyext their content is not a simple copy of any source byte.
    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
  }
  case ISD::BSWAP:
    // BSWAP mirrors the byte order, so ask the operand for the mirrored index.
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1);
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    if (L->isVolatile() || L->isIndexed())
      return None;

    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Same reasoning as for the extend nodes above, applied to extending
    // loads: only ZEXTLOAD guarantees zero high bytes.
    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return ByteProvider::getMemory(L, Index);
  }
  }

  return None;
}

/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the targets supports it.
///
/// Assuming little endian target:
///  i8 *a = ...
///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
///  i32 val = *((i32)a)
///
///  i8 *a = ...
///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
///  i32 val = BSWAP(*((i32)a))
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node.
/// For example, once the address
/// of t28 load is reassociated load combine won't be triggered:
///             t25: i32 = add t4, Constant:i32<2>
///           t26: i64 = sign_extend t25
///        t27: i64 = add t2, t26
///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
///     t29: i32 = zero_extend t28
///   t32: i32 = shl t29, Constant:i8<8>
/// t33: i32 = or t23, t32
/// As a possible fix visitLoad can check if the load can be a part of a load
/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
  assert(N->getOpcode() == ISD::OR &&
         "Can only match load combining against OR nodes");

  // Handles simple types only
  EVT VT = N->getValueType(0);
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();
  unsigned ByteWidth = VT.getSizeInBits() / 8;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Before legalize we can introduce too wide illegal loads which will be later
  // split into legal sized loads. This enables us to combine i64 load by i8
  // patterns to a couple of i32 loads on 32 bit targets.
  if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
    return SDValue();

  // Map a logical byte index i to its position within a BW-byte value for the
  // two possible byte orders.
  std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
    unsigned BW, unsigned i) { return i; };
  std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
    unsigned BW, unsigned i) { return BW - i - 1; };

  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
  // Offset in memory of provider P's byte, relative to the start of its load,
  // taking the target's endianness into account.
  auto MemoryByteOffset = [&] (ByteProvider P) {
    assert(P.isMemory() && "Must be a memory byte provider");
    unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes not bit");
    unsigned LoadByteWidth = LoadBitWidth / 8;
    return IsBigEndianTarget
            ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
            : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
  };

  Optional<BaseIndexOffset> Base;
  SDValue Chain;

  SmallPtrSet<LoadSDNode *, 8> Loads;
  Optional<ByteProvider> FirstByteProvider;
  int64_t FirstOffset = INT64_MAX;

  // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect bytes offsets from Base address in ByteOffsets.
  SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
  for (unsigned i = 0; i < ByteWidth; i++) {
    auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
    if (!P || !P->isMemory()) // All the bytes must be loaded from memory
      return SDValue();

    LoadSDNode *L = P->Load;
    assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
           "Must be enforced by calculateByteProvider");
    assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");

    // All loads must share the same chain
    SDValue LChain = L->getChain();
    if (!Chain)
      Chain = LChain;
    else if (Chain != LChain)
      return SDValue();

    // Loads must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Calculate the offset of the current byte from the base address
    ByteOffsetFromBase += MemoryByteOffset(*P);
    ByteOffsets[i] = ByteOffsetFromBase;

    // Remember the first byte load
    if (ByteOffsetFromBase < FirstOffset) {
      FirstByteProvider = P;
      FirstOffset = ByteOffsetFromBase;
    }

    Loads.insert(L);
  }
  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
         "memory, so there must be at least one load which produces the value");
  assert(Base && "Base address of the accessed memory location must be set");
  assert(FirstOffset != INT64_MAX && "First byte offset must be set");

  // Check if the bytes of the OR we are looking at match with either big or
  // little endian value load
  bool BigEndian = true, LittleEndian = true;
  for (unsigned i = 0; i < ByteWidth; i++) {
    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
    LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
    BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
    if (!BigEndian && !LittleEndian)
      return SDValue();
  }
  assert((BigEndian != LittleEndian) && "should be either or");
  assert(FirstByteProvider && "must be set");

  // Ensure that the first byte is loaded from zero offset of the first load.
  // So the combined value can be loaded from the first load address.
  if (MemoryByteOffset(*FirstByteProvider) != 0)
    return SDValue();
  LoadSDNode *FirstLoad = FirstByteProvider->Load;

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single load and bswap if needed.

  // If the load needs byte swap check if the target supports it
  bool NeedsBswap = IsBigEndianTarget != BigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Check that a load of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                        VT, FirstLoad->getAddressSpace(),
                                        FirstLoad->getAlignment(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  SDValue NewLoad =
      DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
                  FirstLoad->getPointerInfo(), FirstLoad->getAlignment());

  // Transfer chain users from old loads to the new load.
  for (LoadSDNode *L : Loads)
    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));

  return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
}

// If the target has andn, bsl, or a similar bit-select instruction,
// we want to unfold masked merge, with canonical pattern of:
//   |        A        |  |B|
//   ((x ^ y) & m) ^ y
//    |      D        |
// Into:
//   (x & m) | (y & ~m)
// If y is a constant, and the 'andn' does not work with immediates,
// we unfold into a different pattern:
//   ~(~x & m) & (m | y)
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
// the very least that breaks andnpd / andnps patterns, and because those
// patterns are simplified in IR and shouldn't be created in the DAG
SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
  assert(N->getOpcode() == ISD::XOR);

  // Don't touch 'not' (i.e. where y = -1).
  if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // There are 3 commutable operators in the pattern,
  // so we have to deal with 8 possible variants of the basic pattern.
  SDValue X, Y, M;
  // Match one arm of the pattern: And == ((X ^ Y) & M) where the XOR sits at
  // operand XorIdx of the AND, and Other is the value XOR'ed back in at the
  // root (i.e. Y). On success fills in X, Y and M.
  auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
    if (And.getOpcode() != ISD::AND || !And.hasOneUse())
      return false;
    SDValue Xor = And.getOperand(XorIdx);
    if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
      return false;
    SDValue Xor0 = Xor.getOperand(0);
    SDValue Xor1 = Xor.getOperand(1);
    // Don't touch 'not' (i.e. where y = -1).
    if (isAllOnesConstantOrAllOnesSplatConstant(Xor1))
      return false;
    if (Other == Xor0)
      std::swap(Xor0, Xor1);
    if (Other != Xor1)
      return false;
    X = Xor0;
    Y = Xor1;
    M = And.getOperand(XorIdx ? 0 : 1);
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
      !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
    return SDValue();

  // Don't do anything if the mask is constant. This should not be reachable.
  // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it, too.
  if (isa<ConstantSDNode>(M.getNode()))
    return SDValue();

  // We can transform if the target has AndNot
  if (!TLI.hasAndNot(M))
    return SDValue();

  SDLoc DL(N);

  // If Y is a constant, check that 'andn' works with immediates.
  if (!TLI.hasAndNot(Y)) {
    assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
    // If not, we need to do a bit more work to make sure andn is still used.
    // Emit the alternative unfolding: ~(~x & m) & (m | y).
    SDValue NotX = DAG.getNOT(DL, X, VT);
    SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
    SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
    SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
    return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
  }

  // Canonical unfolding: (x & m) | (y & ~m).
  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
  SDValue NotM = DAG.getNOT(DL, M, VT);
  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);

  return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
}

SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    // Only invert the condition if the inverted code is legal (or we are
    // before legalization).
    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL(N0);
    V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
                    DAG.getConstant(1, DL, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      // De Morgan: swap AND<->OR and complement both operands.
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) && N0.hasOneUse() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }

  // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
    SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
    if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
      SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
      SDValue S0 = S.getOperand(0);
      if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        // The shift amount must be the sign-extraction shift (size-1).
        if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
      }
    }
  }

  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  //          =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  //   A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
      && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
  if (SDValue MM = unfoldMaskedMerge(N))
    return MM;

  // Simplify the expression using non-local knowledge.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is copy/select. Enable this in other cases when figure out it's exactly
  // profitable.
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
                 BinOpLHSVal->getOpcode() == ISD::SRA ||
                 BinOpLHSVal->getOpcode() == ISD::SRL;
  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
                        BinOpLHSVal->getOpcode() == ISD::SELECT;

  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
      !isCopyOrSelect)
    return SDValue();

  if (isCopyOrSelect && N->hasOneUse())
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  // Give the target a chance to veto the commute.
  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}

/// Push a truncate through an AND with a constant:
/// (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC).
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
  assert(N->getOpcode() == ISD::TRUNCATE);
  assert(N->getOperand(0).getOpcode() == ISD::AND);

  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
    SDValue N01 = N->getOperand(0).getOperand(1);
    if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
      SDLoc DL(N);
      EVT TruncVT = N->getValueType(0);
      SDValue N00 = N->getOperand(0).getOperand(0);
      SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
      SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
      AddToWorklist(Trunc00.getNode());
      AddToWorklist(Trunc01.getNode());
      return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitRotate(SDNode *N) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  unsigned Bitsize = VT.getScalarSizeInBits();

  // fold (rot x, 0) -> x
  if (isNullConstantOrNullSplatConstant(N1))
    return N0;

  // fold (rot x, c) -> (rot x, c % BitSize)
  if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
    if (Cst->getAPIntValue().uge(Bitsize)) {
      uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
      return DAG.getNode(N->getOpcode(), dl, VT, N0,
                         DAG.getConstant(RotAmt, dl, N1.getValueType()));
    }
  }

  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
  }

  unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
  if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
    SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
    SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
    if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
      EVT ShiftVT = C1->getValueType(0);
      // Same rotate direction => amounts add; opposite directions => subtract.
      bool SameSide = (N->getOpcode() == NextOp);
      unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
      if (SDValue CombinedShift =
              DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
        SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
        SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
            ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
            BitsizeC.getNode());
        return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                           CombinedShiftNorm);
      }
    }
  }
  return SDValue();
}

/// Combine step for ISD::SHL nodes.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      }
    }
  }

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (isNullConstantOrNullSplatConstant(N0))
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SHL) {
    // c1 + c2 computed in a widened type so the sum cannot wrap.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      APInt c1 = N0Op0C1->getAPIntValue();
      APInt c2 = N1C->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);

      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // Outer shift amount must cover at least the bits added by the ext.
      if (c2.uge(OpSizeInBits - InnerShiftSize)) {
        SDLoc DL(N0);
        APInt Sum = c1 + c2;
        if (Sum.uge(OpSizeInBits))
          return DAG.getConstant(0, DL, VT);

        return DAG.getNode(
            ISD::SHL, DL, VT,
            DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
            DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
        uint64_t c1 = N0Op0C1->getZExtValue();
        uint64_t c2 = N1C->getZExtValue();
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X, C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1  > C2
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      N0->getFlags().hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
  //                               (and (srl x, (sub c1, c2)), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask <<= c2 - c1;
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask.lshrInPlace(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }

  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
      isConstantOrConstantVector(N1, /* No Opaques */ true)) {
    SDLoc DL(N);
    SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
    SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
      N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    AddToWorklist(Shl0.getNode());
    AddToWorklist(Shl1.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
      isConstantOrConstantVector(N1, /* No Opaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
    SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    if (isConstantOrConstantVector(Shl))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
  }

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}

/// Combine step for ISD::SRA nodes.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> c1 >>s c2 (constant fold)
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
  // fold (sra x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();

    // c1 + c2 computed in a widened type so the sum cannot wrap.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    // Out-of-range total: clamp to a shift by bitwidth-1 (fills with sign).
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
                         DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}

/// Combine step for ISD::SRL nodes.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (isNullConstantOrNullSplatConstant(N0))
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
  auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
    return Val->getAPIntValue().uge(OpSizeInBits);
  };
  if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    // c1 + c2 computed in a widened type so the sum cannot wrap.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       N0.getOperand(0).getOperand(0),
                                       DAG.getConstant(c1 + c2, DL,
                                                       ShiftCountVT)));
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                                       DAG.getConstant(ShiftAmt, DL0,
                                           getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known;
    DAG.computeKnownBits(N0.getOperand(0), Known);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits could
      // be set on input to the CTLZ node. If this bit is set, the SRL will
      // return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair to an
      // SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However, after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}

/// Combine step for ISD::ABS nodes.
SDValue DAGCombiner::visitABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (abs c1) -> c2 (constant fold via getNode)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
  // fold (abs (abs x)) -> (abs x)
  if (N0.getOpcode() == ISD::ABS)
    return N0;
  // fold (abs x) -> x iff not-negative
  if (DAG.SignBitIsZero(N0))
    return N0;
  return SDValue();
}

/// Combine step for ISD::BSWAP nodes.
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (bswap c1) -> c2 (constant fold via getNode)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
  // fold (bswap (bswap x)) -> x
  if (N0.getOpcode() == ISD::BSWAP)
    return N0->getOperand(0);
  return SDValue();
}

/// Combine step for ISD::BITREVERSE nodes.
SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (bitreverse c1) -> c2 (constant fold via getNode)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
  // fold (bitreverse (bitreverse x)) -> x
  if (N0.getOpcode() == ISD::BITREVERSE)
    return N0.getOperand(0);
  return SDValue();
}

/// Combine step for ISD::CTLZ nodes.
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2 (constant fold via getNode)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);

  // If the value is known never to be zero, switch to the undef version.
  if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
    if (DAG.isKnownNeverZero(N0))
      return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  }

  return SDValue();
}

/// Combine step for ISD::CTLZ_ZERO_UNDEF nodes.
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz_zero_undef c1) -> c2 (constant fold via getNode)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine step for ISD::CTTZ nodes.
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2 (constant fold via getNode)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);

  // If the value is known never to be zero, switch to the undef version.
  if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
    if (DAG.isKnownNeverZero(N0))
      return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  }

  return SDValue();
}

/// Combine step for ISD::CTTZ_ZERO_UNDEF nodes.
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz_zero_undef c1) -> c2 (constant fold via getNode)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// Combine step for ISD::CTPOP nodes.
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2 (constant fold via getNode)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
  return SDValue();
}

/// Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   ISD::CondCode CC, const TargetLowering &TLI,
                                   SelectionDAG &DAG) {
  // Only handle selects whose arms are exactly the compared operands.
  if
(!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) 6685 return SDValue(); 6686 6687 switch (CC) { 6688 case ISD::SETOLT: 6689 case ISD::SETOLE: 6690 case ISD::SETLT: 6691 case ISD::SETLE: 6692 case ISD::SETULT: 6693 case ISD::SETULE: { 6694 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM; 6695 if (TLI.isOperationLegal(Opcode, VT)) 6696 return DAG.getNode(Opcode, DL, VT, LHS, RHS); 6697 return SDValue(); 6698 } 6699 case ISD::SETOGT: 6700 case ISD::SETOGE: 6701 case ISD::SETGT: 6702 case ISD::SETGE: 6703 case ISD::SETUGT: 6704 case ISD::SETUGE: { 6705 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM; 6706 if (TLI.isOperationLegal(Opcode, VT)) 6707 return DAG.getNode(Opcode, DL, VT, LHS, RHS); 6708 return SDValue(); 6709 } 6710 default: 6711 return SDValue(); 6712 } 6713 } 6714 6715 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { 6716 SDValue Cond = N->getOperand(0); 6717 SDValue N1 = N->getOperand(1); 6718 SDValue N2 = N->getOperand(2); 6719 EVT VT = N->getValueType(0); 6720 EVT CondVT = Cond.getValueType(); 6721 SDLoc DL(N); 6722 6723 if (!VT.isInteger()) 6724 return SDValue(); 6725 6726 auto *C1 = dyn_cast<ConstantSDNode>(N1); 6727 auto *C2 = dyn_cast<ConstantSDNode>(N2); 6728 if (!C1 || !C2) 6729 return SDValue(); 6730 6731 // Only do this before legalization to avoid conflicting with target-specific 6732 // transforms in the other direction (create a select from a zext/sext). There 6733 // is also a target-independent combine here in DAGCombiner in the other 6734 // direction for (select Cond, -1, 0) when the condition is not i1. 
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // For any constants that differ by 1, we can transform the select into an
    // extend and add. Use a target hook because some targets may prefer to
    // transform in the other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
    }

    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}

/// Combine a SELECT node: constant conditions, boolean logic forms, min/max
/// recognition, and normalization of chained selects.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;

  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    // fold (select true, X, Y) -> X
    // fold (select false, X, Y) -> Y
    return !N0C->isNullValue() ?
        N1 : N2;
  }

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or C, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //   select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //   select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      // use_empty() is non-empty here when the inner select already existed
      // in the DAG (getNode returned an existing node).
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2);
      }
    }
  }

  if (VT0 == MVT::i1) {
    // select (not Cond), N1, N2 -> select Cond, N2, N1
    if (isBitwiseNot(N0))
      return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
  }

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      if (SDValue FMinMax = combineMinNumMaxNum(
              DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
                         N0.getOperand(1), N1, N2, N0.getOperand(2));
    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}

/// Split a vector SETCC node into two half-width SETCC nodes by splitting the
/// result type and both vector operands; the condition code is shared.
static
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // Split the inputs.
  SDValue Lo, Hi, LL, LH, RL, RH;
  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);

  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));

  return std::make_pair(Lo, Hi);
}

// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
  // binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF
  // After we find an UNDEF element, keep looping until we get to half the
  // length of the BV and see if all the non-undef nodes are the same.
  ConstantSDNode *BottomHalf = nullptr;
  for (int i = 0; i < NumElems / 2; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (BottomHalf == nullptr)
      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != BottomHalf)
      // Mixed condition values in the bottom half: can't pick one operand.
      return SDValue();
  }

  // Do the same for the second half of the BuildVector
  ConstantSDNode *TopHalf = nullptr;
  for (int i = NumElems / 2; i < NumElems; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (TopHalf == nullptr)
      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != TopHalf)
      return SDValue();
  }

  assert(TopHalf && BottomHalf &&
         "One half of the selector was all UNDEFs and the other was all the "
         "same value. This should have been addressed before this function.");
  // Each half of the result picks the corresponding concat operand of LHS or
  // RHS based on its half's (uniform) condition value.
  return DAG.getNode(
      ISD::CONCAT_VECTORS, DL, VT,
      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}

/// Combine a masked scatter: if the data type will be split by type
/// legalization and the mask is a SETCC, split the scatter (and the SETCC)
/// here instead.
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
  if (Level >= AfterLegalizeTypes)
    return SDValue();

  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Data = MSC->getValue();
  SDLoc DL(N);

  // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (Mask.getOpcode() != ISD::SETCC)
    return SDValue();

  // Check if any splitting is required.
  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
      TargetLowering::TypeSplitVector)
    return SDValue();
  SDValue MaskLo, MaskHi, Lo, Hi;
  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));

  SDValue Chain = MSC->getChain();

  EVT MemoryVT = MSC->getMemoryVT();
  unsigned Alignment = MSC->getOriginalAlignment();

  EVT LoMemVT, HiMemVT;
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue DataLo, DataHi;
  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  SDValue Scale = MSC->getScale();
  SDValue BasePtr = MSC->getBasePtr();
  SDValue IndexLo, IndexHi;
  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);

  MachineMemOperand *MMO = DAG.getMachineFunction().
7078 getMachineMemOperand(MSC->getPointerInfo(), 7079 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 7080 Alignment, MSC->getAAInfo(), MSC->getRanges()); 7081 7082 SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale }; 7083 Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), 7084 DL, OpsLo, MMO); 7085 7086 SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale }; 7087 Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), 7088 DL, OpsHi, MMO); 7089 7090 AddToWorklist(Lo.getNode()); 7091 AddToWorklist(Hi.getNode()); 7092 7093 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 7094 } 7095 7096 SDValue DAGCombiner::visitMSTORE(SDNode *N) { 7097 if (Level >= AfterLegalizeTypes) 7098 return SDValue(); 7099 7100 MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); 7101 SDValue Mask = MST->getMask(); 7102 SDValue Data = MST->getValue(); 7103 EVT VT = Data.getValueType(); 7104 SDLoc DL(N); 7105 7106 // If the MSTORE data type requires splitting and the mask is provided by a 7107 // SETCC, then split both nodes and its operands before legalization. This 7108 // prevents the type legalizer from unrolling SETCC into scalar comparisons 7109 // and enables future optimizations (e.g. min/max pattern matching on X86). 7110 if (Mask.getOpcode() == ISD::SETCC) { 7111 // Check if any splitting is required. 7112 if (TLI.getTypeAction(*DAG.getContext(), VT) != 7113 TargetLowering::TypeSplitVector) 7114 return SDValue(); 7115 7116 SDValue MaskLo, MaskHi, Lo, Hi; 7117 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 7118 7119 SDValue Chain = MST->getChain(); 7120 SDValue Ptr = MST->getBasePtr(); 7121 7122 EVT MemoryVT = MST->getMemoryVT(); 7123 unsigned Alignment = MST->getOriginalAlignment(); 7124 7125 // if Alignment is equal to the vector size, 7126 // take the half of it for the second part 7127 unsigned SecondHalfAlignment = 7128 (Alignment == VT.getSizeInBits() / 8) ? 
Alignment / 2 : Alignment; 7129 7130 EVT LoMemVT, HiMemVT; 7131 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 7132 7133 SDValue DataLo, DataHi; 7134 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 7135 7136 MachineMemOperand *MMO = DAG.getMachineFunction(). 7137 getMachineMemOperand(MST->getPointerInfo(), 7138 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 7139 Alignment, MST->getAAInfo(), MST->getRanges()); 7140 7141 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, 7142 MST->isTruncatingStore(), 7143 MST->isCompressingStore()); 7144 7145 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, 7146 MST->isCompressingStore()); 7147 unsigned HiOffset = LoMemVT.getStoreSize(); 7148 7149 MMO = DAG.getMachineFunction().getMachineMemOperand( 7150 MST->getPointerInfo().getWithOffset(HiOffset), 7151 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, 7152 MST->getAAInfo(), MST->getRanges()); 7153 7154 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, 7155 MST->isTruncatingStore(), 7156 MST->isCompressingStore()); 7157 7158 AddToWorklist(Lo.getNode()); 7159 AddToWorklist(Hi.getNode()); 7160 7161 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 7162 } 7163 return SDValue(); 7164 } 7165 7166 SDValue DAGCombiner::visitMGATHER(SDNode *N) { 7167 if (Level >= AfterLegalizeTypes) 7168 return SDValue(); 7169 7170 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N); 7171 SDValue Mask = MGT->getMask(); 7172 SDLoc DL(N); 7173 7174 // If the MGATHER result requires splitting and the mask is provided by a 7175 // SETCC, then split both nodes and its operands before legalization. This 7176 // prevents the type legalizer from unrolling SETCC into scalar comparisons 7177 // and enables future optimizations (e.g. min/max pattern matching on X86). 
7178 7179 if (Mask.getOpcode() != ISD::SETCC) 7180 return SDValue(); 7181 7182 EVT VT = N->getValueType(0); 7183 7184 // Check if any splitting is required. 7185 if (TLI.getTypeAction(*DAG.getContext(), VT) != 7186 TargetLowering::TypeSplitVector) 7187 return SDValue(); 7188 7189 SDValue MaskLo, MaskHi, Lo, Hi; 7190 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 7191 7192 SDValue Src0 = MGT->getValue(); 7193 SDValue Src0Lo, Src0Hi; 7194 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); 7195 7196 EVT LoVT, HiVT; 7197 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); 7198 7199 SDValue Chain = MGT->getChain(); 7200 EVT MemoryVT = MGT->getMemoryVT(); 7201 unsigned Alignment = MGT->getOriginalAlignment(); 7202 7203 EVT LoMemVT, HiMemVT; 7204 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 7205 7206 SDValue Scale = MGT->getScale(); 7207 SDValue BasePtr = MGT->getBasePtr(); 7208 SDValue Index = MGT->getIndex(); 7209 SDValue IndexLo, IndexHi; 7210 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); 7211 7212 MachineMemOperand *MMO = DAG.getMachineFunction(). 7213 getMachineMemOperand(MGT->getPointerInfo(), 7214 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), 7215 Alignment, MGT->getAAInfo(), MGT->getRanges()); 7216 7217 SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale }; 7218 Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, 7219 MMO); 7220 7221 SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale }; 7222 Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, 7223 MMO); 7224 7225 AddToWorklist(Lo.getNode()); 7226 AddToWorklist(Hi.getNode()); 7227 7228 // Build a factor node to remember that this load is independent of the 7229 // other one. 7230 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), 7231 Hi.getValue(1)); 7232 7233 // Legalized the chain result - switch anything that used the old chain to 7234 // use the new one. 
7235 DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); 7236 7237 SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 7238 7239 SDValue RetOps[] = { GatherRes, Chain }; 7240 return DAG.getMergeValues(RetOps, DL); 7241 } 7242 7243 SDValue DAGCombiner::visitMLOAD(SDNode *N) { 7244 if (Level >= AfterLegalizeTypes) 7245 return SDValue(); 7246 7247 MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); 7248 SDValue Mask = MLD->getMask(); 7249 SDLoc DL(N); 7250 7251 // If the MLOAD result requires splitting and the mask is provided by a 7252 // SETCC, then split both nodes and its operands before legalization. This 7253 // prevents the type legalizer from unrolling SETCC into scalar comparisons 7254 // and enables future optimizations (e.g. min/max pattern matching on X86). 7255 if (Mask.getOpcode() == ISD::SETCC) { 7256 EVT VT = N->getValueType(0); 7257 7258 // Check if any splitting is required. 7259 if (TLI.getTypeAction(*DAG.getContext(), VT) != 7260 TargetLowering::TypeSplitVector) 7261 return SDValue(); 7262 7263 SDValue MaskLo, MaskHi, Lo, Hi; 7264 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 7265 7266 SDValue Src0 = MLD->getSrc0(); 7267 SDValue Src0Lo, Src0Hi; 7268 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); 7269 7270 EVT LoVT, HiVT; 7271 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); 7272 7273 SDValue Chain = MLD->getChain(); 7274 SDValue Ptr = MLD->getBasePtr(); 7275 EVT MemoryVT = MLD->getMemoryVT(); 7276 unsigned Alignment = MLD->getOriginalAlignment(); 7277 7278 // if Alignment is equal to the vector size, 7279 // take the half of it for the second part 7280 unsigned SecondHalfAlignment = 7281 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 7282 Alignment/2 : Alignment; 7283 7284 EVT LoMemVT, HiMemVT; 7285 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 7286 7287 MachineMemOperand *MMO = DAG.getMachineFunction(). 
      getMachineMemOperand(MLD->getPointerInfo(),
                           MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
                           Alignment, MLD->getAAInfo(), MLD->getRanges());

    Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
                           ISD::NON_EXTLOAD, MLD->isExpandingLoad());

    // Advance the base pointer past the low half (mask-dependent for
    // expanding loads).
    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
                                     MLD->isExpandingLoad());
    unsigned HiOffset = LoMemVT.getStoreSize();

    MMO = DAG.getMachineFunction().getMachineMemOperand(
        MLD->getPointerInfo().getWithOffset(HiOffset),
        MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
        MLD->getAAInfo(), MLD->getRanges());

    Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
                           ISD::NON_EXTLOAD, MLD->isExpandingLoad());

    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    // Build a factor node to remember that this load is independent of the
    // other one.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                        Hi.getValue(1));

    // Legalized the chain result - switch anything that used the old chain to
    // use the new one.
    DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);

    SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);

    SDValue RetOps[] = { LoadRes, Chain };
    return DAG.getMergeValues(RetOps, DL);
  }
  return SDValue();
}

/// A vector select of 2 constant vectors can be simplified to math/logic to
/// avoid a variable select instruction and possibly avoid constant loads.
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  // Require a single-use i1-element condition, a target that prefers math over
  // select-of-constants, and two all-constant build_vector arms.
  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
      !TLI.convertSelectOfConstantsToMath(VT) ||
      !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
      !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
    return SDValue();

  // Check if we can use the condition value to increment/decrement a single
  // constant value. This simplifies a select to an add and removes a constant
  // load/materialization from the general case.
  bool AllAddOne = true;
  bool AllSubOne = true;
  unsigned Elts = VT.getVectorNumElements();
  for (unsigned i = 0; i != Elts; ++i) {
    SDValue N1Elt = N1.getOperand(i);
    SDValue N2Elt = N2.getOperand(i);
    // Undef lanes are compatible with either relationship.
    if (N1Elt.isUndef() || N2Elt.isUndef())
      continue;

    const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
    const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
    if (C1 != C2 + 1)
      AllAddOne = false;
    if (C1 != C2 - 1)
      AllSubOne = false;
  }

  // Further simplifications for the extra-special cases where the constants are
  // all 0 or all -1 should be implemented as folds of these patterns.
  SDLoc DL(N);
  if (AllAddOne || AllSubOne) {
    // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
    // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
    auto ExtendOpcode = AllAddOne ?
        ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
    SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
    return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
  }

  // The general case for select-of-constants:
  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
  // leave that to a machine-specific pass.
  return SDValue();
}

/// Combine a VSELECT node: trivial folds, integer abs canonicalization,
/// compare widening, constant conditions, and concat-vector selection.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDLoc DL(N);

  // fold (vselect C, X, X) -> X
  if (N1 == N2)
    return N1;

  // Canonicalize integer abs.
  // vselect (setg[te] X, 0), X, -X ->
  // vselect (setgt X, -1), X, -X ->
  // vselect (setl[te] X, 0), -X, X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      // Prefer a native ABS node when the target supports it.
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }

    // If this select has a condition (setcc) with narrower operands than the
    // select, try to widen the compare to match the select width.
    // TODO: This should be extended to handle any constant.
    // TODO: This could be extended to handle non-loading patterns, but that
    // requires thorough testing to avoid regressions.
    if (isNullConstantOrNullSplatConstant(RHS)) {
      EVT NarrowVT = LHS.getValueType();
      EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
      EVT SetCCVT = getSetCCResultType(LHS.getValueType());
      unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
      unsigned WideWidth = WideVT.getScalarSizeInBits();
      bool IsSigned = isSignedIntSetCC(CC);
      auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
          SetCCWidth != 1 && SetCCWidth < WideWidth &&
          TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
          TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
        // Both compare operands can be widened for free. The LHS can use an
        // extended load, and the RHS is a constant:
        // vselect (ext (setcc load(X), C)), N1, N2 -->
        // vselect (setcc extload(X), C'), N1, N2
        auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
        SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
        EVT WideSetCCVT = getSetCCResultType(WideVT);
        SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
        return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
      }
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}

/// Combine a SELECT_CC node: fold constant/undef conditions and simplify the
/// embedded compare.
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
                                  CC, SDLoc(N), false)) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2; // cond always true -> true val
      else
        return N3; // cond always false -> false val
    } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand.
This is 7502 // coherent the DAG creation, no setcc node is created in this case 7503 return N2; 7504 } else if (SCC.getOpcode() == ISD::SETCC) { 7505 // Fold to a simpler select_cc 7506 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), 7507 SCC.getOperand(0), SCC.getOperand(1), N2, N3, 7508 SCC.getOperand(2)); 7509 } 7510 } 7511 7512 // If we can fold this based on the true/false value, do so. 7513 if (SimplifySelectOps(N, N2, N3)) 7514 return SDValue(N, 0); // Don't revisit N. 7515 7516 // fold select_cc into other things, such as min/max/abs 7517 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); 7518 } 7519 7520 SDValue DAGCombiner::visitSETCC(SDNode *N) { 7521 // setcc is very commonly used as an argument to brcond. This pattern 7522 // also lend itself to numerous combines and, as a result, it is desired 7523 // we keep the argument to a brcond as a setcc as much as possible. 7524 bool PreferSetCC = 7525 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND; 7526 7527 SDValue Combined = SimplifySetCC( 7528 N->getValueType(0), N->getOperand(0), N->getOperand(1), 7529 cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC); 7530 7531 if (!Combined) 7532 return SDValue(); 7533 7534 // If we prefer to have a setcc, and we don't, we'll try our best to 7535 // recreate one using rebuildSetCC. 7536 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) { 7537 SDValue NewSetCC = rebuildSetCC(Combined); 7538 7539 // We don't have anything interesting to combine to. 7540 if (NewSetCC.getNode() == N) 7541 return SDValue(); 7542 7543 if (NewSetCC) 7544 return NewSetCC; 7545 } 7546 7547 return Combined; 7548 } 7549 7550 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) { 7551 SDValue LHS = N->getOperand(0); 7552 SDValue RHS = N->getOperand(1); 7553 SDValue Carry = N->getOperand(2); 7554 SDValue Cond = N->getOperand(3); 7555 7556 // If Carry is false, fold to a regular SETCC. 
7557 if (isNullConstant(Carry)) 7558 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); 7559 7560 return SDValue(); 7561 } 7562 7563 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or 7564 /// a build_vector of constants. 7565 /// This function is called by the DAGCombiner when visiting sext/zext/aext 7566 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). 7567 /// Vector extends are not folded if operations are legal; this is to 7568 /// avoid introducing illegal build_vector dag nodes. 7569 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, 7570 SelectionDAG &DAG, bool LegalTypes, 7571 bool LegalOperations) { 7572 unsigned Opcode = N->getOpcode(); 7573 SDValue N0 = N->getOperand(0); 7574 EVT VT = N->getValueType(0); 7575 7576 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || 7577 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || 7578 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) 7579 && "Expected EXTEND dag node in input!"); 7580 7581 // fold (sext c1) -> c1 7582 // fold (zext c1) -> c1 7583 // fold (aext c1) -> c1 7584 if (isa<ConstantSDNode>(N0)) 7585 return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); 7586 7587 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) 7588 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) 7589 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) 7590 EVT SVT = VT.getScalarType(); 7591 if (!(VT.isVector() && 7592 (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && 7593 ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) 7594 return nullptr; 7595 7596 // We can fold this node into a build_vector. 
  unsigned VTBits = SVT.getSizeInBits();
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();
  SDLoc DL(N);

  // Extend each build_vector element individually; undef elements stay undef.
  for (unsigned i=0; i != NumElts; ++i) {
    SDValue Op = N0->getOperand(i);
    if (Op->isUndef()) {
      Elts.push_back(DAG.getUNDEF(SVT));
      continue;
    }

    // Per-element location shadows the outer DL on purpose: the new constant
    // inherits the debug location of the element it came from.
    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts).getNode();
}

// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extension are possible and the above
// mentioned transformation is profitable.
// On success, SETCC users that must also be extended are appended to
// ExtendNodes for the caller (see ExtendSetCCUses).
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
                                    unsigned ExtOpc,
                                    SmallVectorImpl<SDNode *> &ExtendNodes,
                                    const TargetLowering &TLI) {
  bool HasCopyToRegUses = false;
  bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
  // Walk all users of the load result (skipping N itself and uses of other
  // result values) to decide whether extending them is legal and profitable.
  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                            UE = N0.getNode()->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == N)
      continue;
    if (UI.getUse().getResNo() != N0.getResNo())
      continue;
    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
      ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
        // Sign bits will be lost after a zext.
        return false;
      bool Add = false;
      for (unsigned i = 0; i != 2; ++i) {
        SDValue UseOp = User->getOperand(i);
        if (UseOp == N0)
          continue;
        // The other setcc operand must be a constant for us to rewrite it.
        if (!isa<ConstantSDNode>(UseOp))
          return false;
        Add = true;
      }
      if (Add)
        ExtendNodes.push_back(User);
      continue;
    }
    // If truncates aren't free and there are users we can't
    // extend, it isn't worthwhile.
    if (!isTruncFree)
      return false;
    // Remember if this value is live-out.
    if (User->getOpcode() == ISD::CopyToReg)
      HasCopyToRegUses = true;
  }

  if (HasCopyToRegUses) {
    bool BothLiveOut = false;
    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
        BothLiveOut = true;
        break;
      }
    }
    if (BothLiveOut)
      // Both unextended and extended values are live out. There had better be
      // a good reason for the transformation.
      // NOTE: size() converts to bool here — profitable only if at least one
      // setcc user was collected above.
      return ExtendNodes.size();
  }
  return true;
}

/// Rewrite the SETCC users collected by ExtendUsesToFormExtLoad so they
/// operate on the extended load value instead of the original one.
void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                                  SDValue OrigLoad, SDValue ExtLoad,
                                  ISD::NodeType ExtType) {
  // Extend SetCC uses if necessary.
  SDLoc DL(ExtLoad);
  for (SDNode *SetCC : SetCCs) {
    SmallVector<SDValue, 4> Ops;

    // Rewrite both compare operands: the load becomes the extended load,
    // the other (constant) operand is extended with the same opcode.
    for (unsigned j = 0; j != 2; ++j) {
      SDValue SOp = SetCC->getOperand(j);
      if (SOp == OrigLoad)
        Ops.push_back(ExtLoad);
      else
        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
    }

    Ops.push_back(SetCC->getOperand(2)); // keep the condition code
    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
  }
}

// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  // Emit one extending load per split part, advancing the pointer by the
  // store size of the split source type each iteration.
  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);

  // and/or/xor
  SDValue N0 = N->getOperand(0);
  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();

  // shl/shr
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // load
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();


  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong.
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  SmallVector<SDNode*, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  // Zero-extend the logic-op constant to the wide type.
  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
  Mask = Mask.zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
  CombineTo(N, And);
  if (SDValue(Load, 0).hasOneUse()) {
    // Only the chain result of the old load is still used; forward it.
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    // Other users of the narrow load remain: give them a truncate of the
    // extended load instead.
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N,0); // Return N so it doesn't get rechecked!
}

/// If we're narrowing or widening the result of a vector select and the final
/// size is the same size as a setcc (compare) feeding the select, then try to
/// apply the cast operation to the select's operands because matching vector
/// sizes for a select condition and other operands should be more efficient.
7880 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { 7881 unsigned CastOpcode = Cast->getOpcode(); 7882 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND || 7883 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND || 7884 CastOpcode == ISD::FP_ROUND) && 7885 "Unexpected opcode for vector select narrowing/widening"); 7886 7887 // We only do this transform before legal ops because the pattern may be 7888 // obfuscated by target-specific operations after legalization. Do not create 7889 // an illegal select op, however, because that may be difficult to lower. 7890 EVT VT = Cast->getValueType(0); 7891 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) 7892 return SDValue(); 7893 7894 SDValue VSel = Cast->getOperand(0); 7895 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() || 7896 VSel.getOperand(0).getOpcode() != ISD::SETCC) 7897 return SDValue(); 7898 7899 // Does the setcc have the same vector size as the casted select? 7900 SDValue SetCC = VSel.getOperand(0); 7901 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType()); 7902 if (SetCCVT.getSizeInBits() != VT.getSizeInBits()) 7903 return SDValue(); 7904 7905 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B) 7906 SDValue A = VSel.getOperand(1); 7907 SDValue B = VSel.getOperand(2); 7908 SDValue CastA, CastB; 7909 SDLoc DL(Cast); 7910 if (CastOpcode == ISD::FP_ROUND) { 7911 // FP_ROUND (fptrunc) has an extra flag operand to pass along. 
7912 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1)); 7913 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1)); 7914 } else { 7915 CastA = DAG.getNode(CastOpcode, DL, VT, A); 7916 CastB = DAG.getNode(CastOpcode, DL, VT, B); 7917 } 7918 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB); 7919 } 7920 7921 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) 7922 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) 7923 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, 7924 const TargetLowering &TLI, EVT VT, 7925 bool LegalOperations, SDNode *N, 7926 SDValue N0, ISD::LoadExtType ExtLoadType) { 7927 SDNode *N0Node = N0.getNode(); 7928 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node) 7929 : ISD::isZEXTLoad(N0Node); 7930 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) || 7931 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse()) 7932 return {}; 7933 7934 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 7935 EVT MemVT = LN0->getMemoryVT(); 7936 if ((LegalOperations || LN0->isVolatile()) && 7937 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) 7938 return {}; 7939 7940 SDValue ExtLoad = 7941 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), 7942 LN0->getBasePtr(), MemVT, LN0->getMemOperand()); 7943 Combiner.CombineTo(N, ExtLoad); 7944 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); 7945 return SDValue(N, 0); // Return N so it doesn't get rechecked! 7946 } 7947 7948 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x))) 7949 // Only generate vector extloads when 1) they're legal, and 2) they are 7950 // deemed desirable by the target. 
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
                                  const TargetLowering &TLI, EVT VT,
                                  bool LegalOperations, SDNode *N, SDValue N0,
                                  ISD::LoadExtType ExtLoadType,
                                  ISD::NodeType ExtOpc) {
  if (!ISD::isNON_EXTLoad(N0.getNode()) ||
      !ISD::isUNINDEXEDLoad(N0.getNode()) ||
      ((LegalOperations || VT.isVector() ||
        cast<LoadSDNode>(N0)->isVolatile()) &&
       !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
    return {};

  bool DoXform = true;
  SmallVector<SDNode *, 4> SetCCs;
  // With multiple users, the fold is only profitable if the other users can
  // be extended too (collected into SetCCs for rewriting below).
  if (!N0.hasOneUse())
    DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
  if (VT.isVector())
    DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
  if (!DoXform)
    return {};

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                                   LN0->getBasePtr(), N0.getValueType(),
                                   LN0->getMemOperand());
  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
  // If the load value is used only by N, replace it via CombineTo N.
  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
  Combiner.CombineTo(N, ExtLoad);
  if (NoReplaceTrunc) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
  } else {
    // Other users of the narrow load get a truncate of the extended load.
    SDValue Trunc =
        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
    Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

/// Combine a SIGN_EXTEND node: constant folds, sext-of-ext collapses,
/// trunc/sext elimination, sextload formation, and setcc-based folds.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended. If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits = Op.getScalarValueSizeInBits();   // width before trunc
    unsigned MidBits = N0.getScalarValueSizeInBits();  // width after trunc
    unsigned DestBits = VT.getScalarSizeInBits();      // final sext width
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // Try to simplify (sext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::SEXTLOAD, ISD::SIGN_EXTEND))
    return foldedExt;

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // Try to simplify (sext (sextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
    return foldedExt;

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
      LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
      SmallVector<SDNode*, 4> SetCCs;
      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                             ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        // Sign-extend the logic-op constant to the wide type.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(DL, VT, N00, N01, CC);

      // If the desired elements are smaller or larger than the source
      // elements, we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVecType) {
        SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
        return DAG.getSExtOrTrunc(VsetCC, DL, VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1)
                             ? DAG.getAllOnesConstant(DL, VT)
                             : DAG.getBoolConstant(true, DL, VT, N00VT);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero/one in Known.
// This function computes KnownBits to avoid a duplicated call to
// computeKnownBits in the caller.
// Also recognizes (setcc x, 0, ne) where x is known to be 0 or 1 as an
// implicit i1 truncate of x.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                         KnownBits &Known) {
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    DAG.computeKnownBits(Op, Known);
    return true;
  }

  if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
      cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
    return false;

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  assert(Op0.getValueType() == Op1.getValueType());

  if (isNullConstant(Op0))
    Op = Op1;
  else if (isNullConstant(Op1))
    Op = Op0;
  else
    return false;

  DAG.computeKnownBits(Op, Known);

  // Require all bits above bit 0 to be known zero, so (x != 0) == trunc(x).
  if (!(Known.Zero | 1).isAllOnesValue())
    return false;

  return true;
}

/// Combine a ZERO_EXTEND node: constant folds, zext-of-ext collapses,
/// trunc/zext elimination, zextload formation, and setcc-based folds.
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid
  // when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  KnownBits Known;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
    // Bits of Op that the truncate discards (none if no bits are dropped).
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger mask,
    // possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
      // We may safely transfer the debug info describing the truncate node over
      // to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // Try to simplify (zext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    return foldedExt;

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        // Skip the fold if (and (load x) cst) will already match as a
        // zextload and has other users of the and.
        if (N0.getOpcode() == ISD::AND) {
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        // Zero-extend the logic-op constant to the wide type.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // Try to simplify (zext (zextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    return foldedExt;

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the sext'd result matches the element size of
      // the compare operands.
      SDLoc DL(N);
      SDValue VecOnes = DAG.getConstant(1, DL, VT);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
                         VecOnes);
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

/// Combine an ISD::ANY_EXTEND node: fold through other extends/truncates,
/// turn (aext (load x)) into an extending load, and simplify aext-of-setcc.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    // Zero-extend the mask constant to the wider result type before ANDing.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction. We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, only profitable if the other users can also be
    // rewritten to use the extended value (e.g. setcc users).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
                                        TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        // Load has no other users: just re-route its chain output.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      } else {
        // Other users of the load still need the narrow value: feed them a
        // truncate of the new extending load.
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                    N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload x)))
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}

/// Combine AssertZext/AssertSext nodes: drop redundant asserts and merge
/// assert/truncate/assert sandwiches into a single stronger assert.
SDValue DAGCombiner::visitAssertExt(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT AssertVT = cast<VTSDNode>(N1)->getVT();

  // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
  if (N0.getOpcode() == Opcode &&
      AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
    return N0;

  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by asserting on the smallest asserted type to the larger source type.
    // This eliminates the later assert:
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
           "Asserting zero/sign-extended bits to a type larger than the "
           "truncated destination does not provide information");

    SDLoc DL(N);
    // Keep the smaller of the two asserted types; it subsumes the other.
    EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
    SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
    SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                    BigA.getOperand(0), MinAssertVTVal);
    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
  }

  return SDValue();
}

/// If the result of a wider load is shifted to right of N  bits and then
/// truncated to a narrower type and where N is a multiple of number of bits of
/// the narrower type, transform it to a narrower load from address + N / num of
/// bits of new type. Also narrow the load if the result is masked with an AND
/// to effectively produce a smaller type. If the result is to be extended, also
/// fold the extension to form a extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value,
    // or it maybe shifting a higher subword, half or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    // For the SRL case, N itself plays the role of the "truncate": look
    // through it to the load below.
    N0 = SDValue(N, 0);

    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01 || !LN0)
      return SDValue();

    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC || !AndC->getAPIntValue().isMask())
      return SDValue();

    unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    SDValue SRL = N0;
    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
      ShAmt = ConstShift->getZExtValue();
      unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      auto *LN0 = cast<LoadSDNode>(N0);

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (LN0->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
        return SDValue();

      // If the SRL is only used by a masking AND, we may be able to adjust
      // the ExtVT to make the AND redundant.
      SDNode *Mask = *(SRL->use_begin());
      if (Mask->getOpcode() == ISD::AND &&
          isa<ConstantSDNode>(Mask->getOperand(1))) {
        const APInt &ShiftMask =
          cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
        if (ShiftMask.isMask()) {
          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                           ShiftMask.countTrailingOnes());
          // If the mask is smaller, recompute the type.
          if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
              TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
            ExtVT = MaskedVT;
        }
      }
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  EVT PtrType = N0.getOperand(1).getValueType();
  uint64_t PtrOff = ShAmt / 8;
  // The narrowed load starts PtrOff bytes into the original load, so its
  // alignment can be no better than the original's at that offset.
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               Flags);
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    // Fall back to the result type if the swallowed shift amount doesn't fit
    // in the target's shift-amount type.
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}

/// Combine an ISD::SIGN_EXTEND_INREG node (operand 1 is a VTSDNode giving the
/// type whose sign bit is being extended).
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned EVTBits = EVT.getScalarSizeInBits();

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
      N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
      return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
  }

  // fold (sext_in_reg (zext x)) -> (sext x)
  // iff we are extending the source sign bit.
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() == EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by target, we can only do the combine when
  // load has one use. Doing otherwise can block folding the extload with other
  // extends that the target does support.
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
        N0.hasOneUse()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0); // Return N so it doesn't get rechecked!
8940 } 8941 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use 8942 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 8943 N0.hasOneUse() && 8944 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 8945 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 8946 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { 8947 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 8948 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 8949 LN0->getChain(), 8950 LN0->getBasePtr(), EVT, 8951 LN0->getMemOperand()); 8952 CombineTo(N, ExtLoad); 8953 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 8954 return SDValue(N, 0); // Return N so it doesn't get rechecked! 8955 } 8956 8957 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) 8958 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { 8959 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), 8960 N0.getOperand(1), false)) 8961 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 8962 BSwap, N1); 8963 } 8964 8965 return SDValue(); 8966 } 8967 8968 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { 8969 SDValue N0 = N->getOperand(0); 8970 EVT VT = N->getValueType(0); 8971 8972 if (N0.isUndef()) 8973 return DAG.getUNDEF(VT); 8974 8975 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, 8976 LegalOperations)) 8977 return SDValue(Res, 0); 8978 8979 return SDValue(); 8980 } 8981 8982 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { 8983 SDValue N0 = N->getOperand(0); 8984 EVT VT = N->getValueType(0); 8985 8986 if (N0.isUndef()) 8987 return DAG.getUNDEF(VT); 8988 8989 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, 8990 LegalOperations)) 8991 return SDValue(Res, 0); 8992 8993 return SDValue(); 8994 } 8995 8996 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { 8997 SDValue N0 = N->getOperand(0); 8998 EVT VT = N->getValueType(0); 8999 bool isLE = DAG.getDataLayout().isLittleEndian(); 9000 
  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;

  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));

  // fold (truncate c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
    if (C.getNode() != N)
      return C;
  }

  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, than we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Re-view the source vector with SizeRatio-times-more elements of the
    // narrower element type; total bit width is unchanged.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      // Pick the sub-element holding the low bits: first on little endian,
      // last on big endian.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known;
    DAG.computeKnownBits(Amt, Known);
    unsigned Size = VT.getScalarSizeInBits();
    // Only safe if the shift amount is provably smaller than the narrow type's
    // bit width; otherwise the narrow shift would be out of range.
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      SDLoc SL(N);
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {
    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset'th build_vector operand.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    APInt Mask =
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
  // The carry result (value #1) must be dead: truncating only value #0 of a
  // multi-result node is legal only when nothing consumes the other result.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    // Keep the original second result type so existing (dead) carry uses, if
    // any appear later, still type-check.
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  // fold (truncate (extract_subvector(ext x))) ->
  //      (extract_subvector x)
  // TODO: This can be generalized to cover cases where the truncate and extract
  // do not fully cancel each other out.
  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
        N00.getOpcode() == ISD::ZERO_EXTEND ||
        N00.getOpcode() == ISD::ANY_EXTEND) {
      // Only fires when the truncate exactly undoes the extension, i.e. the
      // pre-extension element type matches the truncated element type.
      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
          VT.getVectorElementType())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
                           N00.getOperand(0), N0.getOperand(1));
    }
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

/// Return the SDNode for operand \p i of the BUILD_PAIR \p N, looking through
/// a MERGE_VALUES wrapper if one is present.
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue Elt = N->getOperand(i);
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
    return Elt.getNode();
  return Elt.getOperand(Elt.getResNo()).getNode();
}

/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));

  // A BUILD_PAIR is always having the least significant part in elt 0 and the
  // most significant part in elt 1. So when combining into one large load, we
  // need to consider the endianness.
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LD1, LD2);

  // After the (possible) swap, LD1 is the load expected at the lower address.
  // Note: the dyn_casts above may have produced null; check here, after the
  // swap, so both orderings are validated the same way.
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getStoreSize();
  // LD2 must start exactly LD1Bytes past LD1 for the pair to form one
  // contiguous memory region.
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));

    // Only combine if the wide type's ABI alignment is already satisfied by
    // the narrow load's alignment, and the wide load is legal (if we must be
    // legal at this phase).
    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
                         LD1->getPointerInfo(), Align);
  }

  return SDValue();
}

/// Return which BUILD_PAIR element (0 or 1) holds the Hi half of a ppcf128
/// after a bitcast to i128.
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
  // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
  // and Lo parts; on big-endian machines it doesn't.
  return DAG.getDataLayout().isBigEndian() ? 1 : 0;
}

/// Fold a bitcast of integer sign-bit logic back into an FP operation:
///   bitcast (and (bitcast fp X), ~signmask) -> fabs X
///   bitcast (xor (bitcast fp X), signmask)  -> fneg X
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  // If this is not a bitcast to an FP type or if the target doesn't have
  // IEEE754-compliant FP logic, we're done.
  EVT VT = N->getValueType(0);
  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
    return SDValue();

  // TODO: Use splat values for the constant-checking below and remove this
  // restriction.
  SDValue N0 = N->getOperand(0);
  EVT SourceVT = N0.getValueType();
  if (SourceVT.isVector())
    return SDValue();

  // Select the FP opcode and the exact integer constant that the logic op
  // must carry for the fold to be valid.
  unsigned FPOpcode;
  APInt SignMask;
  switch (N0.getOpcode()) {
  case ISD::AND:
    FPOpcode = ISD::FABS;
    SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
    break;
  case ISD::XOR:
    FPOpcode = ISD::FNEG;
    SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
    break;
  // TODO: ISD::OR --> ISD::FNABS?
  default:
    return SDValue();
  }

  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
  SDValue LogicOp0 = N0.getOperand(0);
  ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
      LogicOp0.getOpcode() == ISD::BITCAST &&
      LogicOp0->getOperand(0).getValueType() == VT)
    return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));

  return SDValue();
}

/// Combine an ISD::BITCAST node; returns the replacement value, or an empty
/// SDValue if no fold applies.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
  // We always need to check that this is just a fp -> int or int -> fp
  // conversion, otherwise we will get back N which will confuse the caller
  // into thinking we used CombineTo. This can block target combines from
  // running. If we can't allow illegal operations, we need to ensure the
  // resulting operation will be legal.
  // TODO: Maybe we should check that the return value isn't N explicitly?
  if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
       (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) ||
      (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
       (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT))))
    return DAG.getBitcast(VT, N0);

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned OrigAlign = LN0->getAlignment();

    // Only rewrite the load if the target says an access of the new type at
    // this alignment is both allowed and fast.
    bool Fast = false;
    if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
        Fast) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), OrigAlign,
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      // Redirect the old load's chain users to the new load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      // ppcf128 is two doubles; the sign bit lives in each 64-bit half.
      SDValue SignBit = DAG.getConstant(
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        // flipbit is set exactly when the signs of cst and x differ, so
        // XORing it into cst flips cst's sign to match x's.
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    // FIXME: If either input vector is bitcast, try to convert the shuffle to
    // the result type of this bitcast. This would eliminate at least one
    // bitcast. See the transform in InstCombine.
    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!(SV0 && SV1))
      return SDValue();

    // The new VT has MaskScale times as many (narrower) elements, so each
    // original mask entry expands into MaskScale consecutive entries.
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    // If the scaled mask isn't legal as-is, try the commuted form before
    // giving up.
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}

SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  EVT VT = N->getValueType(0);
  return CombineConsecutiveLoads(N, VT);
}

/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element. This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());

    // Due to the FP element handling below calling this routine recursively,
    // we can end up with a scalar-to-vector node here.
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                         DAG.getBitcast(DstEltVT, BV->getOperand(0)));

    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated. Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements. To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector. If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        // On little-endian targets the first source element is the least
        // significant piece, so walk the group in reverse while shifting left.
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        // An undef piece contributes zero bits; only an all-undef group
        // produces an undef output element.
        if (Op.isUndef()) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    if (Op.isUndef()) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    // Peel off DstBitSize chunks, least significant first.
    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal.lshrInPlace(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getBuildVector(VT, DL, Ops);
}

/// Return true if this node's flags permit FP contraction (fma formation):
/// either the contract or the reassociation fast-math flag is set.
static bool isContractable(SDNode *N) {
  SDNodeFlags F = N->getFlags();
  return F.hasAllowContract() || F.hasAllowReassociation();
}

/// Try to perform FMA combining on a given FADD node.
/// Returns the fused node, or an empty SDValue if no combining was done.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  SDNodeFlags Flags = N->getFlags();
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Defer to the machine combiner when the target prefers forming FMAs there.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1, Flags);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0, Flags);
  }

  // Look through FP_EXTEND nodes to do more combining.

  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
  if (N0.getOpcode() == ISD::FP_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (isContractableFMUL(N00) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N00.getOperand(1)), N1, Flags);
    }
  }

  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FP_EXTEND) {
    SDValue N10 = N1.getOperand(0);
    if (isContractableFMUL(N10) &&
        TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(0)),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                     N10.getOperand(1)), N0, Flags);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
    if (CanFuse &&
        N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1, Flags), Flags);
    }

    // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z, (fma u, v, x))
    if (CanFuse &&
        N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
        N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0, Flags), Flags);
    }


    // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), z))
    auto FoldFAddFMAFPExtFMul = [&] (
        SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
        SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == PreferredFusedOpcode) {
      SDValue N02 = N0.getOperand(2);
      if (N02.getOpcode() == ISD::FP_EXTEND) {
        SDValue N020 = N02.getOperand(0);
        if (isContractableFMUL(N020) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
          return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                      N020.getOperand(0), N020.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
    //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    auto FoldFAddFPExtFMAFMul = [&] (
        SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
        SDNodeFlags Flags) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                         DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                     Z, Flags), Flags);
    };
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == PreferredFusedOpcode) {
        SDValue N002 = N00.getOperand(2);
        if (isContractableFMUL(N002) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
          return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                      N002.getOperand(0), N002.getOperand(1),
                                      N1, Flags);
        }
      }
    }

    // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
    //   -> (fma y, z, (fma (fpext u), (fpext v), x))
    if (N1.getOpcode() == PreferredFusedOpcode) {
      SDValue N12 = N1.getOperand(2);
      if (N12.getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N12.getOperand(0);
        if (isContractableFMUL(N120) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
          return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                      N120.getOperand(0), N120.getOperand(1),
                                      N0, Flags);
        }
      }
    }

    // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
    //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
    // FIXME: This turns two single-precision and one double-precision
    // operation into two double-precision operations, which might not be
    // interesting for all targets, especially GPUs.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == PreferredFusedOpcode) {
        SDValue N102 = N10.getOperand(2);
        if (isContractableFMUL(N102) &&
            TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
          return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                      N102.getOperand(0), N102.getOperand(1),
                                      N0, Flags);
        }
      }
    }
  }

  return SDValue();
}

/// Try to perform FMA combining on a given FSUB node.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  const SDNodeFlags Flags = N->getFlags();
  bool CanFuse = Options.UnsafeFPMath || isContractable(N);
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              CanFuse || HasFMAD);

  // If the subtraction is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ?
ISD::FMAD : ISD::FMA; 9989 bool Aggressive = TLI.enableAggressiveFMAFusion(VT); 9990 9991 // Is the node an FMUL and contractable either due to global flags or 9992 // SDNodeFlags. 9993 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { 9994 if (N.getOpcode() != ISD::FMUL) 9995 return false; 9996 return AllowFusionGlobally || isContractable(N.getNode()); 9997 }; 9998 9999 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) 10000 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) { 10001 return DAG.getNode(PreferredFusedOpcode, SL, VT, 10002 N0.getOperand(0), N0.getOperand(1), 10003 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); 10004 } 10005 10006 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) 10007 // Note: Commutes FSUB operands. 10008 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) { 10009 return DAG.getNode(PreferredFusedOpcode, SL, VT, 10010 DAG.getNode(ISD::FNEG, SL, VT, 10011 N1.getOperand(0)), 10012 N1.getOperand(1), N0, Flags); 10013 } 10014 10015 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) 10016 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) && 10017 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) { 10018 SDValue N00 = N0.getOperand(0).getOperand(0); 10019 SDValue N01 = N0.getOperand(0).getOperand(1); 10020 return DAG.getNode(PreferredFusedOpcode, SL, VT, 10021 DAG.getNode(ISD::FNEG, SL, VT, N00), N01, 10022 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); 10023 } 10024 10025 // Look through FP_EXTEND nodes to do more combining. 
10026 10027 // fold (fsub (fpext (fmul x, y)), z) 10028 // -> (fma (fpext x), (fpext y), (fneg z)) 10029 if (N0.getOpcode() == ISD::FP_EXTEND) { 10030 SDValue N00 = N0.getOperand(0); 10031 if (isContractableFMUL(N00) && 10032 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { 10033 return DAG.getNode(PreferredFusedOpcode, SL, VT, 10034 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10035 N00.getOperand(0)), 10036 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10037 N00.getOperand(1)), 10038 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); 10039 } 10040 } 10041 10042 // fold (fsub x, (fpext (fmul y, z))) 10043 // -> (fma (fneg (fpext y)), (fpext z), x) 10044 // Note: Commutes FSUB operands. 10045 if (N1.getOpcode() == ISD::FP_EXTEND) { 10046 SDValue N10 = N1.getOperand(0); 10047 if (isContractableFMUL(N10) && 10048 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { 10049 return DAG.getNode(PreferredFusedOpcode, SL, VT, 10050 DAG.getNode(ISD::FNEG, SL, VT, 10051 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10052 N10.getOperand(0))), 10053 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10054 N10.getOperand(1)), 10055 N0, Flags); 10056 } 10057 } 10058 10059 // fold (fsub (fpext (fneg (fmul, x, y))), z) 10060 // -> (fneg (fma (fpext x), (fpext y), z)) 10061 // Note: This could be removed with appropriate canonicalization of the 10062 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the 10063 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent 10064 // from implementing the canonicalization in visitFSUB. 
10065 if (N0.getOpcode() == ISD::FP_EXTEND) { 10066 SDValue N00 = N0.getOperand(0); 10067 if (N00.getOpcode() == ISD::FNEG) { 10068 SDValue N000 = N00.getOperand(0); 10069 if (isContractableFMUL(N000) && 10070 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { 10071 return DAG.getNode(ISD::FNEG, SL, VT, 10072 DAG.getNode(PreferredFusedOpcode, SL, VT, 10073 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10074 N000.getOperand(0)), 10075 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10076 N000.getOperand(1)), 10077 N1, Flags)); 10078 } 10079 } 10080 } 10081 10082 // fold (fsub (fneg (fpext (fmul, x, y))), z) 10083 // -> (fneg (fma (fpext x)), (fpext y), z) 10084 // Note: This could be removed with appropriate canonicalization of the 10085 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the 10086 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent 10087 // from implementing the canonicalization in visitFSUB. 10088 if (N0.getOpcode() == ISD::FNEG) { 10089 SDValue N00 = N0.getOperand(0); 10090 if (N00.getOpcode() == ISD::FP_EXTEND) { 10091 SDValue N000 = N00.getOperand(0); 10092 if (isContractableFMUL(N000) && 10093 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) { 10094 return DAG.getNode(ISD::FNEG, SL, VT, 10095 DAG.getNode(PreferredFusedOpcode, SL, VT, 10096 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10097 N000.getOperand(0)), 10098 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10099 N000.getOperand(1)), 10100 N1, Flags)); 10101 } 10102 } 10103 } 10104 10105 // More folding opportunities when target permits. 
10106 if (Aggressive) { 10107 // fold (fsub (fma x, y, (fmul u, v)), z) 10108 // -> (fma x, y (fma u, v, (fneg z))) 10109 if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && 10110 isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() && 10111 N0.getOperand(2)->hasOneUse()) { 10112 return DAG.getNode(PreferredFusedOpcode, SL, VT, 10113 N0.getOperand(0), N0.getOperand(1), 10114 DAG.getNode(PreferredFusedOpcode, SL, VT, 10115 N0.getOperand(2).getOperand(0), 10116 N0.getOperand(2).getOperand(1), 10117 DAG.getNode(ISD::FNEG, SL, VT, 10118 N1), Flags), Flags); 10119 } 10120 10121 // fold (fsub x, (fma y, z, (fmul u, v))) 10122 // -> (fma (fneg y), z, (fma (fneg u), v, x)) 10123 if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && 10124 isContractableFMUL(N1.getOperand(2))) { 10125 SDValue N20 = N1.getOperand(2).getOperand(0); 10126 SDValue N21 = N1.getOperand(2).getOperand(1); 10127 return DAG.getNode(PreferredFusedOpcode, SL, VT, 10128 DAG.getNode(ISD::FNEG, SL, VT, 10129 N1.getOperand(0)), 10130 N1.getOperand(1), 10131 DAG.getNode(PreferredFusedOpcode, SL, VT, 10132 DAG.getNode(ISD::FNEG, SL, VT, N20), 10133 N21, N0, Flags), Flags); 10134 } 10135 10136 10137 // fold (fsub (fma x, y, (fpext (fmul u, v))), z) 10138 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) 10139 if (N0.getOpcode() == PreferredFusedOpcode) { 10140 SDValue N02 = N0.getOperand(2); 10141 if (N02.getOpcode() == ISD::FP_EXTEND) { 10142 SDValue N020 = N02.getOperand(0); 10143 if (isContractableFMUL(N020) && 10144 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { 10145 return DAG.getNode(PreferredFusedOpcode, SL, VT, 10146 N0.getOperand(0), N0.getOperand(1), 10147 DAG.getNode(PreferredFusedOpcode, SL, VT, 10148 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10149 N020.getOperand(0)), 10150 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10151 N020.getOperand(1)), 10152 DAG.getNode(ISD::FNEG, SL, VT, 10153 N1), Flags), Flags); 10154 } 10155 } 10156 } 10157 10158 // fold (fsub (fpext (fma x, y, 
(fmul u, v))), z) 10159 // -> (fma (fpext x), (fpext y), 10160 // (fma (fpext u), (fpext v), (fneg z))) 10161 // FIXME: This turns two single-precision and one double-precision 10162 // operation into two double-precision operations, which might not be 10163 // interesting for all targets, especially GPUs. 10164 if (N0.getOpcode() == ISD::FP_EXTEND) { 10165 SDValue N00 = N0.getOperand(0); 10166 if (N00.getOpcode() == PreferredFusedOpcode) { 10167 SDValue N002 = N00.getOperand(2); 10168 if (isContractableFMUL(N002) && 10169 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { 10170 return DAG.getNode(PreferredFusedOpcode, SL, VT, 10171 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10172 N00.getOperand(0)), 10173 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10174 N00.getOperand(1)), 10175 DAG.getNode(PreferredFusedOpcode, SL, VT, 10176 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10177 N002.getOperand(0)), 10178 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10179 N002.getOperand(1)), 10180 DAG.getNode(ISD::FNEG, SL, VT, 10181 N1), Flags), Flags); 10182 } 10183 } 10184 } 10185 10186 // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) 10187 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) 10188 if (N1.getOpcode() == PreferredFusedOpcode && 10189 N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { 10190 SDValue N120 = N1.getOperand(2).getOperand(0); 10191 if (isContractableFMUL(N120) && 10192 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { 10193 SDValue N1200 = N120.getOperand(0); 10194 SDValue N1201 = N120.getOperand(1); 10195 return DAG.getNode(PreferredFusedOpcode, SL, VT, 10196 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), 10197 N1.getOperand(1), 10198 DAG.getNode(PreferredFusedOpcode, SL, VT, 10199 DAG.getNode(ISD::FNEG, SL, VT, 10200 DAG.getNode(ISD::FP_EXTEND, SL, 10201 VT, N1200)), 10202 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10203 N1201), 10204 N0, Flags), Flags); 10205 } 10206 } 10207 10208 // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) 10209 // 
-> (fma (fneg (fpext y)), (fpext z), 10210 // (fma (fneg (fpext u)), (fpext v), x)) 10211 // FIXME: This turns two single-precision and one double-precision 10212 // operation into two double-precision operations, which might not be 10213 // interesting for all targets, especially GPUs. 10214 if (N1.getOpcode() == ISD::FP_EXTEND && 10215 N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { 10216 SDValue CvtSrc = N1.getOperand(0); 10217 SDValue N100 = CvtSrc.getOperand(0); 10218 SDValue N101 = CvtSrc.getOperand(1); 10219 SDValue N102 = CvtSrc.getOperand(2); 10220 if (isContractableFMUL(N102) && 10221 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) { 10222 SDValue N1020 = N102.getOperand(0); 10223 SDValue N1021 = N102.getOperand(1); 10224 return DAG.getNode(PreferredFusedOpcode, SL, VT, 10225 DAG.getNode(ISD::FNEG, SL, VT, 10226 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10227 N100)), 10228 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), 10229 DAG.getNode(PreferredFusedOpcode, SL, VT, 10230 DAG.getNode(ISD::FNEG, SL, VT, 10231 DAG.getNode(ISD::FP_EXTEND, SL, 10232 VT, N1020)), 10233 DAG.getNode(ISD::FP_EXTEND, SL, VT, 10234 N1021), 10235 N0, Flags), Flags); 10236 } 10237 } 10238 } 10239 10240 return SDValue(); 10241 } 10242 10243 /// Try to perform FMA combining on a given FMUL node based on the distributive 10244 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions, 10245 /// subtraction instead of addition). 
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);
  const SDNodeFlags Flags = N->getFlags();

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  if (!Options.NoInfsFPMath)
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
  // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
  auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
      if (XC1 && XC1->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                           Y, Flags);
      if (XC1 && XC1->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
    }
    return SDValue();
  };

  // Try both operand orders of the commutative FMUL.
  if (SDValue FMA = FuseFADD(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0, Flags))
    return FMA;

  // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
  // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
  // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
  // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
  auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      // FSUB is not commutative, so +-1.0 may appear as either operand.
      auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
      if (XC0 && XC0->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                           Y, Flags);
      if (XC0 && XC0->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);

      auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
      if (XC1 && XC1->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
      if (XC1 && XC1->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                           Y, Flags);
    }
    return SDValue();
  };

  if (SDValue FMA = FuseFSUB(N0, N1, Flags))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0, Flags))
    return FMA;

  return SDValue();
}

/// Returns true if N is an FMUL whose second operand is the constant (or
/// constant splat) -2.0. Note: only operand 1 is checked; a -2.0 in operand 0
/// is expected to have been canonicalized to the RHS.
static bool isFMulNegTwo(SDValue &N) {
  if (N.getOpcode() != ISD::FMUL)
    return false;
  if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
    return CFP->isExactlyValue(-2.0);
  return false;
}

/// Combine an FADD node: constant folding, canonicalization of constants to
/// the RHS, fneg/fsub conversions, reassociation folds guarded by
/// unsafe-math / fast-math flags, and finally FADD -> FMA fusion.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // The "== 2" result means the negation is free (no extra node needed).
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations), Flags);

  // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
  // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
  if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
      (isFMulNegTwo(N1) && N1.hasOneUse())) {
    bool N1IsFMul = isFMulNegTwo(N1);
    SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
    return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
  }

  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
  if (N1C && N1C->isZero()) {
    // x + (-0.0) is always x; x + (+0.0) is only x when signed zeros do not
    // matter (nsz or unsafe math), since -0.0 + +0.0 == +0.0.
    if (N1C->isNegative() || Options.UnsafeFPMath ||
        Flags.hasNoSignedZeros()) {
      // fold (fadd A, 0) -> A
      return N0;
    }
  }

  // No FP constant should be created after legalization as Instruction
  // Selection pass has a hard time dealing with FP constants.
  bool AllowNewConst = (Level < AfterLegalizeDAG);

  // If 'unsafe math' or nnan is enabled, fold lots of things.
  if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);
  }

  // If 'unsafe math' or reassoc and nsz, fold lots of things.
  // TODO: break out portions of the transformations below for which Unsafe is
  //       considered and which do not require both nsz and reassoc
  if ((Options.UnsafeFPMath ||
       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
      AllowNewConst) {
    // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
    if (N1CFP && N0.getOpcode() == ISD::FADD &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
    }

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
        }
      }

      if (N0.getOpcode() == ISD::FADD) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N1, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      if (N1.getOpcode() == ISD::FADD) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N0, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT), Flags);
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}

/// Combine an FSUB node: constant folding, zero-operand simplifications,
/// fneg conversions, unsafe-math folds, and finally FSUB -> FMA fusion.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // (fsub A, 0) -> A
  // Subtracting +0.0 is always exact; subtracting -0.0 needs nsz/unsafe math.
  if (N1CFP && N1CFP->isZero()) {
    if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
        Flags.hasNoSignedZeros()) {
      return N0;
    }
  }

  if (N0 == N1) {
    // (fsub x, x) -> 0.0
    // Only valid without NaNs: NaN - NaN == NaN, not 0.0.
    if (Options.UnsafeFPMath || Flags.hasNoNaNs())
      return DAG.getConstantFP(0.0f, DL, VT);
  }

  // (fsub 0, B) -> -B
  if (N0CFP && N0CFP->isZero()) {
    if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
        return GetNegatedExpression(N1, DAG, LegalOperations);
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
    }
  }

  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
    return DAG.getNode(ISD::FADD, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // If 'unsafe math' is enabled, fold lots of things.
10566 if (Options.UnsafeFPMath) { 10567 // (fsub x, (fadd x, y)) -> (fneg y) 10568 // (fsub x, (fadd y, x)) -> (fneg y) 10569 if (N1.getOpcode() == ISD::FADD) { 10570 SDValue N10 = N1->getOperand(0); 10571 SDValue N11 = N1->getOperand(1); 10572 10573 if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options)) 10574 return GetNegatedExpression(N11, DAG, LegalOperations); 10575 10576 if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options)) 10577 return GetNegatedExpression(N10, DAG, LegalOperations); 10578 } 10579 } 10580 10581 // FSUB -> FMA combines: 10582 if (SDValue Fused = visitFSUBForFMACombine(N)) { 10583 AddToWorklist(Fused.getNode()); 10584 return Fused; 10585 } 10586 10587 return SDValue(); 10588 } 10589 10590 SDValue DAGCombiner::visitFMUL(SDNode *N) { 10591 SDValue N0 = N->getOperand(0); 10592 SDValue N1 = N->getOperand(1); 10593 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); 10594 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); 10595 EVT VT = N->getValueType(0); 10596 SDLoc DL(N); 10597 const TargetOptions &Options = DAG.getTarget().Options; 10598 const SDNodeFlags Flags = N->getFlags(); 10599 10600 // fold vector ops 10601 if (VT.isVector()) { 10602 // This just handles C1 * C2 for vectors. Other vector folds are below. 
10603 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 10604 return FoldedVOp; 10605 } 10606 10607 // fold (fmul c1, c2) -> c1*c2 10608 if (N0CFP && N1CFP) 10609 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags); 10610 10611 // canonicalize constant to RHS 10612 if (isConstantFPBuildVectorOrConstantFP(N0) && 10613 !isConstantFPBuildVectorOrConstantFP(N1)) 10614 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); 10615 10616 // fold (fmul A, 1.0) -> A 10617 if (N1CFP && N1CFP->isExactlyValue(1.0)) 10618 return N0; 10619 10620 if (SDValue NewSel = foldBinOpIntoSelect(N)) 10621 return NewSel; 10622 10623 if (Options.UnsafeFPMath || 10624 (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) { 10625 // fold (fmul A, 0) -> 0 10626 if (N1CFP && N1CFP->isZero()) 10627 return N1; 10628 } 10629 10630 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) { 10631 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2 10632 if (N0.getOpcode() == ISD::FMUL) { 10633 // Fold scalars or any vector constants (not just splats). 10634 // This fold is done in general by InstCombine, but extra fmul insts 10635 // may have been generated during lowering. 10636 SDValue N00 = N0.getOperand(0); 10637 SDValue N01 = N0.getOperand(1); 10638 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); 10639 auto *BV00 = dyn_cast<BuildVectorSDNode>(N00); 10640 auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); 10641 10642 // Check 1: Make sure that the first operand of the inner multiply is NOT 10643 // a constant. Otherwise, we may induce infinite looping. 10644 if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { 10645 // Check 2: Make sure that the second operand of the inner multiply and 10646 // the second operand of the outer multiply are constants. 
10647 if ((N1CFP && isConstOrConstSplatFP(N01)) || 10648 (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { 10649 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); 10650 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); 10651 } 10652 } 10653 } 10654 10655 // Match a special-case: we convert X * 2.0 into fadd. 10656 // fmul (fadd X, X), C -> fmul X, 2.0 * C 10657 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() && 10658 N0.getOperand(0) == N0.getOperand(1)) { 10659 const SDValue Two = DAG.getConstantFP(2.0, DL, VT); 10660 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); 10661 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); 10662 } 10663 } 10664 10665 // fold (fmul X, 2.0) -> (fadd X, X) 10666 if (N1CFP && N1CFP->isExactlyValue(+2.0)) 10667 return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags); 10668 10669 // fold (fmul X, -1.0) -> (fneg X) 10670 if (N1CFP && N1CFP->isExactlyValue(-1.0)) 10671 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 10672 return DAG.getNode(ISD::FNEG, DL, VT, N0); 10673 10674 // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) 10675 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { 10676 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { 10677 // Both can be negated for free, check to see if at least one is cheaper 10678 // negated. 
10679 if (LHSNeg == 2 || RHSNeg == 2) 10680 return DAG.getNode(ISD::FMUL, DL, VT, 10681 GetNegatedExpression(N0, DAG, LegalOperations), 10682 GetNegatedExpression(N1, DAG, LegalOperations), 10683 Flags); 10684 } 10685 } 10686 10687 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) 10688 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X) 10689 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() && 10690 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) && 10691 TLI.isOperationLegal(ISD::FABS, VT)) { 10692 SDValue Select = N0, X = N1; 10693 if (Select.getOpcode() != ISD::SELECT) 10694 std::swap(Select, X); 10695 10696 SDValue Cond = Select.getOperand(0); 10697 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1)); 10698 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2)); 10699 10700 if (TrueOpnd && FalseOpnd && 10701 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X && 10702 isa<ConstantFPSDNode>(Cond.getOperand(1)) && 10703 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) { 10704 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 10705 switch (CC) { 10706 default: break; 10707 case ISD::SETOLT: 10708 case ISD::SETULT: 10709 case ISD::SETOLE: 10710 case ISD::SETULE: 10711 case ISD::SETLT: 10712 case ISD::SETLE: 10713 std::swap(TrueOpnd, FalseOpnd); 10714 LLVM_FALLTHROUGH; 10715 case ISD::SETOGT: 10716 case ISD::SETUGT: 10717 case ISD::SETOGE: 10718 case ISD::SETUGE: 10719 case ISD::SETGT: 10720 case ISD::SETGE: 10721 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) && 10722 TLI.isOperationLegal(ISD::FNEG, VT)) 10723 return DAG.getNode(ISD::FNEG, DL, VT, 10724 DAG.getNode(ISD::FABS, DL, VT, X)); 10725 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0)) 10726 return DAG.getNode(ISD::FABS, DL, VT, X); 10727 10728 break; 10729 } 10730 } 10731 } 10732 10733 // FMUL -> FMA combines: 10734 if (SDValue Fused = 
visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}

/// Simplify an ISD::FMA node: constant-fold it, strip identity operands,
/// canonicalize constants onto the RHS, and (under unsafe math) reassociate
/// with neighboring FMUL/FNEG nodes.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;

  // FMA nodes have flags that propagate to the created nodes.
  const SDNodeFlags Flags = N->getFlags();
  // The aggressive folds below apply when unsafe math is globally enabled or
  // when this particular node is marked contractable.
  bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);

  // Constant fold FMA (getNode performs the fold for three FP constants).
  if (isa<ConstantFPSDNode>(N0) &&
      isa<ConstantFPSDNode>(N1) &&
      isa<ConstantFPSDNode>(N2)) {
    return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
  }

  // (fma 0, x, y) -> y and (fma x, 0, y) -> y. Only valid under unsafe math:
  // it discards a NaN/Inf that the multiply could have produced.
  if (UnsafeFPMath) {
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  // (fma 1, x, y) -> (fadd x, y) and (fma x, 1, y) -> (fadd x, y).
  // TODO: The FMA node should have flags that propagate to these nodes.
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  if (UnsafeFPMath) {
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
    if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
                                     Flags), Flags);
    }

    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
    if (N0.getOpcode() == ISD::FMUL &&
        isConstantFPBuildVectorOrConstantFP(N1) &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
      return DAG.getNode(ISD::FMA, DL, VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
                                     Flags),
                         N2);
    }
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
      AddToWorklist(RHSNeg.getNode());
      // TODO: The FMA node should have flags that propagate to this node.
      return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
    }

    // fma (fneg x), K, y -> fma x, -K, y
    // Profitable when the FP immediate is legal anyway, or when negating the
    // one-use constant folds away an FNEG of the multiplicand.
    if (N0.getOpcode() == ISD::FNEG &&
        (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
         (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
      return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
    }
  }

  if (UnsafeFPMath) {
    // (fma x, c, x) -> (fmul x, (c+1))
    if (N1CFP && N0 == N2) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(1.0, DL, VT), Flags),
                         Flags);
    }

    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
    if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
      return DAG.getNode(ISD::FMUL, DL, VT, N0,
                         DAG.getNode(ISD::FADD, DL, VT, N1,
                                     DAG.getConstantFP(-1.0, DL, VT), Flags),
                         Flags);
    }
  }

  return SDValue();
}

// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags Flags = N->getFlags();
  if (!UnsafeMath && !Flags.hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();
  if (!MinUses || N1->use_size() < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags().hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if (Users.size() < MinUses)
    return SDValue();

  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0); // N was replaced.
}

/// Simplify an ISD::FDIV node: constant-fold, turn division by a constant
/// into multiplication by its reciprocal, and form reciprocal / rsqrt
/// estimates when the fast-math flags permit.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // fold (fdiv x, (fsqrt y)) -> (fmul x, (rsqrt y))
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Same fold with the sqrt hidden under an fp_extend: build the estimate
      // in the narrower type, then re-extend it.
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // Same fold with the sqrt hidden under an fp_round.
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
    return CombineRepeatedDivisors;

  return SDValue();
}

SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  return SDValue();
}

/// Replace an FSQRT with a target-provided estimate sequence when the
/// fast-math flags allow approximation and the target's sqrt isn't cheap.
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
  SDNodeFlags Flags = N->getFlags();
  // Approximation requires either global unsafe math or the node's own
  // approximate-functions fast-math flag.
  if (!DAG.getTarget().Options.UnsafeFPMath &&
      !Flags.hasApproximateFuncs())
    return SDValue();

  SDValue N0 = N->getOperand(0);
  if (TLI.isFsqrtCheap(N0, DAG))
    return SDValue();

  // FSQRT nodes have flags that propagate to the created nodes.
  return buildSqrtEstimate(N0, Flags);
}

/// copysign(x, fp_extend(y)) -> copysign(x, y)
/// copysign(x, fp_round(y)) -> copysign(x, y)
static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
  SDValue N1 = N->getOperand(1);
  if ((N1.getOpcode() == ISD::FP_EXTEND ||
       N1.getOpcode() == ISD::FP_ROUND)) {
    // Do not optimize out type conversion of f128 type yet.
    // For some targets like x86_64, configuration is changed to keep one f128
    // value in one SSE register, but instruction selection cannot handle
    // FCOPYSIGN on SSE registers yet.
    EVT N1VT = N1->getValueType(0);
    EVT N1Op0VT = N1->getOperand(0).getValueType();
    return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
  }
  return false;
}

/// Simplify an ISD::FCOPYSIGN node: constant-fold, reduce to FABS/FNEG for a
/// constant sign operand, and look through sign-irrelevant wrappers on
/// either operand.
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  if (N0CFP && N1CFP) // Constant fold
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);

  if (N1CFP) {
    const APFloat &V = N1CFP->getValueAPF();
    // copysign(x, c1) -> fabs(x) iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    if (!V.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                           DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
    }
  }

  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);

  // copysign(x, abs(y)) -> abs(x)
  if (N1.getOpcode() == ISD::FABS)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // copysign(x, copysign(y,z)) -> copysign(x, z)
  if (N1.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));

  return SDValue();
}

/// Fold [us]itofp (fpto[us]i X) --> ftrunc X when the target has a legal
/// FTRUNC and signed zeros may be ignored.
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
                               const TargetLowering &TLI) {
  // This optimization is guarded by a function attribute because it may produce
  // unexpected results. Ie, programs may be relying on the platform-specific
  // undefined behavior when the float-to-int conversion overflows.
  const Function &F = DAG.getMachineFunction().getFunction();
  Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
  if (StrictOverflow.getValueAsString().equals("false"))
    return SDValue();

  // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
  // replacing casts with a libcall. We also must be allowed to ignore -0.0
  // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
  // conversions would return +0.0.
  // FIXME: We should be able to use node-level FMF here.
  // TODO: If strict math, should we use FABS (+ range check for signed cast)?
  EVT VT = N->getValueType(0);
  if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
      !DAG.getTarget().Options.NoSignedZerosFPMath)
    return SDValue();

  // fptosi/fptoui round towards zero, so converting from FP to integer and
  // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
  SDValue N0 = N->getOperand(0);
  if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
      N0.getOperand(0).getValueType() == VT)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));

  if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
      N0.getOperand(0).getValueType() == VT)
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));

  return SDValue();
}

SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0,, cc)
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}

SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
    return FTrunc;

  return SDValue();
}

// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range.
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    // Same width: the integer value passes through unchanged.
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}

SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_sint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);

  return FoldIntToFPToInt(N, DAG);
}

SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_uint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);

  return FoldIntToFPToInt(N, DAG);
}

SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // Operand 1 of FP_ROUND is 1 when the truncation is known value-preserving.
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64. Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // NOTE: this local deliberately shadows the llvm::EVT type name.
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);

  // fold (fp_round_inreg c1fp) -> c1fp
  if (N0CFP && isTypeLegal(EVT)) {
    SDLoc DL(N);
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
    return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
  }

  return SDValue();
}

SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Replace the original (narrow) load's value with a value-preserving
    // round of the extending load, and its chain with the extload's chain.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0); // Return N so it doesn't get rechecked!
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  return SDValue();
}

SDValue DAGCombiner::visitFCEIL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fceil c1) -> fceil(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);

  return SDValue();
}

SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ftrunc c1) -> ftrunc(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);

  // fold ftrunc (known rounded int x) -> x
  // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
  // likely to be generated to extract integer from a rounded floating value.
  switch (N0.getOpcode()) {
  default: break;
  case ISD::FRINT:
  case ISD::FTRUNC:
  case ISD::FNEARBYINT:
  case ISD::FFLOOR:
  case ISD::FCEIL:
    return N0;
  }

  return SDValue();
}

SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ffloor c1) -> ffloor(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);

  return SDValue();
}

// FIXME: FNEG and FABS have a lot in common; refactor.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the operand can be negated at no extra cost, push the negation down.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
            N0->getFlags());
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  SDValue N0 =
N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);

  // Constant fold using APFloat's minnum semantics.
  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
  }

  // Canonicalize to constant on RHS.
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);

  return SDValue();
}

SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);

  // Constant fold using APFloat's maxnum semantics.
  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
  }

  // Canonicalize to constant on RHS.
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);

  return SDValue();
}

SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
  // constant pool values.
  if (!TLI.isFAbsFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
      }
      SDLoc DL(N0);
      Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
                        DAG.getConstant(SignMask, DL, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(N->getValueType(0), Int);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  if (N1.hasOneUse()) {
    if (SDValue NewN1 = rebuildSetCC(N1))
      return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
  }

  return SDValue();
}

SDValue DAGCombiner::rebuildSetCC(SDValue N) {
  if (N.getOpcode() == ISD::SRL ||
      (N.getOpcode() == ISD::TRUNCATE &&
       (N.getOperand(0).hasOneUse() &&
        N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
    if (N.getOpcode() == ISD::TRUNCATE)
      N = N.getOperand(0);

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N.getOperand(0);
    SDValue Op1 = N.getOperand(1);

    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
          SDLoc DL(N);
          return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
                              Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                              ISD::SETNE);
        }
      }
    }
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N.getOpcode() == ISD::XOR) {
    // Because we may call this on a speculatively constructed
    // SimplifiedSetCC Node, we need to simplify this node first.
    // Ideally this should be folded into SimplifySetCC and not
    // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
    HandleSDNode XORHandle(N);
    while (N.getOpcode() == ISD::XOR) {
      SDValue Tmp = visitXOR(N.getNode());
      // No simplification done.
      if (!Tmp.getNode())
        break;
      // Returning N is a form of in-visit replacement that may have
      // invalidated N. Grab the value from the handle.
      if (Tmp.getNode() == N.getNode())
        N = XORHandle.getValue();
      else // Node simplified. Try simplifying again.
11729 N = Tmp; 11730 } 11731 11732 if (N.getOpcode() != ISD::XOR) 11733 return N; 11734 11735 SDNode *TheXor = N.getNode(); 11736 11737 SDValue Op0 = TheXor->getOperand(0); 11738 SDValue Op1 = TheXor->getOperand(1); 11739 11740 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { 11741 bool Equal = false; 11742 if (isOneConstant(Op0) && Op0.hasOneUse() && 11743 Op0.getOpcode() == ISD::XOR) { 11744 TheXor = Op0.getNode(); 11745 Equal = true; 11746 } 11747 11748 EVT SetCCVT = N.getValueType(); 11749 if (LegalTypes) 11750 SetCCVT = getSetCCResultType(SetCCVT); 11751 // Replace the uses of XOR with SETCC 11752 return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, 11753 Equal ? ISD::SETEQ : ISD::SETNE); 11754 } 11755 } 11756 11757 return SDValue(); 11758 } 11759 11760 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. 11761 // 11762 SDValue DAGCombiner::visitBR_CC(SDNode *N) { 11763 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); 11764 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); 11765 11766 // If N is a constant we could fold this into a fallthrough or unconditional 11767 // branch. However that doesn't happen very often in normal code, because 11768 // Instcombine/SimplifyCFG should have handled the available opportunities. 11769 // If we did this folding here, it would be necessary to update the 11770 // MachineBasicBlock CFG, which is awkward. 11771 11772 // Use SimplifySetCC to simplify SETCC's. 
11773 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), 11774 CondLHS, CondRHS, CC->get(), SDLoc(N), 11775 false); 11776 if (Simp.getNode()) AddToWorklist(Simp.getNode()); 11777 11778 // fold to a simpler setcc 11779 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) 11780 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, 11781 N->getOperand(0), Simp.getOperand(2), 11782 Simp.getOperand(0), Simp.getOperand(1), 11783 N->getOperand(4)); 11784 11785 return SDValue(); 11786 } 11787 11788 /// Return true if 'Use' is a load or a store that uses N as its base pointer 11789 /// and that N may be folded in the load / store addressing mode. 11790 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, 11791 SelectionDAG &DAG, 11792 const TargetLowering &TLI) { 11793 EVT VT; 11794 unsigned AS; 11795 11796 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { 11797 if (LD->isIndexed() || LD->getBasePtr().getNode() != N) 11798 return false; 11799 VT = LD->getMemoryVT(); 11800 AS = LD->getAddressSpace(); 11801 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { 11802 if (ST->isIndexed() || ST->getBasePtr().getNode() != N) 11803 return false; 11804 VT = ST->getMemoryVT(); 11805 AS = ST->getAddressSpace(); 11806 } else 11807 return false; 11808 11809 TargetLowering::AddrMode AM; 11810 if (N->getOpcode() == ISD::ADD) { 11811 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 11812 if (Offset) 11813 // [reg +/- imm] 11814 AM.BaseOffs = Offset->getSExtValue(); 11815 else 11816 // [reg +/- reg] 11817 AM.Scale = 1; 11818 } else if (N->getOpcode() == ISD::SUB) { 11819 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 11820 if (Offset) 11821 // [reg +/- imm] 11822 AM.BaseOffs = -Offset->getSExtValue(); 11823 else 11824 // [reg +/- reg] 11825 AM.Scale = 1; 11826 } else 11827 return false; 11828 11829 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, 11830 VT.getTypeForEVT(*DAG.getContext()), AS); 11831 } 

/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Indexed forms are only formed after DAG legalization.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  // Only unindexed loads/stores whose memory VT supports a pre-indexed
  // form on this target are candidates.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out. There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // Folding into a use that is a predecessor of N would create a cycle.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Any non-add/sub user of the base makes the rewrite unprofitable;
      // give up on folding the other uses entirely.
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // Fetch the operand of the user that is NOT BasePtr; it must be a
      // constant of the same type as Offset for the arithmetic below.
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Result values of an indexed load are (value, new base, chain); for an
  // indexed store they are (new base, chain).
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0  (the other use)
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1  (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}

/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Indexed forms are only formed after DAG legalization.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  // Only unindexed loads/stores whose memory VT supports a post-indexed
  // form on this target are candidates.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // The base pointer must have a use besides this load/store -- the add/sub
  // we want to fold in.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Scan the base pointer's users for an add/sub we could fold.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // Frame-index or register bases are not worth post-indexing.
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
                   dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // Result values of an indexed load are (value, new base, chain);
        // for an indexed store they are (new base, chain).
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Op (the folded add/sub) with uses of the
        // updated base value produced by the indexed node.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}

/// Return the base-pointer arithmetic from an indexed \p LD.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  // Operands of an indexed load: (chain, base, offset).
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
  assert((Inc.getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
         "Cannot split out indexing using opaque target constants");
  if (Inc.getOpcode() == ISD::TargetConstant) {
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
                          ConstInc->getValueType(0));
  }

  // *_INC modes add the increment, *_DEC modes subtract it.
  unsigned Opc =
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}

/// Combine a LOAD node: delete dead loads, forward stored values, infer
/// alignment, find a better chain, form indexed loads, and slice loads.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (OptLevel != CodeGenOpt::None &&
      ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      // Same address and same type: forward the stored value (operand 1 of
      // the store) and keep the store's chain.
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, PrevST->getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}

namespace {

/// Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;

    /// Various cost counters, accumulated across slices.
    unsigned Loads = 0;
    unsigned Truncates = 0;
    unsigned CrossRegisterBanksCopies = 0;
    unsigned ZExts = 0;
    unsigned Shift = 0;

    Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}

    /// Get the cost of one isolated slice.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      // A slice that must be widened to the truncate's type costs a zext,
      // unless the target considers that extension free.
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// Account for slicing gain in the current cost.
    /// Slicing provides a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };

  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;

  // The original load instruction.
  LoadSDNode *Origin;

  // The right shift amount in bits from the original load.
  unsigned Shift;

  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // Combine the original alignment with the byte offset of this slice.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the byte offset counts from the other end.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  ///      object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst =
        DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
                     Origin->getPointerInfo().getWithOffset(Offset),
                     getAlignment(), Origin->getMemOperand()->getFlags());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// Check if this slice can be merged with an expensive cross register
  /// bank copy.
E.g., 12595 /// i = load i32 12596 /// f = bitcast i32 i to float 12597 bool canMergeExpensiveCrossRegisterBankCopy() const { 12598 if (!Inst || !Inst->hasOneUse()) 12599 return false; 12600 SDNode *Use = *Inst->use_begin(); 12601 if (Use->getOpcode() != ISD::BITCAST) 12602 return false; 12603 assert(DAG && "Missing context"); 12604 const TargetLowering &TLI = DAG->getTargetLoweringInfo(); 12605 EVT ResVT = Use->getValueType(0); 12606 const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); 12607 const TargetRegisterClass *ArgRC = 12608 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); 12609 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) 12610 return false; 12611 12612 // At this point, we know that we perform a cross-register-bank copy. 12613 // Check if it is expensive. 12614 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo(); 12615 // Assume bitcasts are cheap, unless both register classes do not 12616 // explicitly share a common sub class. 12617 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) 12618 return false; 12619 12620 // Check if it will be merged with the load. 12621 // 1. Check the alignment constraint. 12622 unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment( 12623 ResVT.getTypeForEVT(*DAG->getContext())); 12624 12625 if (RequiredAlignment > getAlignment()) 12626 return false; 12627 12628 // 2. Check that the load is a legal operation for that type. 12629 if (!TLI.isOperationLegal(ISD::LOAD, ResVT)) 12630 return false; 12631 12632 // 3. Check that we do not have a zext in the way. 12633 if (Inst->getValueType(0) != getLoadedType()) 12634 return false; 12635 12636 return true; 12637 } 12638 }; 12639 12640 } // end anonymous namespace 12641 12642 /// Check that all bits set in \p UsedBits form a dense region, i.e., 12643 /// \p UsedBits looks like 0..0 1..1 0..0. 
static bool areUsedBitsDense(const APInt &UsedBits) {
  // If all the bits are one, this is dense!
  if (UsedBits.isAllOnesValue())
    return true;

  // Get rid of the unused bits on the right.
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
  // Get rid of the unused bits on the left.
  if (NarrowedUsedBits.countLeadingZeros())
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
  // Check that the chunk of bits is completely used.
  return NarrowedUsedBits.isAllOnesValue();
}

/// Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
                                     const LoadedSlice &Second) {
  assert(First.Origin == Second.Origin && First.Origin &&
         "Unable to match different memory origins.");
  APInt UsedBits = First.getUsedBits();
  assert((UsedBits & Second.getUsedBits()) == 0 &&
         "Slices are not supposed to overlap.");
  // The union of both slices must itself be a dense run of bits.
  UsedBits |= Second.getUsedBits();
  return areUsedBitsDense(UsedBits);
}

/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  llvm::sort(LoadedSlices.begin(), LoadedSlices.end(),
             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
               assert(LHS.Origin == RHS.Origin &&
                      "Different bases not implemented.");
               return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
             });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  // Note: the comma clause slides the window — after each iteration the
  // current "Second" becomes the next iteration's "First".
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                First = Second) {
    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    // A paired load replaces two loads with one: credit the saving.
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}

/// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there is exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
///
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
/// the elements themselves.
///
/// FIXME: When the cost model will be mature enough, we can relax
/// constraints (1) and (2).
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                const APInt &UsedBits, bool ForCodeSize) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // Stress mode: slice whenever more than one slice exists, regardless of
  // the cost model.
  if (StressLoadSlicing)
    return NumberOfSlices > 1;

  // Check (1).
  if (NumberOfSlices != 2)
    return false;

  // Check (2).
  if (!areUsedBitsDense(UsedBits))
    return false;

  // Check (3).
  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
  // The original code has one big load.
  OrigCost.Loads = 1;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
    const LoadedSlice &LS = LoadedSlices[CurrSlice];
    // Accumulate the cost of all the slices.
    LoadedSlice::Cost SliceCost(LS, ForCodeSize);
    GlobalSlicingCost += SliceCost;

    // Account as cost in the original configuration the gain obtained
    // with the current slices.
    OrigCost.addSliceGain(LS);
  }

  // If the target supports paired load, adjust the cost accordingly.
  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
  return OrigCost > GlobalSlicingCost;
}

/// If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
bool DAGCombiner::SliceUpLoad(SDNode *N) {
  // Only slice once the DAG is fully legalized, so the narrow loads created
  // below do not get re-expanded.
  if (Level < AfterLegalizeDAG)
    return false;

  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
      !LD->getValueType(0).isInteger())
    return false;

  // Keep track of already used bits to detect overlapping values.
  // In that case, we will just abort the transformation.
  APInt UsedBits(LD->getValueSizeInBits(0), 0);

  SmallVector<LoadedSlice, 4> LoadedSlices;

  // Check if this load is used as several smaller chunks of bits.
  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
  // of computation for each trunc.
  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
       UI != UIEnd; ++UI) {
    // Skip the uses of the chain.
    if (UI.getUse().getResNo() != 0)
      continue;

    SDNode *User = *UI;
    unsigned Shift = 0;

    // Check if this is a trunc(lshr).
    if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
        isa<ConstantSDNode>(User->getOperand(1))) {
      Shift = User->getConstantOperandVal(1);
      User = *User->use_begin();
    }

    // At this point, User is a Truncate, iff we encountered, trunc or
    // trunc(lshr).
    if (User->getOpcode() != ISD::TRUNCATE)
      return false;

    // The width of the type must be a power of 2 and greater than 8-bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted with a non-8-bits multiple, the slice
    // will be across several bytes. We do not support that.
    unsigned Width = User->getValueSizeInBits(0);
    if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
      return false;

    // Build the slice for this chain of computations.
    LoadedSlice LS(User, LD, Shift, &DAG);
    APInt CurrentUsedBits = LS.getUsedBits();

    // Check if this slice overlaps with another.
    if ((CurrentUsedBits & UsedBits) != 0)
      return false;
    // Update the bits used globally.
    UsedBits |= CurrentUsedBits;

    // Check if the new slice would be legal.
    if (!LS.isLegal())
      return false;

    // Record the slice.
    LoadedSlices.push_back(LS);
  }

  // Abort slicing if it does not seem to be profitable.
  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
    return false;

  ++SlicedLoads;

  // Rewrite each chain to use an independent load.
  // By construction, each chain can be represented by a unique load.

  // Prepare the argument for the new token factor for all the slices.
  SmallVector<SDValue, 8> ArgChains;
  for (SmallVectorImpl<LoadedSlice>::const_iterator
           LSIt = LoadedSlices.begin(),
           LSItEnd = LoadedSlices.end();
       LSIt != LSItEnd; ++LSIt) {
    SDValue SliceInst = LSIt->loadSlice();
    CombineTo(LSIt->Inst, SliceInst, true);
    // loadSlice() may have wrapped the load in a zext; peel it off to reach
    // the load whose chain result we need.
    if (SliceInst.getOpcode() != ISD::LOAD)
      SliceInst = SliceInst.getOperand(0);
    assert(SliceInst->getOpcode() == ISD::LOAD &&
           "It takes more than a zext to get to the loaded slice!!");
    ArgChains.push_back(SliceInst.getValue(1));
  }

  // Tie all the new loads' chains together and replace the chain result of
  // the original load with that token factor.
  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
                              ArgChains);
  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  AddToWorklist(Chain.getNode());
  return true;
}

/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out. If so, return the byte size being masked out
/// and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  // Result is (MaskedBytes, ByteShift); (0, 0) means "no match".
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    bool isOk = false;
    for (const SDValue &ChainOp : Chain->op_values())
      if (ChainOp.getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask. Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits. If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}

/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this. If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this. It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this! Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On big-endian targets the narrowed bytes sit at the opposite end of the
  // value, so the offset is mirrored.
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG
      .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                St->getPointerInfo().getWithOffset(StOffset), NewAlign)
      .getNode();
}

/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
13017 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { 13018 StoreSDNode *ST = cast<StoreSDNode>(N); 13019 if (ST->isVolatile()) 13020 return SDValue(); 13021 13022 SDValue Chain = ST->getChain(); 13023 SDValue Value = ST->getValue(); 13024 SDValue Ptr = ST->getBasePtr(); 13025 EVT VT = Value.getValueType(); 13026 13027 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) 13028 return SDValue(); 13029 13030 unsigned Opc = Value.getOpcode(); 13031 13032 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst 13033 // is a byte mask indicating a consecutive number of bytes, check to see if 13034 // Y is known to provide just those bytes. If so, we try to replace the 13035 // load + replace + store sequence with a single (narrower) store, which makes 13036 // the load dead. 13037 if (Opc == ISD::OR) { 13038 std::pair<unsigned, unsigned> MaskedLoad; 13039 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); 13040 if (MaskedLoad.first) 13041 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, 13042 Value.getOperand(1), ST,this)) 13043 return SDValue(NewST, 0); 13044 13045 // Or is commutative, so try swapping X and Y. 
13046 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); 13047 if (MaskedLoad.first) 13048 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, 13049 Value.getOperand(0), ST,this)) 13050 return SDValue(NewST, 0); 13051 } 13052 13053 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || 13054 Value.getOperand(1).getOpcode() != ISD::Constant) 13055 return SDValue(); 13056 13057 SDValue N0 = Value.getOperand(0); 13058 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 13059 Chain == SDValue(N0.getNode(), 1)) { 13060 LoadSDNode *LD = cast<LoadSDNode>(N0); 13061 if (LD->getBasePtr() != Ptr || 13062 LD->getPointerInfo().getAddrSpace() != 13063 ST->getPointerInfo().getAddrSpace()) 13064 return SDValue(); 13065 13066 // Find the type to narrow it the load / op / store to. 13067 SDValue N1 = Value.getOperand(1); 13068 unsigned BitWidth = N1.getValueSizeInBits(); 13069 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); 13070 if (Opc == ISD::AND) 13071 Imm ^= APInt::getAllOnesValue(BitWidth); 13072 if (Imm == 0 || Imm.isAllOnesValue()) 13073 return SDValue(); 13074 unsigned ShAmt = Imm.countTrailingZeros(); 13075 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; 13076 unsigned NewBW = NextPowerOf2(MSB - ShAmt); 13077 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); 13078 // The narrowing should be profitable, the load/store operation should be 13079 // legal (or custom) and the store size should be equal to the NewVT width. 13080 while (NewBW < BitWidth && 13081 (NewVT.getStoreSizeInBits() != NewBW || 13082 !TLI.isOperationLegalOrCustom(Opc, NewVT) || 13083 !TLI.isNarrowingProfitable(VT, NewVT))) { 13084 NewBW = NextPowerOf2(NewBW); 13085 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); 13086 } 13087 if (NewBW >= BitWidth) 13088 return SDValue(); 13089 13090 // If the lsb changed does not start at the type bitwidth boundary, 13091 // start at the previous one. 
13092 if (ShAmt % NewBW) 13093 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; 13094 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, 13095 std::min(BitWidth, ShAmt + NewBW)); 13096 if ((Imm & Mask) == Imm) { 13097 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); 13098 if (Opc == ISD::AND) 13099 NewImm ^= APInt::getAllOnesValue(NewBW); 13100 uint64_t PtrOff = ShAmt / 8; 13101 // For big endian targets, we need to adjust the offset to the pointer to 13102 // load the correct bytes. 13103 if (DAG.getDataLayout().isBigEndian()) 13104 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; 13105 13106 unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); 13107 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); 13108 if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) 13109 return SDValue(); 13110 13111 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), 13112 Ptr.getValueType(), Ptr, 13113 DAG.getConstant(PtrOff, SDLoc(LD), 13114 Ptr.getValueType())); 13115 SDValue NewLD = 13116 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, 13117 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, 13118 LD->getMemOperand()->getFlags(), LD->getAAInfo()); 13119 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, 13120 DAG.getConstant(NewImm, SDLoc(Value), 13121 NewVT)); 13122 SDValue NewST = 13123 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, 13124 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign); 13125 13126 AddToWorklist(NewPtr.getNode()); 13127 AddToWorklist(NewLD.getNode()); 13128 AddToWorklist(NewVal.getNode()); 13129 WorklistRemover DeadNodes(*this); 13130 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); 13131 ++OpsNarrowed; 13132 return NewST; 13133 } 13134 } 13135 13136 return SDValue(); 13137 } 13138 13139 /// For a given floating point load / store pair, if the load value isn't used 13140 /// by any other operations, then consider transforming the pair to integer 13141 /// load / store operations if the target 
deems the transformation profitable. 13142 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { 13143 StoreSDNode *ST = cast<StoreSDNode>(N); 13144 SDValue Chain = ST->getChain(); 13145 SDValue Value = ST->getValue(); 13146 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && 13147 Value.hasOneUse() && 13148 Chain == SDValue(Value.getNode(), 1)) { 13149 LoadSDNode *LD = cast<LoadSDNode>(Value); 13150 EVT VT = LD->getMemoryVT(); 13151 if (!VT.isFloatingPoint() || 13152 VT != ST->getMemoryVT() || 13153 LD->isNonTemporal() || 13154 ST->isNonTemporal() || 13155 LD->getPointerInfo().getAddrSpace() != 0 || 13156 ST->getPointerInfo().getAddrSpace() != 0) 13157 return SDValue(); 13158 13159 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); 13160 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || 13161 !TLI.isOperationLegal(ISD::STORE, IntVT) || 13162 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || 13163 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) 13164 return SDValue(); 13165 13166 unsigned LDAlign = LD->getAlignment(); 13167 unsigned STAlign = ST->getAlignment(); 13168 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); 13169 unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy); 13170 if (LDAlign < ABIAlign || STAlign < ABIAlign) 13171 return SDValue(); 13172 13173 SDValue NewLD = 13174 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), 13175 LD->getPointerInfo(), LDAlign); 13176 13177 SDValue NewST = 13178 DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(), 13179 ST->getPointerInfo(), STAlign); 13180 13181 AddToWorklist(NewLD.getNode()); 13182 AddToWorklist(NewST.getNode()); 13183 WorklistRemover DeadNodes(*this); 13184 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); 13185 ++LdStFP2Int; 13186 return NewST; 13187 } 13188 13189 return SDValue(); 13190 } 13191 13192 // This is a helper function for visitMUL to check the profitability 13193 // of 
folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). 13194 // MulNode is the original multiply, AddNode is (add x, c1), 13195 // and ConstNode is c2. 13196 // 13197 // If the (add x, c1) has multiple uses, we could increase 13198 // the number of adds if we make this transformation. 13199 // It would only be worth doing this if we can remove a 13200 // multiply in the process. Check for that here. 13201 // To illustrate: 13202 // (A + c1) * c3 13203 // (A + c2) * c3 13204 // We're checking for cases where we have common "c3 * A" expressions. 13205 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, 13206 SDValue &AddNode, 13207 SDValue &ConstNode) { 13208 APInt Val; 13209 13210 // If the add only has one use, this would be OK to do. 13211 if (AddNode.getNode()->hasOneUse()) 13212 return true; 13213 13214 // Walk all the users of the constant with which we're multiplying. 13215 for (SDNode *Use : ConstNode->uses()) { 13216 if (Use == MulNode) // This use is the one we're on right now. Skip it. 13217 continue; 13218 13219 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use. 13220 SDNode *OtherOp; 13221 SDNode *MulVar = AddNode.getOperand(0).getNode(); 13222 13223 // OtherOp is what we're multiplying against the constant. 13224 if (Use->getOperand(0) == ConstNode) 13225 OtherOp = Use->getOperand(1).getNode(); 13226 else 13227 OtherOp = Use->getOperand(0).getNode(); 13228 13229 // Check to see if multiply is with the same operand of our "add". 13230 // 13231 // ConstNode = CONST 13232 // Use = ConstNode * A <-- visiting Use. OtherOp is A. 13233 // ... 13234 // AddNode = (A + c1) <-- MulVar is A. 13235 // = AddNode * ConstNode <-- current visiting instruction. 13236 // 13237 // If we make this transformation, we will have a common 13238 // multiply (ConstNode * A) that we can save. 13239 if (OtherOp == MulVar) 13240 return true; 13241 13242 // Now check to see if a future expansion will give us a common 13243 // multiply. 
13244 // 13245 // ConstNode = CONST 13246 // AddNode = (A + c1) 13247 // ... = AddNode * ConstNode <-- current visiting instruction. 13248 // ... 13249 // OtherOp = (A + c2) 13250 // Use = OtherOp * ConstNode <-- visiting Use. 13251 // 13252 // If we make this transformation, we will have a common 13253 // multiply (CONST * A) after we also do the same transformation 13254 // to the "t2" instruction. 13255 if (OtherOp->getOpcode() == ISD::ADD && 13256 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && 13257 OtherOp->getOperand(0).getNode() == MulVar) 13258 return true; 13259 } 13260 } 13261 13262 // Didn't find a case where this would be profitable. 13263 return false; 13264 } 13265 13266 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, 13267 unsigned NumStores) { 13268 SmallVector<SDValue, 8> Chains; 13269 SmallPtrSet<const SDNode *, 8> Visited; 13270 SDLoc StoreDL(StoreNodes[0].MemNode); 13271 13272 for (unsigned i = 0; i < NumStores; ++i) { 13273 Visited.insert(StoreNodes[i].MemNode); 13274 } 13275 13276 // don't include nodes that are children 13277 for (unsigned i = 0; i < NumStores; ++i) { 13278 if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0) 13279 Chains.push_back(StoreNodes[i].MemNode->getChain()); 13280 } 13281 13282 assert(Chains.size() > 0 && "Chain should have generated a chain"); 13283 return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains); 13284 } 13285 13286 bool DAGCombiner::MergeStoresOfConstantsOrVecElts( 13287 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, 13288 bool IsConstantSrc, bool UseVector, bool UseTrunc) { 13289 // Make sure we have something to merge. 13290 if (NumStores < 2) 13291 return false; 13292 13293 // The latest Node in the DAG. 
13294 SDLoc DL(StoreNodes[0].MemNode); 13295 13296 int64_t ElementSizeBits = MemVT.getStoreSizeInBits(); 13297 unsigned SizeInBits = NumStores * ElementSizeBits; 13298 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; 13299 13300 EVT StoreTy; 13301 if (UseVector) { 13302 unsigned Elts = NumStores * NumMemElts; 13303 // Get the type for the merged vector store. 13304 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); 13305 } else 13306 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); 13307 13308 SDValue StoredVal; 13309 if (UseVector) { 13310 if (IsConstantSrc) { 13311 SmallVector<SDValue, 8> BuildVector; 13312 for (unsigned I = 0; I != NumStores; ++I) { 13313 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode); 13314 SDValue Val = St->getValue(); 13315 // If constant is of the wrong type, convert it now. 13316 if (MemVT != Val.getValueType()) { 13317 Val = peekThroughBitcast(Val); 13318 // Deal with constants of wrong size. 13319 if (ElementSizeBits != Val.getValueSizeInBits()) { 13320 EVT IntMemVT = 13321 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); 13322 if (isa<ConstantFPSDNode>(Val)) { 13323 // Not clear how to truncate FP values. 13324 return false; 13325 } else if (auto *C = dyn_cast<ConstantSDNode>(Val)) 13326 Val = DAG.getConstant(C->getAPIntValue() 13327 .zextOrTrunc(Val.getValueSizeInBits()) 13328 .zextOrTrunc(ElementSizeBits), 13329 SDLoc(C), IntMemVT); 13330 } 13331 // Make sure correctly size type is the correct type. 13332 Val = DAG.getBitcast(MemVT, Val); 13333 } 13334 BuildVector.push_back(Val); 13335 } 13336 StoredVal = DAG.getNode(MemVT.isVector() ? 
ISD::CONCAT_VECTORS 13337 : ISD::BUILD_VECTOR, 13338 DL, StoreTy, BuildVector); 13339 } else { 13340 SmallVector<SDValue, 8> Ops; 13341 for (unsigned i = 0; i < NumStores; ++i) { 13342 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 13343 SDValue Val = peekThroughBitcast(St->getValue()); 13344 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of 13345 // type MemVT. If the underlying value is not the correct 13346 // type, but it is an extraction of an appropriate vector we 13347 // can recast Val to be of the correct type. This may require 13348 // converting between EXTRACT_VECTOR_ELT and 13349 // EXTRACT_SUBVECTOR. 13350 if ((MemVT != Val.getValueType()) && 13351 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 13352 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) { 13353 SDValue Vec = Val.getOperand(0); 13354 EVT MemVTScalarTy = MemVT.getScalarType(); 13355 // We may need to add a bitcast here to get types to line up. 13356 if (MemVTScalarTy != Vec.getValueType()) { 13357 unsigned Elts = Vec.getValueType().getSizeInBits() / 13358 MemVTScalarTy.getSizeInBits(); 13359 EVT NewVecTy = 13360 EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts); 13361 Vec = DAG.getBitcast(NewVecTy, Vec); 13362 } 13363 auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR 13364 : ISD::EXTRACT_VECTOR_ELT; 13365 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1)); 13366 } 13367 Ops.push_back(Val); 13368 } 13369 13370 // Build the extracted vector elements back into a vector. 13371 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS 13372 : ISD::BUILD_VECTOR, 13373 DL, StoreTy, Ops); 13374 } 13375 } else { 13376 // We should always use a vector store when merging extracted vector 13377 // elements, so this path implies a store of constants. 
13378 assert(IsConstantSrc && "Merged vector elements should use vector store"); 13379 13380 APInt StoreInt(SizeInBits, 0); 13381 13382 // Construct a single integer constant which is made of the smaller 13383 // constant inputs. 13384 bool IsLE = DAG.getDataLayout().isLittleEndian(); 13385 for (unsigned i = 0; i < NumStores; ++i) { 13386 unsigned Idx = IsLE ? (NumStores - 1 - i) : i; 13387 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); 13388 13389 SDValue Val = St->getValue(); 13390 Val = peekThroughBitcast(Val); 13391 StoreInt <<= ElementSizeBits; 13392 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { 13393 StoreInt |= C->getAPIntValue() 13394 .zextOrTrunc(ElementSizeBits) 13395 .zextOrTrunc(SizeInBits); 13396 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { 13397 StoreInt |= C->getValueAPF() 13398 .bitcastToAPInt() 13399 .zextOrTrunc(ElementSizeBits) 13400 .zextOrTrunc(SizeInBits); 13401 // If fp truncation is necessary give up for now. 13402 if (MemVT.getSizeInBits() != ElementSizeBits) 13403 return false; 13404 } else { 13405 llvm_unreachable("Invalid constant element type"); 13406 } 13407 } 13408 13409 // Create the new Load and Store operations. 13410 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); 13411 } 13412 13413 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 13414 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores); 13415 13416 // make sure we use trunc store if it's necessary to be legal. 
13417 SDValue NewStore; 13418 if (!UseTrunc) { 13419 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), 13420 FirstInChain->getPointerInfo(), 13421 FirstInChain->getAlignment()); 13422 } else { // Must be realized as a trunc store 13423 EVT LegalizedStoredValTy = 13424 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); 13425 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits(); 13426 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal); 13427 SDValue ExtendedStoreVal = 13428 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL, 13429 LegalizedStoredValTy); 13430 NewStore = DAG.getTruncStore( 13431 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), 13432 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, 13433 FirstInChain->getAlignment(), 13434 FirstInChain->getMemOperand()->getFlags()); 13435 } 13436 13437 // Replace all merged stores with the new store. 13438 for (unsigned i = 0; i < NumStores; ++i) 13439 CombineTo(StoreNodes[i].MemNode, NewStore); 13440 13441 AddToWorklist(NewChain.getNode()); 13442 return true; 13443 } 13444 13445 void DAGCombiner::getStoreMergeCandidates( 13446 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes, 13447 SDNode *&RootNode) { 13448 // This holds the base pointer, index, and the offset in bytes from the base 13449 // pointer. 13450 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); 13451 EVT MemVT = St->getMemoryVT(); 13452 13453 SDValue Val = peekThroughBitcast(St->getValue()); 13454 // We must have a base and an offset. 13455 if (!BasePtr.getBase().getNode()) 13456 return; 13457 13458 // Do not handle stores to undef base pointers. 
13459 if (BasePtr.getBase().isUndef()) 13460 return; 13461 13462 bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val); 13463 bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 13464 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR); 13465 bool IsLoadSrc = isa<LoadSDNode>(Val); 13466 BaseIndexOffset LBasePtr; 13467 // Match on loadbaseptr if relevant. 13468 EVT LoadVT; 13469 if (IsLoadSrc) { 13470 auto *Ld = cast<LoadSDNode>(Val); 13471 LBasePtr = BaseIndexOffset::match(Ld, DAG); 13472 LoadVT = Ld->getMemoryVT(); 13473 // Load and store should be the same type. 13474 if (MemVT != LoadVT) 13475 return; 13476 // Loads must only have one use. 13477 if (!Ld->hasNUsesOfValue(1, 0)) 13478 return; 13479 // The memory operands must not be volatile. 13480 if (Ld->isVolatile() || Ld->isIndexed()) 13481 return; 13482 } 13483 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, 13484 int64_t &Offset) -> bool { 13485 if (Other->isVolatile() || Other->isIndexed()) 13486 return false; 13487 SDValue Val = peekThroughBitcast(Other->getValue()); 13488 // Allow merging constants of different types as integers. 13489 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT()) 13490 : Other->getMemoryVT() != MemVT; 13491 if (IsLoadSrc) { 13492 if (NoTypeMatch) 13493 return false; 13494 // The Load's Base Ptr must also match 13495 if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) { 13496 auto LPtr = BaseIndexOffset::match(OtherLd, DAG); 13497 if (LoadVT != OtherLd->getMemoryVT()) 13498 return false; 13499 // Loads must only have one use. 13500 if (!OtherLd->hasNUsesOfValue(1, 0)) 13501 return false; 13502 // The memory operands must not be volatile. 
13503 if (OtherLd->isVolatile() || OtherLd->isIndexed()) 13504 return false; 13505 if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) 13506 return false; 13507 } else 13508 return false; 13509 } 13510 if (IsConstantSrc) { 13511 if (NoTypeMatch) 13512 return false; 13513 if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val))) 13514 return false; 13515 } 13516 if (IsExtractVecSrc) { 13517 // Do not merge truncated stores here. 13518 if (Other->isTruncatingStore()) 13519 return false; 13520 if (!MemVT.bitsEq(Val.getValueType())) 13521 return false; 13522 if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT && 13523 Val.getOpcode() != ISD::EXTRACT_SUBVECTOR) 13524 return false; 13525 } 13526 Ptr = BaseIndexOffset::match(Other, DAG); 13527 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); 13528 }; 13529 13530 // We looking for a root node which is an ancestor to all mergable 13531 // stores. We search up through a load, to our root and then down 13532 // through all children. For instance we will find Store{1,2,3} if 13533 // St is Store1, Store2. or Store3 where the root is not a load 13534 // which always true for nonvolatile ops. TODO: Expand 13535 // the search to find all valid candidates through multiple layers of loads. 13536 // 13537 // Root 13538 // |-------|-------| 13539 // Load Load Store3 13540 // | | 13541 // Store1 Store2 13542 // 13543 // FIXME: We should be able to climb and 13544 // descend TokenFactors to find candidates as well. 
13545 13546 RootNode = St->getChain().getNode(); 13547 13548 if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) { 13549 RootNode = Ldn->getChain().getNode(); 13550 for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I) 13551 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain 13552 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2) 13553 if (I2.getOperandNo() == 0) 13554 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) { 13555 BaseIndexOffset Ptr; 13556 int64_t PtrDiff; 13557 if (CandidateMatch(OtherST, Ptr, PtrDiff)) 13558 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); 13559 } 13560 } else 13561 for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I) 13562 if (I.getOperandNo() == 0) 13563 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { 13564 BaseIndexOffset Ptr; 13565 int64_t PtrDiff; 13566 if (CandidateMatch(OtherST, Ptr, PtrDiff)) 13567 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); 13568 } 13569 } 13570 13571 // We need to check that merging these stores does not cause a loop in 13572 // the DAG. Any store candidate may depend on another candidate 13573 // indirectly through its operand (we already consider dependencies 13574 // through the chain). Check in parallel by searching up from 13575 // non-chain operands of candidates. 13576 bool DAGCombiner::checkMergeStoreCandidatesForDependencies( 13577 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, 13578 SDNode *RootNode) { 13579 // FIXME: We should be able to truncate a full search of 13580 // predecessors by doing a BFS and keeping tabs the originating 13581 // stores from which worklist nodes come from in a similar way to 13582 // TokenFactor simplfication. 13583 13584 SmallPtrSet<const SDNode *, 32> Visited; 13585 SmallVector<const SDNode *, 8> Worklist; 13586 13587 // RootNode is a predecessor to all candidates so we need not search 13588 // past it. Add RootNode (peeking through TokenFactors). 
Do not count 13589 // these towards size check. 13590 13591 Worklist.push_back(RootNode); 13592 while (!Worklist.empty()) { 13593 auto N = Worklist.pop_back_val(); 13594 if (N->getOpcode() == ISD::TokenFactor) { 13595 for (SDValue Op : N->ops()) 13596 Worklist.push_back(Op.getNode()); 13597 } 13598 Visited.insert(N); 13599 } 13600 13601 // Don't count pruning nodes towards max. 13602 unsigned int Max = 1024 + Visited.size(); 13603 // Search Ops of store candidates. 13604 for (unsigned i = 0; i < NumStores; ++i) { 13605 SDNode *N = StoreNodes[i].MemNode; 13606 // Of the 4 Store Operands: 13607 // * Chain (Op 0) -> We have already considered these 13608 // in candidate selection and can be 13609 // safely ignored 13610 // * Value (Op 1) -> Cycles may happen (e.g. through load chains) 13611 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant 13612 // and so no cycles are possible. 13613 // * (Op 3) -> appears to always be undef. Cannot be source of cycle. 13614 // 13615 // Thus we need only check predecessors of the value operands. 13616 auto *Op = N->getOperand(1).getNode(); 13617 if (Visited.insert(Op).second) 13618 Worklist.push_back(Op); 13619 } 13620 // Search through DAG. We can stop early if we find a store node. 13621 for (unsigned i = 0; i < NumStores; ++i) 13622 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist, 13623 Max)) 13624 return false; 13625 return true; 13626 } 13627 13628 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { 13629 if (OptLevel == CodeGenOpt::None) 13630 return false; 13631 13632 EVT MemVT = St->getMemoryVT(); 13633 int64_t ElementSizeBytes = MemVT.getStoreSize(); 13634 unsigned NumMemElts = MemVT.isVector() ? 
MemVT.getVectorNumElements() : 1; 13635 13636 if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) 13637 return false; 13638 13639 bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute( 13640 Attribute::NoImplicitFloat); 13641 13642 // This function cannot currently deal with non-byte-sized memory sizes. 13643 if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) 13644 return false; 13645 13646 if (!MemVT.isSimple()) 13647 return false; 13648 13649 // Perform an early exit check. Do not bother looking at stored values that 13650 // are not constants, loads, or extracted vector elements. 13651 SDValue StoredVal = peekThroughBitcast(St->getValue()); 13652 bool IsLoadSrc = isa<LoadSDNode>(StoredVal); 13653 bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || 13654 isa<ConstantFPSDNode>(StoredVal); 13655 bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 13656 StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR); 13657 13658 if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc) 13659 return false; 13660 13661 SmallVector<MemOpLink, 8> StoreNodes; 13662 SDNode *RootNode; 13663 // Find potential store merge candidates by searching through chain sub-DAG 13664 getStoreMergeCandidates(St, StoreNodes, RootNode); 13665 13666 // Check if there is anything to merge. 13667 if (StoreNodes.size() < 2) 13668 return false; 13669 13670 // Sort the memory operands according to their distance from the 13671 // base pointer. 13672 llvm::sort(StoreNodes.begin(), StoreNodes.end(), 13673 [](MemOpLink LHS, MemOpLink RHS) { 13674 return LHS.OffsetFromBase < RHS.OffsetFromBase; 13675 }); 13676 13677 // Store Merge attempts to merge the lowest stores. This generally 13678 // works out as if successful, as the remaining stores are checked 13679 // after the first collection of stores is merged. 
However, in the 13680 // case that a non-mergeable store is found first, e.g., {p[-2], 13681 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent 13682 // mergeable cases. To prevent this, we prune such stores from the 13683 // front of StoreNodes here. 13684 13685 bool RV = false; 13686 while (StoreNodes.size() > 1) { 13687 unsigned StartIdx = 0; 13688 while ((StartIdx + 1 < StoreNodes.size()) && 13689 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != 13690 StoreNodes[StartIdx + 1].OffsetFromBase) 13691 ++StartIdx; 13692 13693 // Bail if we don't have enough candidates to merge. 13694 if (StartIdx + 1 >= StoreNodes.size()) 13695 return RV; 13696 13697 if (StartIdx) 13698 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx); 13699 13700 // Scan the memory operations on the chain and find the first 13701 // non-consecutive store memory address. 13702 unsigned NumConsecutiveStores = 1; 13703 int64_t StartAddress = StoreNodes[0].OffsetFromBase; 13704 // Check that the addresses are consecutive starting from the second 13705 // element in the list of stores. 13706 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) { 13707 int64_t CurrAddress = StoreNodes[i].OffsetFromBase; 13708 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 13709 break; 13710 NumConsecutiveStores = i + 1; 13711 } 13712 13713 if (NumConsecutiveStores < 2) { 13714 StoreNodes.erase(StoreNodes.begin(), 13715 StoreNodes.begin() + NumConsecutiveStores); 13716 continue; 13717 } 13718 13719 // The node with the lowest store address. 13720 LLVMContext &Context = *DAG.getContext(); 13721 const DataLayout &DL = DAG.getDataLayout(); 13722 13723 // Store the constants into memory as one consecutive store. 
13724 if (IsConstantSrc) { 13725 while (NumConsecutiveStores >= 2) { 13726 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 13727 unsigned FirstStoreAS = FirstInChain->getAddressSpace(); 13728 unsigned FirstStoreAlign = FirstInChain->getAlignment(); 13729 unsigned LastLegalType = 1; 13730 unsigned LastLegalVectorType = 1; 13731 bool LastIntegerTrunc = false; 13732 bool NonZero = false; 13733 unsigned FirstZeroAfterNonZero = NumConsecutiveStores; 13734 for (unsigned i = 0; i < NumConsecutiveStores; ++i) { 13735 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); 13736 SDValue StoredVal = ST->getValue(); 13737 bool IsElementZero = false; 13738 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) 13739 IsElementZero = C->isNullValue(); 13740 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) 13741 IsElementZero = C->getConstantFPValue()->isNullValue(); 13742 if (IsElementZero) { 13743 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) 13744 FirstZeroAfterNonZero = i; 13745 } 13746 NonZero |= !IsElementZero; 13747 13748 // Find a legal type for the constant store. 13749 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; 13750 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); 13751 bool IsFast = false; 13752 13753 // Break early when size is too large to be legal. 13754 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) 13755 break; 13756 13757 if (TLI.isTypeLegal(StoreTy) && 13758 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && 13759 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, 13760 FirstStoreAlign, &IsFast) && 13761 IsFast) { 13762 LastIntegerTrunc = false; 13763 LastLegalType = i + 1; 13764 // Or check whether a truncstore is legal. 
13765 } else if (TLI.getTypeAction(Context, StoreTy) == 13766 TargetLowering::TypePromoteInteger) { 13767 EVT LegalizedStoredValTy = 13768 TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); 13769 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && 13770 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && 13771 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, 13772 FirstStoreAlign, &IsFast) && 13773 IsFast) { 13774 LastIntegerTrunc = true; 13775 LastLegalType = i + 1; 13776 } 13777 } 13778 13779 // We only use vectors if the constant is known to be zero or the 13780 // target allows it and the function is not marked with the 13781 // noimplicitfloat attribute. 13782 if ((!NonZero || 13783 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && 13784 !NoVectors) { 13785 // Find a legal type for the vector store. 13786 unsigned Elts = (i + 1) * NumMemElts; 13787 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); 13788 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && 13789 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && 13790 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, 13791 FirstStoreAlign, &IsFast) && 13792 IsFast) 13793 LastLegalVectorType = i + 1; 13794 } 13795 } 13796 13797 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; 13798 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; 13799 13800 // Check if we found a legal integer type that creates a meaningful 13801 // merge. 13802 if (NumElem < 2) { 13803 // We know that candidate stores are in order and of correct 13804 // shape. While there is no mergeable sequence from the 13805 // beginning one may start later in the sequence. The only 13806 // reason a merge of size N could have failed where another of 13807 // the same size would not have, is if the alignment has 13808 // improved or we've dropped a non-zero value. Drop as many 13809 // candidates as we can here. 
13810 unsigned NumSkip = 1; 13811 while ( 13812 (NumSkip < NumConsecutiveStores) && 13813 (NumSkip < FirstZeroAfterNonZero) && 13814 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) 13815 NumSkip++; 13816 13817 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); 13818 NumConsecutiveStores -= NumSkip; 13819 continue; 13820 } 13821 13822 // Check that we can merge these candidates without causing a cycle. 13823 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, 13824 RootNode)) { 13825 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); 13826 NumConsecutiveStores -= NumElem; 13827 continue; 13828 } 13829 13830 RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true, 13831 UseVector, LastIntegerTrunc); 13832 13833 // Remove merged stores for next iteration. 13834 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); 13835 NumConsecutiveStores -= NumElem; 13836 } 13837 continue; 13838 } 13839 13840 // When extracting multiple vector elements, try to store them 13841 // in one vector store rather than a sequence of scalar stores. 13842 if (IsExtractVecSrc) { 13843 // Loop on Consecutive Stores on success. 13844 while (NumConsecutiveStores >= 2) { 13845 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 13846 unsigned FirstStoreAS = FirstInChain->getAddressSpace(); 13847 unsigned FirstStoreAlign = FirstInChain->getAlignment(); 13848 unsigned NumStoresToMerge = 1; 13849 for (unsigned i = 0; i < NumConsecutiveStores; ++i) { 13850 // Find a legal type for the vector store. 13851 unsigned Elts = (i + 1) * NumMemElts; 13852 EVT Ty = 13853 EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); 13854 bool IsFast; 13855 13856 // Break early when size is too large to be legal. 
13857 if (Ty.getSizeInBits() > MaximumLegalStoreInBits) 13858 break; 13859 13860 if (TLI.isTypeLegal(Ty) && 13861 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && 13862 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, 13863 FirstStoreAlign, &IsFast) && 13864 IsFast) 13865 NumStoresToMerge = i + 1; 13866 } 13867 13868 // Check if we found a legal integer type creating a meaningful 13869 // merge. 13870 if (NumStoresToMerge < 2) { 13871 // We know that candidate stores are in order and of correct 13872 // shape. While there is no mergeable sequence from the 13873 // beginning one may start later in the sequence. The only 13874 // reason a merge of size N could have failed where another of 13875 // the same size would not have, is if the alignment has 13876 // improved. Drop as many candidates as we can here. 13877 unsigned NumSkip = 1; 13878 while ( 13879 (NumSkip < NumConsecutiveStores) && 13880 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) 13881 NumSkip++; 13882 13883 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); 13884 NumConsecutiveStores -= NumSkip; 13885 continue; 13886 } 13887 13888 // Check that we can merge these candidates without causing a cycle. 13889 if (!checkMergeStoreCandidatesForDependencies( 13890 StoreNodes, NumStoresToMerge, RootNode)) { 13891 StoreNodes.erase(StoreNodes.begin(), 13892 StoreNodes.begin() + NumStoresToMerge); 13893 NumConsecutiveStores -= NumStoresToMerge; 13894 continue; 13895 } 13896 13897 RV |= MergeStoresOfConstantsOrVecElts( 13898 StoreNodes, MemVT, NumStoresToMerge, false, true, false); 13899 13900 StoreNodes.erase(StoreNodes.begin(), 13901 StoreNodes.begin() + NumStoresToMerge); 13902 NumConsecutiveStores -= NumStoresToMerge; 13903 } 13904 continue; 13905 } 13906 13907 // Below we handle the case of multiple consecutive stores that 13908 // come from multiple consecutive loads. We merge them into a single 13909 // wide load and a single wide store. 
13910 13911 // Look for load nodes which are used by the stored values. 13912 SmallVector<MemOpLink, 8> LoadNodes; 13913 13914 // Find acceptable loads. Loads need to have the same chain (token factor), 13915 // must not be zext, volatile, indexed, and they must be consecutive. 13916 BaseIndexOffset LdBasePtr; 13917 13918 for (unsigned i = 0; i < NumConsecutiveStores; ++i) { 13919 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 13920 SDValue Val = peekThroughBitcast(St->getValue()); 13921 LoadSDNode *Ld = cast<LoadSDNode>(Val); 13922 13923 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); 13924 // If this is not the first ptr that we check. 13925 int64_t LdOffset = 0; 13926 if (LdBasePtr.getBase().getNode()) { 13927 // The base ptr must be the same. 13928 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) 13929 break; 13930 } else { 13931 // Check that all other base pointers are the same as this one. 13932 LdBasePtr = LdPtr; 13933 } 13934 13935 // We found a potential memory operand to merge. 13936 LoadNodes.push_back(MemOpLink(Ld, LdOffset)); 13937 } 13938 13939 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { 13940 // If we have load/store pair instructions and we only have two values, 13941 // don't bother merging. 
13942 unsigned RequiredAlignment; 13943 if (LoadNodes.size() == 2 && 13944 TLI.hasPairedLoad(MemVT, RequiredAlignment) && 13945 StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { 13946 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); 13947 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); 13948 break; 13949 } 13950 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 13951 unsigned FirstStoreAS = FirstInChain->getAddressSpace(); 13952 unsigned FirstStoreAlign = FirstInChain->getAlignment(); 13953 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); 13954 unsigned FirstLoadAS = FirstLoad->getAddressSpace(); 13955 unsigned FirstLoadAlign = FirstLoad->getAlignment(); 13956 13957 // Scan the memory operations on the chain and find the first 13958 // non-consecutive load memory address. These variables hold the index in 13959 // the store node array. 13960 13961 unsigned LastConsecutiveLoad = 1; 13962 13963 // This variable refers to the size and not index in the array. 13964 unsigned LastLegalVectorType = 1; 13965 unsigned LastLegalIntegerType = 1; 13966 bool isDereferenceable = true; 13967 bool DoIntegerTruncate = false; 13968 StartAddress = LoadNodes[0].OffsetFromBase; 13969 SDValue FirstChain = FirstLoad->getChain(); 13970 for (unsigned i = 1; i < LoadNodes.size(); ++i) { 13971 // All loads must share the same chain. 13972 if (LoadNodes[i].MemNode->getChain() != FirstChain) 13973 break; 13974 13975 int64_t CurrAddress = LoadNodes[i].OffsetFromBase; 13976 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 13977 break; 13978 LastConsecutiveLoad = i; 13979 13980 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) 13981 isDereferenceable = false; 13982 13983 // Find a legal type for the vector store. 13984 unsigned Elts = (i + 1) * NumMemElts; 13985 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); 13986 13987 // Break early when size is too large to be legal. 
13988 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) 13989 break; 13990 13991 bool IsFastSt, IsFastLd; 13992 if (TLI.isTypeLegal(StoreTy) && 13993 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && 13994 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, 13995 FirstStoreAlign, &IsFastSt) && 13996 IsFastSt && 13997 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, 13998 FirstLoadAlign, &IsFastLd) && 13999 IsFastLd) { 14000 LastLegalVectorType = i + 1; 14001 } 14002 14003 // Find a legal type for the integer store. 14004 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; 14005 StoreTy = EVT::getIntegerVT(Context, SizeInBits); 14006 if (TLI.isTypeLegal(StoreTy) && 14007 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && 14008 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, 14009 FirstStoreAlign, &IsFastSt) && 14010 IsFastSt && 14011 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, 14012 FirstLoadAlign, &IsFastLd) && 14013 IsFastLd) { 14014 LastLegalIntegerType = i + 1; 14015 DoIntegerTruncate = false; 14016 // Or check whether a truncstore and extload is legal. 
14017 } else if (TLI.getTypeAction(Context, StoreTy) == 14018 TargetLowering::TypePromoteInteger) { 14019 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); 14020 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && 14021 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && 14022 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, 14023 StoreTy) && 14024 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, 14025 StoreTy) && 14026 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && 14027 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, 14028 FirstStoreAlign, &IsFastSt) && 14029 IsFastSt && 14030 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, 14031 FirstLoadAlign, &IsFastLd) && 14032 IsFastLd) { 14033 LastLegalIntegerType = i + 1; 14034 DoIntegerTruncate = true; 14035 } 14036 } 14037 } 14038 14039 // Only use vector types if the vector type is larger than the integer 14040 // type. If they are the same, use integers. 14041 bool UseVectorTy = 14042 LastLegalVectorType > LastLegalIntegerType && !NoVectors; 14043 unsigned LastLegalType = 14044 std::max(LastLegalVectorType, LastLegalIntegerType); 14045 14046 // We add +1 here because the LastXXX variables refer to location while 14047 // the NumElem refers to array/index size. 14048 unsigned NumElem = 14049 std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); 14050 NumElem = std::min(LastLegalType, NumElem); 14051 14052 if (NumElem < 2) { 14053 // We know that candidate stores are in order and of correct 14054 // shape. While there is no mergeable sequence from the 14055 // beginning one may start later in the sequence. The only 14056 // reason a merge of size N could have failed where another of 14057 // the same size would not have is if the alignment or either 14058 // the load or store has improved. Drop as many candidates as we 14059 // can here. 
14060 unsigned NumSkip = 1; 14061 while ((NumSkip < LoadNodes.size()) && 14062 (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && 14063 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) 14064 NumSkip++; 14065 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); 14066 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); 14067 NumConsecutiveStores -= NumSkip; 14068 continue; 14069 } 14070 14071 // Check that we can merge these candidates without causing a cycle. 14072 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, 14073 RootNode)) { 14074 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); 14075 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); 14076 NumConsecutiveStores -= NumElem; 14077 continue; 14078 } 14079 14080 // Find if it is better to use vectors or integers to load and store 14081 // to memory. 14082 EVT JointMemOpVT; 14083 if (UseVectorTy) { 14084 // Find a legal type for the vector store. 14085 unsigned Elts = NumElem * NumMemElts; 14086 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); 14087 } else { 14088 unsigned SizeInBits = NumElem * ElementSizeBytes * 8; 14089 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); 14090 } 14091 14092 SDLoc LoadDL(LoadNodes[0].MemNode); 14093 SDLoc StoreDL(StoreNodes[0].MemNode); 14094 14095 // The merged loads are required to have the same incoming chain, so 14096 // using the first's chain is acceptable. 14097 14098 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); 14099 AddToWorklist(NewStoreChain.getNode()); 14100 14101 MachineMemOperand::Flags MMOFlags = 14102 isDereferenceable ? 
MachineMemOperand::MODereferenceable 14103 : MachineMemOperand::MONone; 14104 14105 SDValue NewLoad, NewStore; 14106 if (UseVectorTy || !DoIntegerTruncate) { 14107 NewLoad = 14108 DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), 14109 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), 14110 FirstLoadAlign, MMOFlags); 14111 NewStore = DAG.getStore( 14112 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), 14113 FirstInChain->getPointerInfo(), FirstStoreAlign); 14114 } else { // This must be the truncstore/extload case 14115 EVT ExtendedTy = 14116 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); 14117 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, 14118 FirstLoad->getChain(), FirstLoad->getBasePtr(), 14119 FirstLoad->getPointerInfo(), JointMemOpVT, 14120 FirstLoadAlign, MMOFlags); 14121 NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, 14122 FirstInChain->getBasePtr(), 14123 FirstInChain->getPointerInfo(), 14124 JointMemOpVT, FirstInChain->getAlignment(), 14125 FirstInChain->getMemOperand()->getFlags()); 14126 } 14127 14128 // Transfer chain users from old loads to the new load. 14129 for (unsigned i = 0; i < NumElem; ++i) { 14130 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); 14131 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), 14132 SDValue(NewLoad.getNode(), 1)); 14133 } 14134 14135 // Replace the all stores with the new store. Recursively remove 14136 // corresponding value if its no longer used. 
14137 for (unsigned i = 0; i < NumElem; ++i) { 14138 SDValue Val = StoreNodes[i].MemNode->getOperand(1); 14139 CombineTo(StoreNodes[i].MemNode, NewStore); 14140 if (Val.getNode()->use_empty()) 14141 recursivelyDeleteUnusedNodes(Val.getNode()); 14142 } 14143 14144 RV = true; 14145 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); 14146 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); 14147 NumConsecutiveStores -= NumElem; 14148 } 14149 } 14150 return RV; 14151 } 14152 14153 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { 14154 SDLoc SL(ST); 14155 SDValue ReplStore; 14156 14157 // Replace the chain to avoid dependency. 14158 if (ST->isTruncatingStore()) { 14159 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(), 14160 ST->getBasePtr(), ST->getMemoryVT(), 14161 ST->getMemOperand()); 14162 } else { 14163 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), 14164 ST->getMemOperand()); 14165 } 14166 14167 // Create token to keep both nodes around. 14168 SDValue Token = DAG.getNode(ISD::TokenFactor, SL, 14169 MVT::Other, ST->getChain(), ReplStore); 14170 14171 // Make sure the new and old chains are cleaned up. 14172 AddToWorklist(Token.getNode()); 14173 14174 // Don't add users to work list. 14175 return CombineTo(ST, Token, false); 14176 } 14177 14178 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { 14179 SDValue Value = ST->getValue(); 14180 if (Value.getOpcode() == ISD::TargetConstantFP) 14181 return SDValue(); 14182 14183 SDLoc DL(ST); 14184 14185 SDValue Chain = ST->getChain(); 14186 SDValue Ptr = ST->getBasePtr(); 14187 14188 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value); 14189 14190 // NOTE: If the original store is volatile, this transform must not increase 14191 // the number of stores. For example, on x86-32 an f64 can be stored in one 14192 // processor operation but an i64 (which is not legal) requires two. 
So the 14193 // transform should not be done in this case. 14194 14195 SDValue Tmp; 14196 switch (CFP->getSimpleValueType(0).SimpleTy) { 14197 default: 14198 llvm_unreachable("Unknown FP type"); 14199 case MVT::f16: // We don't do this for these yet. 14200 case MVT::f80: 14201 case MVT::f128: 14202 case MVT::ppcf128: 14203 return SDValue(); 14204 case MVT::f32: 14205 if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || 14206 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 14207 ; 14208 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 14209 bitcastToAPInt().getZExtValue(), SDLoc(CFP), 14210 MVT::i32); 14211 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand()); 14212 } 14213 14214 return SDValue(); 14215 case MVT::f64: 14216 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && 14217 !ST->isVolatile()) || 14218 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { 14219 ; 14220 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 14221 getZExtValue(), SDLoc(CFP), MVT::i64); 14222 return DAG.getStore(Chain, DL, Tmp, 14223 Ptr, ST->getMemOperand()); 14224 } 14225 14226 if (!ST->isVolatile() && 14227 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 14228 // Many FP stores are not made apparent until after legalize, e.g. for 14229 // argument passing. Since this is so common, custom legalize the 14230 // 64-bit integer store into two 32-bit stores. 
14231 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); 14232 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); 14233 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); 14234 if (DAG.getDataLayout().isBigEndian()) 14235 std::swap(Lo, Hi); 14236 14237 unsigned Alignment = ST->getAlignment(); 14238 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); 14239 AAMDNodes AAInfo = ST->getAAInfo(); 14240 14241 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), 14242 ST->getAlignment(), MMOFlags, AAInfo); 14243 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 14244 DAG.getConstant(4, DL, Ptr.getValueType())); 14245 Alignment = MinAlign(Alignment, 4U); 14246 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, 14247 ST->getPointerInfo().getWithOffset(4), 14248 Alignment, MMOFlags, AAInfo); 14249 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 14250 St0, St1); 14251 } 14252 14253 return SDValue(); 14254 } 14255 } 14256 14257 SDValue DAGCombiner::visitSTORE(SDNode *N) { 14258 StoreSDNode *ST = cast<StoreSDNode>(N); 14259 SDValue Chain = ST->getChain(); 14260 SDValue Value = ST->getValue(); 14261 SDValue Ptr = ST->getBasePtr(); 14262 14263 // If this is a store of a bit convert, store the input value if the 14264 // resultant store does not need a higher alignment than the original. 
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // Fold away the bitcast only if a store of the source type is available
    // and the target says the bitcast-free store is beneficial and the
    // memory access is allowed (fast) at the original alignment.
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
      unsigned OrigAlign = ST->getAlignment();
      bool Fast = false;
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
          Fast) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                            ST->getPointerInfo(), OrigAlign,
                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
      }
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N as we are only refining the alignment
        assert(NewStore.getNode() == N);
        (void)NewStore;
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation.  Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // The chain may have been updated by findBetterNeighborChains; reload it.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter = DAG.GetDemandedBits(
        Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                    ST->getMemoryVT().getScalarSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(
            Value,
            APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                 ST->getMemoryVT().getScalarSizeInBits()))) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted).  SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
        ST->getMemoryVT() == ST1->getMemoryVT()) {
      // If this is a store followed by a store with the same value to the same
      // location, then the store is dead/noop.
      if (ST1->getValue() == Value) {
        // The store is dead, remove it.
        return Chain;
      }

      // If ST's preceding store (ST1) writes the same location with the same
      // width, and no other node is chained to ST1, then ST1 is fully
      // overwritten and can effectively be dropped.  Do not remove stores to
      // undef as they may be used as data sinks.
      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        // ST1 is fully overwritten and can be elided.  Combine with its chain
        // value.
        CombineTo(ST1, ST1->getChain());
        return SDValue();
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Always perform this optimization before types are legal. If the target
  // prefers, also try this after legalization to catch stores that were created
  // by intrinsics or other nodes.
  if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
    while (true) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  // avoid changing the types of some subset of stores due to visit order,
  // preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  return ReduceLoadOpStoreWidth(N);
}

/// For the instruction sequence of store below, F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
/// which can remove the bitwise instructions or sink them to colder places.
///
/// (store (or (zext (bitcast F to i32) to i64),
///            (shl (zext I to i64), 32)), addr)  -->
/// (store F, addr) and (store I, addr+4)
///
/// Similarly, splitting for other merged store can also be beneficial, like:
/// For pair of {i32, i32}, i64 store --> two i32 stores.
/// For pair of {i32, i16}, i64 store --> two i32 stores.
/// For pair of {i16, i16}, i32 store --> two i16 stores.
/// For pair of {i16, i8},  i32 store --> two i16 stores.
/// For pair of {i8, i8},   i16 store --> two i8 stores.
///
/// We allow each target to determine specifically which kind of splitting is
/// supported.
///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
/// void goo(const std::pair<int, float> &);
/// hoo() {
///   ...
///   goo(std::make_pair(tmp, ftmp));
///   ...
/// }
///
SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SDValue Val = ST->getValue();
  SDLoc DL(ST);

  // Match OR operand: the stored value must be a scalar-integer OR of the
  // two halves.
  if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
    return SDValue();

  // Match SHL operand and get Lower and Higher parts of Val.  The OR is
  // commutative, so try both operand orders.
  SDValue Op1 = Val.getOperand(0);
  SDValue Op2 = Val.getOperand(1);
  SDValue Lo, Hi;
  if (Op1.getOpcode() != ISD::SHL) {
    std::swap(Op1, Op2);
    if (Op1.getOpcode() != ISD::SHL)
      return SDValue();
  }
  Lo = Op2;
  Hi = Op1.getOperand(0);
  if (!Op1.hasOneUse())
    return SDValue();

  // Match shift amount to HalfValBitSize.
  unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
  ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
  if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
    return SDValue();

  // Lo and Hi are zero-extended from int with size less equal than 32
  // to i64.
  if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
      !Lo.getOperand(0).getValueType().isScalarInteger() ||
      Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
      Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
      !Hi.getOperand(0).getValueType().isScalarInteger() ||
      Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
    return SDValue();

  // Use the EVT of low and high parts before bitcast as the input
  // of target query.
  EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
                  ? Lo.getOperand(0).getValueType()
                  : Lo.getValueType();
  EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
                   ? Hi.getOperand(0).getValueType()
                   : Hi.getValueType();
  if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
    return SDValue();

  // Start to split store.
  unsigned Alignment = ST->getAlignment();
  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
  AAMDNodes AAInfo = ST->getAAInfo();

  // Change the sizes of Lo and Hi's value types to HalfValBitSize.
  EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
  Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));

  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  // Lower value store.
  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
                             ST->getAlignment(), MMOFlags, AAInfo);
  Ptr =
      DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
                  DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
  // Higher value store.  It sits HalfValBitSize/8 bytes past the base, so
  // its alignment is conservatively halved.
  // NOTE(review): MinAlign(Alignment, HalfValBitSize / 8) would be the more
  // precise computation here — confirm intent.
  SDValue St1 =
      DAG.getStore(St0, DL, Hi, Ptr,
                   ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
                   Alignment / 2, MMOFlags, AAInfo);
  return St1;
}

/// Convert a disguised subvector insertion into a shuffle:
/// insert_vector_elt V, (bitcast X from vector type), IdxC -->
/// bitcast(shuffle (bitcast V), (extended X), Mask)
/// Note: We do not use an insert_subvector node because that requires a legal
/// subvector type.
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
  // The inserted value must itself be a bitcast of a vector.
  SDValue InsertVal = N->getOperand(1);
  if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
      !InsertVal.getOperand(0).getValueType().isVector())
    return SDValue();

  SDValue SubVec = InsertVal.getOperand(0);
  SDValue DestVec = N->getOperand(0);
  EVT SubVecVT = SubVec.getValueType();
  EVT VT = DestVec.getValueType();
  unsigned NumSrcElts = SubVecVT.getVectorNumElements();
  unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
  unsigned NumMaskVals = ExtendRatio * NumSrcElts;

  // Step 1: Create a shuffle mask that implements this insert operation. The
  // vector that we are inserting into will be operand 0 of the shuffle, so
  // those elements are just 'i'. The inserted subvector is in the first
  // positions of operand 1 of the shuffle. Example:
  // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
  SmallVector<int, 16> Mask(NumMaskVals);
  for (unsigned i = 0; i != NumMaskVals; ++i) {
    if (i / NumSrcElts == InsIndex)
      Mask[i] = (i % NumSrcElts) + NumMaskVals;
    else
      Mask[i] = i;
  }

  // Bail out if the target can not handle the shuffle we want to create.
  EVT SubVecEltVT = SubVecVT.getVectorElementType();
  EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
  if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
    return SDValue();

  // Step 2: Create a wide vector from the inserted source vector by appending
  // undefined elements. This is the same size as our destination vector.
  SDLoc DL(N);
  SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
  ConcatOps[0] = SubVec;
  SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);

  // Step 3: Shuffle in the padded subvector.
  SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
  SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
  AddToWorklist(PaddedSubV.getNode());
  AddToWorklist(DestVecBC.getNode());
  AddToWorklist(Shuf.getNode());
  return DAG.getBitcast(VT, Shuf);
}

/// Main combine entry point for ISD::INSERT_VECTOR_ELT nodes.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc DL(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.isUndef())
    return InVec;

  EVT VT = InVec.getValueType();

  // Remove redundant insertions:
  // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
    return InVec;

  // We must know which element is being inserted for folds below here.
  auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
  if (!IndexC)
    return SDValue();
  unsigned Elt = IndexC->getZExtValue();

  if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
    return Shuf;

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt = InVec.getConstantOperandVal(2);
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.isUndef()) {
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
  }

  // Return the new vector
  return DAG.getBuildVector(VT, DL, Ops);
}

/// Replace an extract_vector_elt of a load with a narrowed scalar load of
/// just the extracted element.  \p EVE is the extract node, \p InVecVT the
/// vector type being loaded, \p EltNo the (possibly non-constant) element
/// index, and \p OriginalLoad the wide load being narrowed.
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  assert(!OriginalLoad->isVolatile());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Give up if the element load would need stricter alignment than the
  // original load provides, or if element-typed loads aren't available.
  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
    ISD::NON_EXTLOAD : ISD::EXTLOAD;
  if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
    return SDValue();

  Align = NewAlign;

  // Compute the new pointer: base + index * element-store-size.  For a
  // constant index the offset (and MachinePointerInfo) is folded statically.
  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
14714 // Note that this replacement assumes that the extractvalue is the only 14715 // use of the load; that's okay because we don't want to perform this 14716 // transformation in other cases anyway. 14717 SDValue Load; 14718 SDValue Chain; 14719 if (ResultVT.bitsGT(VecEltVT)) { 14720 // If the result type of vextract is wider than the load, then issue an 14721 // extending load instead. 14722 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, 14723 VecEltVT) 14724 ? ISD::ZEXTLOAD 14725 : ISD::EXTLOAD; 14726 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, 14727 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT, 14728 Align, OriginalLoad->getMemOperand()->getFlags(), 14729 OriginalLoad->getAAInfo()); 14730 Chain = Load.getValue(1); 14731 } else { 14732 Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, 14733 MPI, Align, OriginalLoad->getMemOperand()->getFlags(), 14734 OriginalLoad->getAAInfo()); 14735 Chain = Load.getValue(1); 14736 if (ResultVT.bitsLT(VecEltVT)) 14737 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); 14738 else 14739 Load = DAG.getBitcast(ResultVT, Load); 14740 } 14741 WorklistRemover DeadNodes(*this); 14742 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; 14743 SDValue To[] = { Load, Chain }; 14744 DAG.ReplaceAllUsesOfValuesWith(From, To, 2); 14745 // Since we're explicitly calling ReplaceAllUses, add the new node to the 14746 // worklist explicitly as well. 14747 AddToWorklist(Load.getNode()); 14748 AddUsersToWorklist(Load.getNode()); // Add users too 14749 // Make sure to revisit this node to clean it up; it will usually be dead. 
14750 AddToWorklist(EVE); 14751 ++OpsNarrowed; 14752 return SDValue(EVE, 0); 14753 } 14754 14755 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { 14756 // (vextract (scalar_to_vector val, 0) -> val 14757 SDValue InVec = N->getOperand(0); 14758 EVT VT = InVec.getValueType(); 14759 EVT NVT = N->getValueType(0); 14760 14761 if (InVec.isUndef()) 14762 return DAG.getUNDEF(NVT); 14763 14764 if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { 14765 // Check if the result type doesn't match the inserted element type. A 14766 // SCALAR_TO_VECTOR may truncate the inserted element and the 14767 // EXTRACT_VECTOR_ELT may widen the extracted vector. 14768 SDValue InOp = InVec.getOperand(0); 14769 if (InOp.getValueType() != NVT) { 14770 assert(InOp.getValueType().isInteger() && NVT.isInteger()); 14771 return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT); 14772 } 14773 return InOp; 14774 } 14775 14776 SDValue EltNo = N->getOperand(1); 14777 ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); 14778 14779 // extract_vector_elt of out-of-bounds element -> UNDEF 14780 if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements())) 14781 return DAG.getUNDEF(NVT); 14782 14783 // extract_vector_elt (build_vector x, y), 1 -> y 14784 if (ConstEltNo && 14785 InVec.getOpcode() == ISD::BUILD_VECTOR && 14786 TLI.isTypeLegal(VT) && 14787 (InVec.hasOneUse() || 14788 TLI.aggressivelyPreferBuildVectorSources(VT))) { 14789 SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue()); 14790 EVT InEltVT = Elt.getValueType(); 14791 14792 // Sometimes build_vector's scalar input types do not match result type. 14793 if (NVT == InEltVT) 14794 return Elt; 14795 14796 // TODO: It may be useful to truncate if free if the build_vector implicitly 14797 // converts. 14798 } 14799 14800 // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x) 14801 bool isLE = DAG.getDataLayout().isLittleEndian(); 14802 unsigned EltTrunc = isLE ? 
0 : VT.getVectorNumElements() - 1; 14803 if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() && 14804 ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) { 14805 SDValue BCSrc = InVec.getOperand(0); 14806 if (BCSrc.getValueType().isScalarInteger()) 14807 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc); 14808 } 14809 14810 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val 14811 // 14812 // This only really matters if the index is non-constant since other combines 14813 // on the constant elements already work. 14814 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && 14815 EltNo == InVec.getOperand(2)) { 14816 SDValue Elt = InVec.getOperand(1); 14817 return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt; 14818 } 14819 14820 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. 14821 // We only perform this optimization before the op legalization phase because 14822 // we may introduce new vector instructions which are not backed by TD 14823 // patterns. For example on AVX, extracting elements from a wide vector 14824 // without using extract_subvector. However, if we can find an underlying 14825 // scalar value, then we can always use that. 14826 if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) { 14827 int NumElem = VT.getVectorNumElements(); 14828 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); 14829 // Find the new index to extract from. 14830 int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue()); 14831 14832 // Extracting an undef index is undef. 14833 if (OrigElt == -1) 14834 return DAG.getUNDEF(NVT); 14835 14836 // Select the right vector half to extract from. 
14837 SDValue SVInVec; 14838 if (OrigElt < NumElem) { 14839 SVInVec = InVec->getOperand(0); 14840 } else { 14841 SVInVec = InVec->getOperand(1); 14842 OrigElt -= NumElem; 14843 } 14844 14845 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { 14846 SDValue InOp = SVInVec.getOperand(OrigElt); 14847 if (InOp.getValueType() != NVT) { 14848 assert(InOp.getValueType().isInteger() && NVT.isInteger()); 14849 InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); 14850 } 14851 14852 return InOp; 14853 } 14854 14855 // FIXME: We should handle recursing on other vector shuffles and 14856 // scalar_to_vector here as well. 14857 14858 if (!LegalOperations || 14859 // FIXME: Should really be just isOperationLegalOrCustom. 14860 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) || 14861 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) { 14862 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 14863 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, 14864 DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); 14865 } 14866 } 14867 14868 bool BCNumEltsChanged = false; 14869 EVT ExtVT = VT.getVectorElementType(); 14870 EVT LVT = ExtVT; 14871 14872 // If the result of load has to be truncated, then it's not necessarily 14873 // profitable. 14874 if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) 14875 return SDValue(); 14876 14877 if (InVec.getOpcode() == ISD::BITCAST) { 14878 // Don't duplicate a load with other uses. 
14879 if (!InVec.hasOneUse()) 14880 return SDValue(); 14881 14882 EVT BCVT = InVec.getOperand(0).getValueType(); 14883 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) 14884 return SDValue(); 14885 if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) 14886 BCNumEltsChanged = true; 14887 InVec = InVec.getOperand(0); 14888 ExtVT = BCVT.getVectorElementType(); 14889 } 14890 14891 // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) 14892 if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && 14893 ISD::isNormalLoad(InVec.getNode()) && 14894 !N->getOperand(1)->hasPredecessor(InVec.getNode())) { 14895 SDValue Index = N->getOperand(1); 14896 if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) { 14897 if (!OrigLoad->isVolatile()) { 14898 return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, 14899 OrigLoad); 14900 } 14901 } 14902 } 14903 14904 // Perform only after legalization to ensure build_vector / vector_shuffle 14905 // optimizations have already been done. 14906 if (!LegalOperations) return SDValue(); 14907 14908 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) 14909 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) 14910 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) 14911 14912 if (ConstEltNo) { 14913 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 14914 14915 LoadSDNode *LN0 = nullptr; 14916 const ShuffleVectorSDNode *SVN = nullptr; 14917 if (ISD::isNormalLoad(InVec.getNode())) { 14918 LN0 = cast<LoadSDNode>(InVec); 14919 } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && 14920 InVec.getOperand(0).getValueType() == ExtVT && 14921 ISD::isNormalLoad(InVec.getOperand(0).getNode())) { 14922 // Don't duplicate a load with other uses. 
14923 if (!InVec.hasOneUse()) 14924 return SDValue(); 14925 14926 LN0 = cast<LoadSDNode>(InVec.getOperand(0)); 14927 } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { 14928 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) 14929 // => 14930 // (load $addr+1*size) 14931 14932 // Don't duplicate a load with other uses. 14933 if (!InVec.hasOneUse()) 14934 return SDValue(); 14935 14936 // If the bit convert changed the number of elements, it is unsafe 14937 // to examine the mask. 14938 if (BCNumEltsChanged) 14939 return SDValue(); 14940 14941 // Select the input vector, guarding against out of range extract vector. 14942 unsigned NumElems = VT.getVectorNumElements(); 14943 int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); 14944 InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); 14945 14946 if (InVec.getOpcode() == ISD::BITCAST) { 14947 // Don't duplicate a load with other uses. 14948 if (!InVec.hasOneUse()) 14949 return SDValue(); 14950 14951 InVec = InVec.getOperand(0); 14952 } 14953 if (ISD::isNormalLoad(InVec.getNode())) { 14954 LN0 = cast<LoadSDNode>(InVec); 14955 Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; 14956 EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType()); 14957 } 14958 } 14959 14960 // Make sure we found a non-volatile load and the extractelement is 14961 // the only use. 14962 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) 14963 return SDValue(); 14964 14965 // If Idx was -1 above, Elt is going to be -1, so just return undef. 
    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}

// Simplify (build_vec (ext )) to (bitcast (build_vec ))
/// Transform a BUILD_VECTOR whose operands are all ZERO_EXTEND or ANY_EXTEND
/// from one common narrower scalar type into a bitcast of a wider
/// BUILD_VECTOR built directly from the un-extended sources, padding the
/// remaining sub-elements with zero (for zext) or undef (for anyext) filler.
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.isUndef()) continue;

    bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                 isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // ZERO_EXTEND requires the padding lanes to be zero; if every extend was an
  // ANY_EXTEND the padding lanes may stay undef.
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, DL, SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.isUndef()) && "Invalid cast opcode");
    SDValue In;
    if (Cast.isUndef())
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    // On little-endian the narrow value occupies the lowest-numbered
    // sub-element of the wide lane; on big-endian it occupies the last one.
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT) ||
      (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
       TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
    return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getBitcast(VT, BV);
}

/// Fold a BUILD_VECTOR whose defined operands are all [SU]INT_TO_FP from a
/// common integer type into a single vector [SU]INT_TO_FP of an integer
/// BUILD_VECTOR, when both the conversion and the integer vector type are
/// legal.
SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned NumInScalars = N->getNumOperands();
  SDLoc DL(N);

  EVT SrcVT = MVT::Other;
  // ISD::DELETED_NODE doubles as the "no opcode seen yet" sentinel here.
  unsigned Opcode = ISD::DELETED_NODE;
  unsigned NumDefs = 0;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    unsigned Opc = In.getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    // If all scalar values are floats and converted from integers.
    if (Opcode == ISD::DELETED_NODE &&
        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
      Opcode = Opc;
    }

    // All defined operands must share a single conversion opcode.
    if (Opc != Opcode)
      return SDValue();

    EVT InVT = In.getOperand(0).getValueType();

    // If all scalar values are typed differently, bail out. It's chosen to
    // simplify BUILD_VECTOR of integer types.
    if (SrcVT == MVT::Other)
      SrcVT = InVT;
    if (SrcVT != InVT)
      return SDValue();
    NumDefs++;
  }

  // If the vector has just one element defined, it's not worth to fold it into
  // a vectorized one.
  if (NumDefs < 2)
    return SDValue();

  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
         && "Should only handle conversion from integer to float.");
  assert(SrcVT != MVT::Other && "Cannot determine source type!");

  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);

  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
    return SDValue();

  // Just because the floating-point vector type is legal does not necessarily
  // mean that the corresponding integer vector type is.
  if (!isTypeLegal(NVT))
    return SDValue();

  // Gather the integer sources, substituting undef for undef elements.
  SmallVector<SDValue, 8> Opnds;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);

    if (In.isUndef())
      Opnds.push_back(DAG.getUNDEF(SrcVT));
    else
      Opnds.push_back(In.getOperand(0));
  }
  SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
  AddToWorklist(BV.getNode());

  return DAG.getNode(Opcode, DL, VT, BV);
}

/// Build a single VECTOR_SHUFFLE that places the elements of N (a
/// BUILD_VECTOR) drawn from the pair of input vectors numbered LeftIdx and
/// LeftIdx+1 in VectorMask into their destination lanes. VecIn2 may be a null
/// SDValue when only one input vector exists for this pair. Returns a null
/// SDValue when the input and output types cannot be reconciled.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  unsigned Vec2Offset = 0;
  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // In case both the input vectors are extracted from same base
  // vector we do not need extra addend (Vec2Offset) while
  // computing shuffle mask.
  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
    Vec2Offset = InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        // Shuffle at the wider input width; the result is narrowed back to
        // VT via EXTRACT_SUBVECTOR below.
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      // Widen the half-size second input to VT by concatenating with undef.
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    // Skip undef (-1) and zero-vector (0) elements; this pair handles only
    // mask values LeftIdx and LeftIdx+1.
    if (VectorMask[i] <= 0)
      continue;

    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef/zero vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}

// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  // Slot 0 is reserved for the zero vector, so real inputs start at index 1.
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    SDValue ExtractedFromVec = Op.getOperand(0);

    APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
      return SDValue();

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  if (VecIn.size() < 2)
    return SDValue();

  // If all the Operands of BUILD_VECTOR extract from same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
  if (VecIn.size() == 2) {
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);

    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
        continue;
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    }

    NearestPow2 = PowerOf2Ceil(MaxIndex);
    // Only split when the source is much wider than the result, so that the
    // two SplitSize halves actually shrink the vectors being shuffled.
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      if (TLI.isTypeLegal(SplitVT)) {
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(SplitSize, DL, IdxTy));
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(0, DL, IdxTy));
        VecIn.pop_back();
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);

        // Remap each element to the half of the original vector it falls in.
        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
            continue;
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
        }
      }
    }
  }

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.

  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?

  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  // Zero-vector elements point at the zero/constant appended last; elements
  // from input vector i now come from shuffle (i - 1) / 2.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      Vec = Shuffles.size() - 1;
    else
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    if (CurSize % 2) {
      // Pad an odd level with undef so every node has a blend partner.
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      for (unsigned i = 0; i != NumElems; ++i) {
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }
  return Shuffles[0];
}

// Try to turn a build vector of zero extends of extract vector elts into a
// a vector zero extend and possibly an extract subvector.
// TODO: Support sign extend or any extend?
// TODO: Allow undef elements?
// TODO: Don't require the extracts to start at element 0.
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
  if (LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);

  SDValue Op0 = N->getOperand(0);
  // Returns the extract index when Op is (zext (extract_vector_elt X, C))
  // with the same source vector X as operand 0, and -1 otherwise.
  auto checkElem = [&](SDValue Op) -> int64_t {
    if (Op.getOpcode() == ISD::ZERO_EXTEND &&
        Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
        return C->getZExtValue();
    return -1;
  };

  // Make sure the first element matches
  // (zext (extract_vector_elt X, C))
  int64_t Offset = checkElem(Op0);
  if (Offset < 0)
    return SDValue();

  unsigned NumElems = N->getNumOperands();
  SDValue In = Op0.getOperand(0).getOperand(0);
  EVT InSVT = In.getValueType().getScalarType();
  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);

  // Don't create an illegal input type after type legalization.
  if (LegalTypes && !TLI.isTypeLegal(InVT))
    return SDValue();

  // Ensure all the elements come from the same vector and are adjacent.
  for (unsigned i = 1; i != NumElems; ++i) {
    if ((Offset + i) != checkElem(N->getOperand(i)))
      return SDValue();
  }

  SDLoc DL(N);
  // Extract the contiguous run starting at the first element's index, then
  // zero-extend it as one vector operation.
  In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
                   Op0.getOperand(0).getOperand(1));
  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
}

SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // If this is a splat of a bitcast from another vector, change to a
  // concat_vector.
  // For example:
  // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
  //   (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
  //
  // If X is a build_vector itself, the concat can become a larger build_vector.
  // TODO: Maybe this is useful for non-splat too?
  if (!LegalOperations) {
    if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
      Splat = peekThroughBitcast(Splat);
      EVT SrcVT = Splat.getValueType();
      if (SrcVT.isVector()) {
        unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
        EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
                                     SrcVT.getVectorElementType(), NumElts);
        if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
          SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
          SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
                                       NewVT, Ops);
          return DAG.getBitcast(VT, Concat);
        }
      }
    }
  }

  // Check if we can express BUILD VECTOR via subvector extract.
  if (!LegalTypes && (N->getNumOperands() > 1)) {
    SDValue Op0 = N->getOperand(0);
    // NOTE(review): this lambda returns uint64_t, so the -1 "no match"
    // sentinel wraps to ~0ULL and the int comparisons below rely on implicit
    // narrowing to see it as -1 — confirm before changing the return type.
    auto checkElem = [&](SDValue Op) -> uint64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    int Offset = checkElem(Op0);
    for (unsigned i = 0; i < N->getNumOperands(); ++i) {
      if (Offset + i != checkElem(N->getOperand(i))) {
        Offset = -1;
        break;
      }
    }

    if ((Offset == 0) &&
        (Op0.getOperand(0).getValueType() == N->getValueType(0)))
      return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // IDX must be multiple of output size.
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
                         Op0.getOperand(0), Op0.getOperand(1));
  }

  if (SDValue V = convertBuildVecZextToZext(N))
    return V;

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecToShuffle(N))
    return V;

  return SDValue();
}

/// Fold a CONCAT_VECTORS whose operands are all scalar-to-vector bitcasts
/// (or undef) into a bitcast of one BUILD_VECTOR of the underlying scalars.
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT OpVT = N->getOperand(0).getValueType();

  // If the operands are legal vectors, leave them alone.
  if (TLI.isTypeLegal(OpVT))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SmallVector<SDValue, 8> Ops;

  // Start with an integer scalar type of the operand's width; may be switched
  // to floating point below if any FP scalar is seen.
  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);

  // Keep track of what we encounter.
  bool AnyInteger = false;
  bool AnyFP = false;
  for (const SDValue &Op : N->ops()) {
    if (ISD::BITCAST == Op.getOpcode() &&
        !Op.getOperand(0).getValueType().isVector())
      Ops.push_back(Op.getOperand(0));
    else if (ISD::UNDEF == Op.getOpcode())
      Ops.push_back(ScalarUndef);
    else
      return SDValue();

    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out, it could be something weird like x86mmx.
    EVT LastOpVT = Ops.back().getValueType();
    if (LastOpVT.isFloatingPoint())
      AnyFP = true;
    else if (LastOpVT.isInteger())
      AnyInteger = true;
    else
      return SDValue();
  }

  // If any of the operands is a floating point scalar bitcast to a vector,
  // use floating point types throughout, and bitcast everything.
  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
  if (AnyFP) {
    SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
    ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
    if (AnyInteger) {
      // Mixed int/FP scalars: bitcast the integer ones to the FP scalar type.
      for (SDValue &Op : Ops) {
        if (Op.getValueType() == SVT)
          continue;
        if (Op.isUndef())
          Op = ScalarUndef;
        else
          Op = DAG.getBitcast(SVT, Op);
      }
    }
  }

  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
                               VT.getSizeInBits() / SVT.getSizeInBits());
  return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}

// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    // Peek through any bitcast.
    Op = peekThroughBitcast(Op);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();

    // Peek through any bitcast.
    ExtVec = peekThroughBitcast(ExtVec);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = Op.getConstantOperandVal(1);

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      // Element counts don't divide evenly either way; can't rescale.
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  // Only emit the shuffle if the target can lower this mask.
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}

SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
15767 if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { 15768 return Op.isUndef(); 15769 })) { 15770 SDValue In = N->getOperand(0); 15771 assert(In.getValueType().isVector() && "Must concat vectors"); 15772 15773 // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). 15774 if (In->getOpcode() == ISD::BITCAST && 15775 !In->getOperand(0).getValueType().isVector()) { 15776 SDValue Scalar = In->getOperand(0); 15777 15778 // If the bitcast type isn't legal, it might be a trunc of a legal type; 15779 // look through the trunc so we can still do the transform: 15780 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) 15781 if (Scalar->getOpcode() == ISD::TRUNCATE && 15782 !TLI.isTypeLegal(Scalar.getValueType()) && 15783 TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) 15784 Scalar = Scalar->getOperand(0); 15785 15786 EVT SclTy = Scalar->getValueType(0); 15787 15788 if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) 15789 return SDValue(); 15790 15791 // Bail out if the vector size is not a multiple of the scalar size. 15792 if (VT.getSizeInBits() % SclTy.getSizeInBits()) 15793 return SDValue(); 15794 15795 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits(); 15796 if (VNTNumElms < 2) 15797 return SDValue(); 15798 15799 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms); 15800 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) 15801 return SDValue(); 15802 15803 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar); 15804 return DAG.getBitcast(VT, Res); 15805 } 15806 } 15807 15808 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. 15809 // We have already tested above for an UNDEF only concatenation. 15810 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) 15811 // -> (BUILD_VECTOR A, B, ..., C, D, ...) 
15812 auto IsBuildVectorOrUndef = [](const SDValue &Op) { 15813 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); 15814 }; 15815 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) { 15816 SmallVector<SDValue, 8> Opnds; 15817 EVT SVT = VT.getScalarType(); 15818 15819 EVT MinVT = SVT; 15820 if (!SVT.isFloatingPoint()) { 15821 // If BUILD_VECTOR are from built from integer, they may have different 15822 // operand types. Get the smallest type and truncate all operands to it. 15823 bool FoundMinVT = false; 15824 for (const SDValue &Op : N->ops()) 15825 if (ISD::BUILD_VECTOR == Op.getOpcode()) { 15826 EVT OpSVT = Op.getOperand(0).getValueType(); 15827 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT; 15828 FoundMinVT = true; 15829 } 15830 assert(FoundMinVT && "Concat vector type mismatch"); 15831 } 15832 15833 for (const SDValue &Op : N->ops()) { 15834 EVT OpVT = Op.getValueType(); 15835 unsigned NumElts = OpVT.getVectorNumElements(); 15836 15837 if (ISD::UNDEF == Op.getOpcode()) 15838 Opnds.append(NumElts, DAG.getUNDEF(MinVT)); 15839 15840 if (ISD::BUILD_VECTOR == Op.getOpcode()) { 15841 if (SVT.isFloatingPoint()) { 15842 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); 15843 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); 15844 } else { 15845 for (unsigned i = 0; i != NumElts; ++i) 15846 Opnds.push_back( 15847 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); 15848 } 15849 } 15850 } 15851 15852 assert(VT.getVectorNumElements() == Opnds.size() && 15853 "Concat vector type mismatch"); 15854 return DAG.getBuildVector(VT, SDLoc(N), Opnds); 15855 } 15856 15857 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. 15858 if (SDValue V = combineConcatVectorOfScalars(N, DAG)) 15859 return V; 15860 15861 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. 
15862 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) 15863 if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) 15864 return V; 15865 15866 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR 15867 // nodes often generate nop CONCAT_VECTOR nodes. 15868 // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that 15869 // place the incoming vectors at the exact same location. 15870 SDValue SingleSource = SDValue(); 15871 unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); 15872 15873 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 15874 SDValue Op = N->getOperand(i); 15875 15876 if (Op.isUndef()) 15877 continue; 15878 15879 // Check if this is the identity extract: 15880 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) 15881 return SDValue(); 15882 15883 // Find the single incoming vector for the extract_subvector. 15884 if (SingleSource.getNode()) { 15885 if (Op.getOperand(0) != SingleSource) 15886 return SDValue(); 15887 } else { 15888 SingleSource = Op.getOperand(0); 15889 15890 // Check the source type is the same as the type of the result. 15891 // If not, this concat may extend the vector, so we can not 15892 // optimize it away. 15893 if (SingleSource.getValueType() != N->getValueType(0)) 15894 return SDValue(); 15895 } 15896 15897 unsigned IdentityIndex = i * PartNumElem; 15898 ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 15899 // The extract index must be constant. 15900 if (!CS) 15901 return SDValue(); 15902 15903 // Check that we are reading from the identity index. 
15904 if (CS->getZExtValue() != IdentityIndex) 15905 return SDValue(); 15906 } 15907 15908 if (SingleSource.getNode()) 15909 return SingleSource; 15910 15911 return SDValue(); 15912 } 15913 15914 /// If we are extracting a subvector produced by a wide binary operator with at 15915 /// at least one operand that was the result of a vector concatenation, then try 15916 /// to use the narrow vector operands directly to avoid the concatenation and 15917 /// extraction. 15918 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { 15919 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share 15920 // some of these bailouts with other transforms. 15921 15922 // The extract index must be a constant, so we can map it to a concat operand. 15923 auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); 15924 if (!ExtractIndex) 15925 return SDValue(); 15926 15927 // Only handle the case where we are doubling and then halving. A larger ratio 15928 // may require more than two narrow binops to replace the wide binop. 15929 EVT VT = Extract->getValueType(0); 15930 unsigned NumElems = VT.getVectorNumElements(); 15931 assert((ExtractIndex->getZExtValue() % NumElems) == 0 && 15932 "Extract index is not a multiple of the vector length."); 15933 if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2) 15934 return SDValue(); 15935 15936 // We are looking for an optionally bitcasted wide vector binary operator 15937 // feeding an extract subvector. 15938 SDValue BinOp = peekThroughBitcast(Extract->getOperand(0)); 15939 15940 // TODO: The motivating case for this transform is an x86 AVX1 target. That 15941 // target has temptingly almost legal versions of bitwise logic ops in 256-bit 15942 // flavors, but no other 256-bit integer support. This could be extended to 15943 // handle any binop, but that may require fixing/adding other folds to avoid 15944 // codegen regressions. 
15945 unsigned BOpcode = BinOp.getOpcode(); 15946 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR) 15947 return SDValue(); 15948 15949 // The binop must be a vector type, so we can chop it in half. 15950 EVT WideBVT = BinOp.getValueType(); 15951 if (!WideBVT.isVector()) 15952 return SDValue(); 15953 15954 // Bail out if the target does not support a narrower version of the binop. 15955 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(), 15956 WideBVT.getVectorNumElements() / 2); 15957 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 15958 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT)) 15959 return SDValue(); 15960 15961 // Peek through bitcasts of the binary operator operands if needed. 15962 SDValue LHS = peekThroughBitcast(BinOp.getOperand(0)); 15963 SDValue RHS = peekThroughBitcast(BinOp.getOperand(1)); 15964 15965 // We need at least one concatenation operation of a binop operand to make 15966 // this transform worthwhile. The concat must double the input vector sizes. 15967 // TODO: Should we also handle INSERT_SUBVECTOR patterns? 15968 bool ConcatL = 15969 LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2; 15970 bool ConcatR = 15971 RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2; 15972 if (!ConcatL && !ConcatR) 15973 return SDValue(); 15974 15975 // If one of the binop operands was not the result of a concat, we must 15976 // extract a half-sized operand for our new narrow binop. We can't just reuse 15977 // the original extract index operand because we may have bitcasted. 
15978 unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems; 15979 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements(); 15980 EVT ExtBOIdxVT = Extract->getOperand(1).getValueType(); 15981 SDLoc DL(Extract); 15982 15983 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN 15984 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N) 15985 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN 15986 SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum)) 15987 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, 15988 BinOp.getOperand(0), 15989 DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT)); 15990 15991 SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum)) 15992 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, 15993 BinOp.getOperand(1), 15994 DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT)); 15995 15996 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y); 15997 return DAG.getBitcast(VT, NarrowBinOp); 15998 } 15999 16000 /// If we are extracting a subvector from a wide vector load, convert to a 16001 /// narrow load to eliminate the extraction: 16002 /// (extract_subvector (load wide vector)) --> (load narrow vector) 16003 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { 16004 // TODO: Add support for big-endian. The offset calculation must be adjusted. 16005 if (DAG.getDataLayout().isBigEndian()) 16006 return SDValue(); 16007 16008 // TODO: The one-use check is overly conservative. Check the cost of the 16009 // extract instead or remove that condition entirely. 
16010 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); 16011 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); 16012 if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() || 16013 !ExtIdx) 16014 return SDValue(); 16015 16016 // The narrow load will be offset from the base address of the old load if 16017 // we are extracting from something besides index 0 (little-endian). 16018 EVT VT = Extract->getValueType(0); 16019 SDLoc DL(Extract); 16020 SDValue BaseAddr = Ld->getOperand(1); 16021 unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); 16022 16023 // TODO: Use "BaseIndexOffset" to make this more effective. 16024 SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); 16025 MachineFunction &MF = DAG.getMachineFunction(); 16026 MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset, 16027 VT.getStoreSize()); 16028 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); 16029 DAG.makeEquivalentMemoryOrdering(Ld, NewLd); 16030 return NewLd; 16031 } 16032 16033 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { 16034 EVT NVT = N->getValueType(0); 16035 SDValue V = N->getOperand(0); 16036 16037 // Extract from UNDEF is UNDEF. 16038 if (V.isUndef()) 16039 return DAG.getUNDEF(NVT); 16040 16041 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT)) 16042 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG)) 16043 return NarrowLoad; 16044 16045 // Combine: 16046 // (extract_subvec (concat V1, V2, ...), i) 16047 // Into: 16048 // Vi if possible 16049 // Only operand 0 is checked as 'concat' assumes all inputs of the same 16050 // type. 
16051 if (V->getOpcode() == ISD::CONCAT_VECTORS && 16052 isa<ConstantSDNode>(N->getOperand(1)) && 16053 V->getOperand(0).getValueType() == NVT) { 16054 unsigned Idx = N->getConstantOperandVal(1); 16055 unsigned NumElems = NVT.getVectorNumElements(); 16056 assert((Idx % NumElems) == 0 && 16057 "IDX in concat is not a multiple of the result vector length."); 16058 return V->getOperand(Idx / NumElems); 16059 } 16060 16061 // Skip bitcasting 16062 V = peekThroughBitcast(V); 16063 16064 // If the input is a build vector. Try to make a smaller build vector. 16065 if (V->getOpcode() == ISD::BUILD_VECTOR) { 16066 if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 16067 EVT InVT = V->getValueType(0); 16068 unsigned ExtractSize = NVT.getSizeInBits(); 16069 unsigned EltSize = InVT.getScalarSizeInBits(); 16070 // Only do this if we won't split any elements. 16071 if (ExtractSize % EltSize == 0) { 16072 unsigned NumElems = ExtractSize / EltSize; 16073 EVT EltVT = InVT.getVectorElementType(); 16074 EVT ExtractVT = NumElems == 1 ? EltVT : 16075 EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems); 16076 if ((Level < AfterLegalizeDAG || 16077 (NumElems == 1 || 16078 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) && 16079 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) { 16080 unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) / 16081 EltSize; 16082 if (NumElems == 1) { 16083 SDValue Src = V->getOperand(IdxVal); 16084 if (EltVT != Src.getValueType()) 16085 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src); 16086 16087 return DAG.getBitcast(NVT, Src); 16088 } 16089 16090 // Extract the pieces from the original build_vector. 
16091 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), 16092 makeArrayRef(V->op_begin() + IdxVal, 16093 NumElems)); 16094 return DAG.getBitcast(NVT, BuildVec); 16095 } 16096 } 16097 } 16098 } 16099 16100 if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { 16101 // Handle only simple case where vector being inserted and vector 16102 // being extracted are of same size. 16103 EVT SmallVT = V->getOperand(1).getValueType(); 16104 if (!NVT.bitsEq(SmallVT)) 16105 return SDValue(); 16106 16107 // Only handle cases where both indexes are constants. 16108 ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); 16109 ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); 16110 16111 if (InsIdx && ExtIdx) { 16112 // Combine: 16113 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) 16114 // Into: 16115 // indices are equal or bit offsets are equal => V1 16116 // otherwise => (extract_subvec V1, ExtIdx) 16117 if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() == 16118 ExtIdx->getZExtValue() * NVT.getScalarSizeInBits()) 16119 return DAG.getBitcast(NVT, V->getOperand(1)); 16120 return DAG.getNode( 16121 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, 16122 DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)), 16123 N->getOperand(1)); 16124 } 16125 } 16126 16127 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG)) 16128 return NarrowBOp; 16129 16130 return SDValue(); 16131 } 16132 16133 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, 16134 // or turn a shuffle of a single concat into simpler shuffle then concat. 
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  SmallVector<SDValue, 4> Ops;
  // All concat operands have the same type, so inspect operand 0.
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
                  SVN->getMask().end(), [](int i) { return i == -1; })) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                              makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    // Make sure we're dealing with a copy.
    unsigned Begin = I * NumElemsPerConcat;
    // Classify the mask chunk: fully defined, fully undef, or mixed.
    bool AllUndef = true, NoUndef = true;
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
      if (SVN->getMaskElt(J) >= 0)
        AllUndef = false;
      else
        NoUndef = false;
    }

    if (NoUndef) {
      // The chunk must start on a concat-operand boundary...
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
        return SDValue();

      // ...and select consecutive elements (an exact subvector copy).
      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
          return SDValue();

      // Map the chunk to the concat operand it copies (from N0 or N1).
      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
      if (FirstElt < N0.getNumOperands())
        Ops.push_back(N0.getOperand(FirstElt));
      else
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));

    } else if (AllUndef) {
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    } else { // Mixed with general masks and undefs, can't do optimization.
      return SDValue();
    }
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}

// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
// a simplification in some sense, but it isn't appropriate in general: some
// BUILD_VECTORs are substantially cheaper than others. The general case
// of a BUILD_VECTOR requires inserting each element individually (or
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
// all constants is a single constant pool load. A BUILD_VECTOR where each
// element is identical is a splat. A BUILD_VECTOR where most of the operands
// are undef lowers to a small number of element insertions.
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  if (!N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();

  // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
    bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // If both inputs are splats of the same value then we can safely merge this
  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
  bool IsSplat = false;
  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
  if (BV0 && BV1)
    if (SDValue Splat0 = BV0->getSplatValue())
      IsSplat = (Splat0 == BV1->getSplatValue());

  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    // Undef mask elements become undef scalars.
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      // Select the source vector (N0 for mask < NumElts, else N1) and the
      // element index within it.
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
        Op = S.getOperand(0);
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
    // generating a splat; semantically, this is fine, but it's likely to
    // generate low-quality code if the target can't reconstruct an appropriate
    // shuffle.
    if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
      if (!IsSplat && !DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }

  // BUILD_VECTOR requires all inputs to be of the same type, find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      Op = TLI.isZExtFree(Op.getValueType(), SVT)
               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}

// Match shuffles that can be converted to any_vector_extend_in_reg.
// This is often generated during legalization.
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
16291 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, 16292 SelectionDAG &DAG, 16293 const TargetLowering &TLI, 16294 bool LegalOperations, 16295 bool LegalTypes) { 16296 EVT VT = SVN->getValueType(0); 16297 bool IsBigEndian = DAG.getDataLayout().isBigEndian(); 16298 16299 // TODO Add support for big-endian when we have a test case. 16300 if (!VT.isInteger() || IsBigEndian) 16301 return SDValue(); 16302 16303 unsigned NumElts = VT.getVectorNumElements(); 16304 unsigned EltSizeInBits = VT.getScalarSizeInBits(); 16305 ArrayRef<int> Mask = SVN->getMask(); 16306 SDValue N0 = SVN->getOperand(0); 16307 16308 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32)) 16309 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) { 16310 for (unsigned i = 0; i != NumElts; ++i) { 16311 if (Mask[i] < 0) 16312 continue; 16313 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale)) 16314 continue; 16315 return false; 16316 } 16317 return true; 16318 }; 16319 16320 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for 16321 // power-of-2 extensions as they are the most likely. 16322 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) { 16323 // Check for non power of 2 vector sizes 16324 if (NumElts % Scale != 0) 16325 continue; 16326 if (!isAnyExtend(Scale)) 16327 continue; 16328 16329 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); 16330 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); 16331 if (!LegalTypes || TLI.isTypeLegal(OutVT)) 16332 if (!LegalOperations || 16333 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) 16334 return DAG.getBitcast(VT, 16335 DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT)); 16336 } 16337 16338 return SDValue(); 16339 } 16340 16341 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of 16342 // each source element of a large type into the lowest elements of a smaller 16343 // destination type. 
This is often generated during legalization. 16344 // If the source node itself was a '*_extend_vector_inreg' node then we should 16345 // then be able to remove it. 16346 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, 16347 SelectionDAG &DAG) { 16348 EVT VT = SVN->getValueType(0); 16349 bool IsBigEndian = DAG.getDataLayout().isBigEndian(); 16350 16351 // TODO Add support for big-endian when we have a test case. 16352 if (!VT.isInteger() || IsBigEndian) 16353 return SDValue(); 16354 16355 SDValue N0 = peekThroughBitcast(SVN->getOperand(0)); 16356 16357 unsigned Opcode = N0.getOpcode(); 16358 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG && 16359 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG && 16360 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG) 16361 return SDValue(); 16362 16363 SDValue N00 = N0.getOperand(0); 16364 ArrayRef<int> Mask = SVN->getMask(); 16365 unsigned NumElts = VT.getVectorNumElements(); 16366 unsigned EltSizeInBits = VT.getScalarSizeInBits(); 16367 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits(); 16368 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits(); 16369 16370 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0) 16371 return SDValue(); 16372 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits; 16373 16374 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1> 16375 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1> 16376 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1> 16377 auto isTruncate = [&Mask, &NumElts](unsigned Scale) { 16378 for (unsigned i = 0; i != NumElts; ++i) { 16379 if (Mask[i] < 0) 16380 continue; 16381 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale)) 16382 continue; 16383 return false; 16384 } 16385 return true; 16386 }; 16387 16388 // At the moment we just handle the case where we've truncated back to the 16389 // same size as before the extension. 16390 // TODO: handle more extension/truncation cases as cases arise. 
16391 if (EltSizeInBits != ExtSrcSizeInBits) 16392 return SDValue(); 16393 16394 // We can remove *extend_vector_inreg only if the truncation happens at 16395 // the same scale as the extension. 16396 if (isTruncate(ExtScale)) 16397 return DAG.getBitcast(VT, N00); 16398 16399 return SDValue(); 16400 } 16401 16402 // Combine shuffles of splat-shuffles of the form: 16403 // shuffle (shuffle V, undef, splat-mask), undef, M 16404 // If splat-mask contains undef elements, we need to be careful about 16405 // introducing undef's in the folded mask which are not the result of composing 16406 // the masks of the shuffles. 16407 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask, 16408 ShuffleVectorSDNode *Splat, 16409 SelectionDAG &DAG) { 16410 ArrayRef<int> SplatMask = Splat->getMask(); 16411 assert(UserMask.size() == SplatMask.size() && "Mask length mismatch"); 16412 16413 // Prefer simplifying to the splat-shuffle, if possible. This is legal if 16414 // every undef mask element in the splat-shuffle has a corresponding undef 16415 // element in the user-shuffle's mask or if the composition of mask elements 16416 // would result in undef. 16417 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask): 16418 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u] 16419 // In this case it is not legal to simplify to the splat-shuffle because we 16420 // may be exposing the users of the shuffle an undef element at index 1 16421 // which was not there before the combine. 16422 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u] 16423 // In this case the composition of masks yields SplatMask, so it's ok to 16424 // simplify to the splat-shuffle. 16425 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u] 16426 // In this case the composed mask includes all undef elements of SplatMask 16427 // and in addition sets element zero to undef. It is safe to simplify to 16428 // the splat-shuffle. 
16429 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask, 16430 ArrayRef<int> SplatMask) { 16431 for (unsigned i = 0, e = UserMask.size(); i != e; ++i) 16432 if (UserMask[i] != -1 && SplatMask[i] == -1 && 16433 SplatMask[UserMask[i]] != -1) 16434 return false; 16435 return true; 16436 }; 16437 if (CanSimplifyToExistingSplat(UserMask, SplatMask)) 16438 return SDValue(Splat, 0); 16439 16440 // Create a new shuffle with a mask that is composed of the two shuffles' 16441 // masks. 16442 SmallVector<int, 32> NewMask; 16443 for (int Idx : UserMask) 16444 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]); 16445 16446 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat), 16447 Splat->getOperand(0), Splat->getOperand(1), 16448 NewMask); 16449 } 16450 16451 /// If the shuffle mask is taking exactly one element from the first vector 16452 /// operand and passing through all other elements from the second vector 16453 /// operand, return the index of the mask element that is choosing an element 16454 /// from the first operand. Otherwise, return -1. 16455 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) { 16456 int MaskSize = Mask.size(); 16457 int EltFromOp0 = -1; 16458 // TODO: This does not match if there are undef elements in the shuffle mask. 16459 // Should we ignore undefs in the shuffle mask instead? The trade-off is 16460 // removing an instruction (a shuffle), but losing the knowledge that some 16461 // vector lanes are not needed. 16462 for (int i = 0; i != MaskSize; ++i) { 16463 if (Mask[i] >= 0 && Mask[i] < MaskSize) { 16464 // We're looking for a shuffle of exactly one element from operand 0. 16465 if (EltFromOp0 != -1) 16466 return -1; 16467 EltFromOp0 = i; 16468 } else if (Mask[i] != i + MaskSize) { 16469 // Nothing from operand 1 can change lanes. 
16470 return -1; 16471 } 16472 } 16473 return EltFromOp0; 16474 } 16475 16476 /// If a shuffle inserts exactly one element from a source vector operand into 16477 /// another vector operand and we can access the specified element as a scalar, 16478 /// then we can eliminate the shuffle. 16479 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, 16480 SelectionDAG &DAG) { 16481 // First, check if we are taking one element of a vector and shuffling that 16482 // element into another vector. 16483 ArrayRef<int> Mask = Shuf->getMask(); 16484 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end()); 16485 SDValue Op0 = Shuf->getOperand(0); 16486 SDValue Op1 = Shuf->getOperand(1); 16487 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask); 16488 if (ShufOp0Index == -1) { 16489 // Commute mask and check again. 16490 ShuffleVectorSDNode::commuteMask(CommutedMask); 16491 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask); 16492 if (ShufOp0Index == -1) 16493 return SDValue(); 16494 // Commute operands to match the commuted shuffle mask. 16495 std::swap(Op0, Op1); 16496 Mask = CommutedMask; 16497 } 16498 16499 // The shuffle inserts exactly one element from operand 0 into operand 1. 16500 // Now see if we can access that element as a scalar via a real insert element 16501 // instruction. 16502 // TODO: We can try harder to locate the element as a scalar. Examples: it 16503 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant. 
16504 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() && 16505 "Shuffle mask value must be from operand 0"); 16506 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT) 16507 return SDValue(); 16508 16509 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2)); 16510 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index]) 16511 return SDValue(); 16512 16513 // There's an existing insertelement with constant insertion index, so we 16514 // don't need to check the legality/profitability of a replacement operation 16515 // that differs at most in the constant value. The target should be able to 16516 // lower any of those in a similar way. If not, legalization will expand this 16517 // to a scalar-to-vector plus shuffle. 16518 // 16519 // Note that the shuffle may move the scalar from the position that the insert 16520 // element used. Therefore, our new insert element occurs at the shuffle's 16521 // mask index value, not the insert's index value. 16522 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C' 16523 SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf), 16524 Op0.getOperand(2).getValueType()); 16525 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(), 16526 Op1, Op0.getOperand(1), NewInsIndex); 16527 } 16528 16529 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { 16530 EVT VT = N->getValueType(0); 16531 unsigned NumElts = VT.getVectorNumElements(); 16532 16533 SDValue N0 = N->getOperand(0); 16534 SDValue N1 = N->getOperand(1); 16535 16536 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); 16537 16538 // Canonicalize shuffle undef, undef -> undef 16539 if (N0.isUndef() && N1.isUndef()) 16540 return DAG.getUNDEF(VT); 16541 16542 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 16543 16544 // Canonicalize shuffle v, v -> v, undef 16545 if (N0 == N1) { 16546 SmallVector<int, 8> NewMask; 16547 for (unsigned i = 0; i != NumElts; ++i) { 16548 
      int Idx = SVN->getMaskElt(i);
      // Remap indices that referenced the (identical) second operand so the
      // whole mask refers to operand 0.
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
  }

  // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        // This lane read from the undef RHS; mark it undef in the mask.
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
    return InsElt;

  // A shuffle of a single vector that is a splat can always be folded.
  if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
    if (N1->isUndef() && N0Shuf->isSplat())
      return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it. Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      // Find the first non-undef operand to compare the rest against.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // Simplify source operands based on shuffle mask.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  // shuffle(concat(a, b, ...), concat(c, d, ...)) may simplify if the
  // per-subvector mask pattern allows partitioning into smaller shuffles.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
      return Res;

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.isUndef() && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {

    // Peek through the bitcast only if there is one user.
    SDValue BC0 = N0;
    while (BC0.getOpcode() == ISD::BITCAST) {
      if (!BC0.hasOneUse())
        break;
      BC0 = BC0.getOperand(0);
    }

    // Widen a shuffle mask by Scale: each original lane becomes Scale
    // consecutive sub-lanes; undef (-1) lanes stay undef.
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };

    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      // Both element sizes must be whole multiples of the common smaller
      // scalar size so each mask can be rescaled exactly.
      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getBitcast(ScaleVT, SV0);
          SV1 = DAG.getBitcast(ScaleVT, SV1);
          return DAG.getBitcast(
              VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.isUndef();
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSV->isSplat())
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      // (A single shuffle can only reference two distinct source vectors.)
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
  }

  return SDValue();
}

SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE and possible truncate.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);
    auto InVecT = InVec.getValueType();
    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
      // Mask moving the extracted element into lane 0; all other lanes undef.
      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
      int Elt = C0->getZExtValue();
      NewMask[0] = Elt;
      SDValue Val;
      // If we have an implicit truncate do truncate here as long as it's legal.
16885 // if it's not legal, this should 16886 if (VT.getScalarType() != InVal.getValueType() && 16887 InVal.getValueType().isScalarInteger() && 16888 isTypeLegal(VT.getScalarType())) { 16889 Val = 16890 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal); 16891 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); 16892 } 16893 if (VT.getScalarType() == InVecT.getScalarType() && 16894 VT.getVectorNumElements() <= InVecT.getVectorNumElements() && 16895 TLI.isShuffleMaskLegal(NewMask, VT)) { 16896 Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec, 16897 DAG.getUNDEF(InVecT), NewMask); 16898 // If the initial vector is the correct size this shuffle is a 16899 // valid result. 16900 if (VT == InVecT) 16901 return Val; 16902 // If not we must truncate the vector. 16903 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { 16904 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 16905 SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); 16906 EVT SubVT = 16907 EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), 16908 VT.getVectorNumElements()); 16909 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val, 16910 ZeroIdx); 16911 return Val; 16912 } 16913 } 16914 } 16915 } 16916 16917 return SDValue(); 16918 } 16919 16920 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { 16921 EVT VT = N->getValueType(0); 16922 SDValue N0 = N->getOperand(0); 16923 SDValue N1 = N->getOperand(1); 16924 SDValue N2 = N->getOperand(2); 16925 16926 // If inserting an UNDEF, just return the original vector. 16927 if (N1.isUndef()) 16928 return N0; 16929 16930 // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow 16931 // us to pull BITCASTs from input to output. 
  if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
    if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);

  // If this is an insert of an extracted vector into an undef vector, we can
  // just use the input to the extract.
  if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
    return N1.getOperand(0);

  // If we are inserting a bitcast value into an undef, with the same
  // number of elements, just use the bitcast input of the extract.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      N1.getOperand(0).getOperand(1) == N2 &&
      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
          VT.getVectorNumElements() &&
      N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
          VT.getSizeInBits()) {
    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
  }

  // If both N0 and N1 are bitcast values on which insert_subvector
  // would make sense, pull the bitcast through.
  // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
  //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
  if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
    SDValue CN0 = N0.getOperand(0);
    SDValue CN1 = N1.getOperand(0);
    EVT CN0VT = CN0.getValueType();
    EVT CN1VT = CN1.getValueType();
    // Both pre-bitcast types must be vectors with matching element type,
    // and the outer vector must keep its element count.
    if (CN0VT.isVector() && CN1VT.isVector() &&
        CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
        CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
      SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
                                      CN0.getValueType(), CN0, CN1, N2);
      return DAG.getBitcast(VT, NewINSERT);
    }
  }

  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
      N0.getOperand(1).getValueType() == N1.getValueType() &&
      N0.getOperand(2) == N2)
    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
                       N1, N2);

  // All remaining folds require a constant insertion index.
  if (!isa<ConstantSDNode>(N2))
    return SDValue();

  unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();

  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, Idx0), Idx1)
  // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
      N1.getValueType() == N0.getOperand(1).getValueType() &&
      isa<ConstantSDNode>(N0.getOperand(2))) {
    unsigned OtherIdx = N0.getConstantOperandVal(2);
    if (InsIdx < OtherIdx) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
                                  N0.getOperand(0), N1, N2);
      AddToWorklist(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
                         VT, NewOp, N0.getOperand(1), N0.getOperand(2));
    }
  }

  // If the input vector is a concatenation, and the insert replaces
  // one of the pieces, we can optimize into a single concat_vectors.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
      N0.getOperand(0).getValueType() == N1.getValueType()) {
    unsigned Factor = N1.getValueType().getVectorNumElements();

    SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
    Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;

    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }

  return SDValue();
}

SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
  if (N0->getOpcode() == ISD::FP16_TO_FP)
    return N0->getOperand(0);

  return SDValue();
}

SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);

  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
  // The mask is redundant because fp16_to_fp only reads the low 16 bits.
  if (N0->getOpcode() == ISD::AND) {
    ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
    if (AndConst && AndConst->getAPIntValue() == 0xffff) {
      return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
                         N0.getOperand(0));
    }
  }

  return SDValue();
}

/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = peekThroughBitcast(N->getOperand(1));
  SDLoc DL(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
17095 if (DAG.getDataLayout().isBigEndian()) { 17096 Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits); 17097 } else { 17098 Bits.lshrInPlace(SubIdx * NumSubBits); 17099 } 17100 17101 if (Split > 1) 17102 Bits = Bits.trunc(NumSubBits); 17103 17104 if (Bits.isAllOnesValue()) 17105 Indices.push_back(i); 17106 else if (Bits == 0) 17107 Indices.push_back(i + NumSubElts); 17108 else 17109 return SDValue(); 17110 } 17111 17112 // Let's see if the target supports this vector_shuffle. 17113 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits); 17114 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts); 17115 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) 17116 return SDValue(); 17117 17118 SDValue Zero = DAG.getConstant(0, DL, ClearVT); 17119 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL, 17120 DAG.getBitcast(ClearVT, LHS), 17121 Zero, Indices)); 17122 }; 17123 17124 // Determine maximum split level (byte level masking). 17125 int MaxSplit = 1; 17126 if (RVT.getScalarSizeInBits() % 8 == 0) 17127 MaxSplit = RVT.getScalarSizeInBits() / 8; 17128 17129 for (int Split = 1; Split <= MaxSplit; ++Split) 17130 if (RVT.getScalarSizeInBits() % Split == 0) 17131 if (SDValue S = BuildClearMask(Split)) 17132 return S; 17133 17134 return SDValue(); 17135 } 17136 17137 /// Visit a binary vector operation, like ADD. 17138 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { 17139 assert(N->getValueType(0).isVector() && 17140 "SimplifyVBinOp only works on vectors!"); 17141 17142 SDValue LHS = N->getOperand(0); 17143 SDValue RHS = N->getOperand(1); 17144 SDValue Ops[] = {LHS, RHS}; 17145 17146 // See if we can constant fold the vector operation. 17147 if (SDValue Fold = DAG.FoldConstantVectorArithmetic( 17148 N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) 17149 return Fold; 17150 17151 // Type legalization might introduce new shuffles in the DAG. 
  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
  //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
      isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
      LHS.getOperand(1).isUndef() &&
      RHS.getOperand(1).isUndef()) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);

    // Both shuffles must use the same mask for the fold to be valid.
    if (SVN0->getMask().equals(SVN1->getMask())) {
      EVT VT = N->getValueType(0);
      SDValue UndefVector = LHS.getOperand(1);
      SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                     LHS.getOperand(0), RHS.getOperand(0),
                                     N->getFlags());
      AddUsersToWorklist(N);
      return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
                                  SVN0->getMask());
    }
  }

  return SDValue();
}

SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
                                    SDValue N2) {
  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      // SELECT_CC operands: (lhs, rhs, trueval, falseval, condcode).
      SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
                                  N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4));
      AddToWorklist(SETCC.getNode());
      return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
                           SCC.getOperand(2), SCC.getOperand(3));
    }

    return SCC;
  }
  return SDValue();
}

/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select. Callers of this
/// should assume that TheSelect is deleted if this returns true. As such, they
/// should return the appropriate thing (e.g. the node) back to the top-level of
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {
  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
      SDValue Sqrt = RHS;
      ISD::CondCode CC;
      SDValue CmpLHS;
      const ConstantFPSDNode *Zero = nullptr;

      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
        CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
        CmpLHS = TheSelect->getOperand(0);
        Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
      } else {
        // SELECT or VSELECT
        SDValue Cmp = TheSelect->getOperand(0);
        if (Cmp.getOpcode() == ISD::SETCC) {
          CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
          CmpLHS = Cmp.getOperand(0);
          Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
        }
      }
      // Only fold the *lt-against-zero forms, where the comparison operand
      // is the same value that feeds the fsqrt.
      if (Zero && Zero->isZero() &&
          Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
          CC == ISD::SETULT || CC == ISD::SETLT)) {
        // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
        CombineTo(TheSelect, Sqrt);
        return true;
      }
    }
  }
  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        LLD->isVolatile() || RLD->isVolatile() ||
        // FIXME: If either is a pre/post inc/dec load,
        // we'd need to split out the address adjustment.
        LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information. This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations. Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0 ||
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                      LLD->getBasePtr().getValueType()))
      return false;

    // Check that the select condition doesn't reach either load. If so,
    // folding this will induce a cycle into the DAG. If not, this is safe to
    // xform, so create a select of the addresses.
    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
        return false;
      // The loads must not depend on one another.
      if (LLD->isPredecessorOf(RLD) ||
          RLD->isPredecessorOf(LLD))
        return false;
      Addr = DAG.getSelect(SDLoc(TheSelect),
                           LLD->getBasePtr().getValueType(),
                           TheSelect->getOperand(0), LLD->getBasePtr(),
                           RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();

      if ((LLD->hasAnyUseOfValue(1) &&
           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
          (RLD->hasAnyUseOfValue(1) &&
           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must be the minimum (most restrictive) alignment of the
    // inputs.
    unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
    // The merged load's flags must be conservative: drop invariant/
    // dereferenceable if the RHS load lacks them.
    MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
    if (!RLD->isInvariant())
      MMOFlags &= ~MachineMemOperand::MOInvariant;
    if (!RLD->isDereferenceable())
      MMOFlags &= ~MachineMemOperand::MODereferenceable;
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      // FIXME: Discards pointer and AA info.
      Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
                         LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
                         MMOFlags);
    } else {
      // FIXME: Discards pointer and AA info.
      // If the LHS is an anyext load, prefer the RHS's (possibly stricter)
      // extension kind; the earlier checks guarantee they are compatible.
      Load = DAG.getExtLoad(
          LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
                                                  : LLD->getExtensionType(),
          SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
          MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain. We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}

/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
/// bitwise 'and'.
SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
                                            SDValue N1, SDValue N2, SDValue N3,
                                            ISD::CondCode CC) {
  // If this is a select where the false operand is zero and the compare is a
  // check of the sign bit, see if we can perform the "gzip trick":
  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
  // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
  EVT XType = N0.getValueType();
  EVT AType = N2.getValueType();
  if (!isNullConstant(N3) || !XType.bitsGE(AType))
    return SDValue();

  // If the comparison is testing for a positive value, we have to invert
  // the sign bit mask, so only do that transform if the target has a bitwise
  // 'and not' instruction (the invert is free).
  if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
    // (X > -1) ? A : 0
    // (X > 0) ? X : 0 <-- This is canonical signed max.
    if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
      return SDValue();
  } else if (CC == ISD::SETLT) {
    // (X < 0) ? A : 0
    // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
    if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
      return SDValue();
  } else {
    return SDValue();
  }

  // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
  // constant.
  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  // Power-of-2 check via A & (A - 1) == 0: shift the sign bit directly into
  // A's single set bit position instead of creating a full sign mask.
  if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
    unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
    SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
    SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
    AddToWorklist(Shift.getNode());

    if (XType.bitsGT(AType)) {
      Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
      AddToWorklist(Shift.getNode());
    }

    if (CC == ISD::SETGT)
      Shift = DAG.getNOT(DL, Shift, AType);

    return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
  }

  // General case: arithmetic shift replicates the sign bit across the value,
  // producing an all-ones/all-zeros mask to AND with A.
  SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
  SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
  AddToWorklist(Shift.getNode());

  if (XType.bitsGT(AType)) {
    Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
    AddToWorklist(Shift.getNode());
  }

  if (CC == ISD::SETGT)
    Shift = DAG.getNOT(DL, Shift, AType);

  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
}

/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3, ISD::CondCode CC,
                                      bool NotExtCompare) {
  // (x ? y : y) -> y.
17430 if (N2 == N3) return N2; 17431 17432 EVT VT = N2.getValueType(); 17433 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 17434 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); 17435 17436 // Determine if the condition we're dealing with is constant 17437 SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), 17438 N0, N1, CC, DL, false); 17439 if (SCC.getNode()) AddToWorklist(SCC.getNode()); 17440 17441 if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { 17442 // fold select_cc true, x, y -> x 17443 // fold select_cc false, x, y -> y 17444 return !SCCC->isNullValue() ? N2 : N3; 17445 } 17446 17447 // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" 17448 // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 17449 // in it. This is a win when the constant is not otherwise available because 17450 // it replaces two constant pool loads with one. We only do this if the FP 17451 // type is known to be legal, because if it isn't, then we are before legalize 17452 // types an we want the other legalization to happen first (e.g. to avoid 17453 // messing with soft float) and if the ConstantFP is not legal, because if 17454 // it is legal, we may not need to store the FP constant in a constant pool. 17455 if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2)) 17456 if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) { 17457 if (TLI.isTypeLegal(N2.getValueType()) && 17458 (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != 17459 TargetLowering::Legal && 17460 !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) && 17461 !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) && 17462 // If both constants have multiple uses, then we won't need to do an 17463 // extra load, they are likely around in registers for other users. 
17464 (TV->hasOneUse() || FV->hasOneUse())) { 17465 Constant *Elts[] = { 17466 const_cast<ConstantFP*>(FV->getConstantFPValue()), 17467 const_cast<ConstantFP*>(TV->getConstantFPValue()) 17468 }; 17469 Type *FPTy = Elts[0]->getType(); 17470 const DataLayout &TD = DAG.getDataLayout(); 17471 17472 // Create a ConstantArray of the two constants. 17473 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); 17474 SDValue CPIdx = 17475 DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), 17476 TD.getPrefTypeAlignment(FPTy)); 17477 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); 17478 17479 // Get the offsets to the 0 and 1 element of the array so that we can 17480 // select between them. 17481 SDValue Zero = DAG.getIntPtrConstant(0, DL); 17482 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); 17483 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); 17484 17485 SDValue Cond = DAG.getSetCC(DL, 17486 getSetCCResultType(N0.getValueType()), 17487 N0, N1, CC); 17488 AddToWorklist(Cond.getNode()); 17489 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), 17490 Cond, One, Zero); 17491 AddToWorklist(CstOffset.getNode()); 17492 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, 17493 CstOffset); 17494 AddToWorklist(CPIdx.getNode()); 17495 return DAG.getLoad( 17496 TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, 17497 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), 17498 Alignment); 17499 } 17500 } 17501 17502 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC)) 17503 return V; 17504 17505 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) 17506 // where y is has a single bit set. 17507 // A plaintext description would be, we can turn the SELECT_CC into an AND 17508 // when the condition can be materialized as an all-ones register. 
Any 17509 // single bit-test can be materialized as an all-ones register with 17510 // shift-left and shift-right-arith. 17511 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && 17512 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { 17513 SDValue AndLHS = N0->getOperand(0); 17514 ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 17515 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { 17516 // Shift the tested bit over the sign bit. 17517 const APInt &AndMask = ConstAndRHS->getAPIntValue(); 17518 SDValue ShlAmt = 17519 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), 17520 getShiftAmountTy(AndLHS.getValueType())); 17521 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); 17522 17523 // Now arithmetic right shift it all the way over, so the result is either 17524 // all-ones, or zero. 17525 SDValue ShrAmt = 17526 DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), 17527 getShiftAmountTy(Shl.getValueType())); 17528 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); 17529 17530 return DAG.getNode(ISD::AND, DL, VT, Shr, N3); 17531 } 17532 } 17533 17534 // fold select C, 16, 0 -> shl C, 4 17535 if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() && 17536 TLI.getBooleanContents(N0.getValueType()) == 17537 TargetLowering::ZeroOrOneBooleanContent) { 17538 17539 // If the caller doesn't want us to simplify this into a zext of a compare, 17540 // don't do it. 17541 if (NotExtCompare && N2C->isOne()) 17542 return SDValue(); 17543 17544 // Get a SetCC of the condition 17545 // NOTE: Don't create a SETCC if it's not legal on this target. 
17546 if (!LegalOperations || 17547 TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) { 17548 SDValue Temp, SCC; 17549 // cast from setcc result type to select result type 17550 if (LegalTypes) { 17551 SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), 17552 N0, N1, CC); 17553 if (N2.getValueType().bitsLT(SCC.getValueType())) 17554 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), 17555 N2.getValueType()); 17556 else 17557 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), 17558 N2.getValueType(), SCC); 17559 } else { 17560 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); 17561 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), 17562 N2.getValueType(), SCC); 17563 } 17564 17565 AddToWorklist(SCC.getNode()); 17566 AddToWorklist(Temp.getNode()); 17567 17568 if (N2C->isOne()) 17569 return Temp; 17570 17571 // shl setcc result by log2 n2c 17572 return DAG.getNode( 17573 ISD::SHL, DL, N2.getValueType(), Temp, 17574 DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp), 17575 getShiftAmountTy(Temp.getValueType()))); 17576 } 17577 } 17578 17579 // Check to see if this is an integer abs. 
17580 // select_cc setg[te] X, 0, X, -X -> 17581 // select_cc setgt X, -1, X, -X -> 17582 // select_cc setl[te] X, 0, -X, X -> 17583 // select_cc setlt X, 1, -X, X -> 17584 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 17585 if (N1C) { 17586 ConstantSDNode *SubC = nullptr; 17587 if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || 17588 (N1C->isAllOnesValue() && CC == ISD::SETGT)) && 17589 N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) 17590 SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); 17591 else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || 17592 (N1C->isOne() && CC == ISD::SETLT)) && 17593 N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) 17594 SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); 17595 17596 EVT XType = N0.getValueType(); 17597 if (SubC && SubC->isNullValue() && XType.isInteger()) { 17598 SDLoc DL(N0); 17599 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, 17600 N0, 17601 DAG.getConstant(XType.getSizeInBits() - 1, DL, 17602 getShiftAmountTy(N0.getValueType()))); 17603 SDValue Add = DAG.getNode(ISD::ADD, DL, 17604 XType, N0, Shift); 17605 AddToWorklist(Shift.getNode()); 17606 AddToWorklist(Add.getNode()); 17607 return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); 17608 } 17609 } 17610 17611 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X) 17612 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X) 17613 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X) 17614 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X) 17615 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X) 17616 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X) 17617 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X) 17618 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) 17619 if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 17620 SDValue ValueOnZero = N2; 17621 SDValue Count = N3; 17622 // If the 
condition is NE instead of E, swap the operands. 17623 if (CC == ISD::SETNE) 17624 std::swap(ValueOnZero, Count); 17625 // Check if the value on zero is a constant equal to the bits in the type. 17626 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) { 17627 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) { 17628 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is 17629 // legal, combine to just cttz. 17630 if ((Count.getOpcode() == ISD::CTTZ || 17631 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) && 17632 N0 == Count.getOperand(0) && 17633 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT))) 17634 return DAG.getNode(ISD::CTTZ, DL, VT, N0); 17635 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is 17636 // legal, combine to just ctlz. 17637 if ((Count.getOpcode() == ISD::CTLZ || 17638 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) && 17639 N0 == Count.getOperand(0) && 17640 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT))) 17641 return DAG.getNode(ISD::CTLZ, DL, VT, N0); 17642 } 17643 } 17644 } 17645 17646 return SDValue(); 17647 } 17648 17649 /// This is a stub for TargetLowering::SimplifySetCC. 17650 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, 17651 ISD::CondCode Cond, const SDLoc &DL, 17652 bool foldBooleans) { 17653 TargetLowering::DAGCombinerInfo 17654 DagCombineInfo(DAG, Level, false, this); 17655 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); 17656 } 17657 17658 /// Given an ISD::SDIV node expressing a divide by constant, return 17659 /// a DAG expression to select that will generate the same value by multiplying 17660 /// by a magic number. 17661 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 17662 SDValue DAGCombiner::BuildSDIV(SDNode *N) { 17663 // when optimising for minimum size, we don't want to expand a div to a mul 17664 // and a shift. 
  if (DAG.getMachineFunction().getFunction().optForMinSize())
    return SDValue();

  // Only handle a (splat of a) constant divisor.
  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  std::vector<SDNode *> Built;
  SDValue S =
      TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);

  // Queue every node the target hook created for further combining.
  for (SDNode *N : Built)
    AddToWorklist(N);
  return S;
}

/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
/// DAG expression that will generate the same value by right shifting.
SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  std::vector<SDNode *> Built;
  SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);

  // Queue every node the target hook created for further combining.
  for (SDNode *N : Built)
    AddToWorklist(N);
  return S;
}

/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
/// expression that will generate the same value by multiplying by a magic
/// number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // when optimising for minimum size, we don't want to expand a div to a mul
  // and a shift.
  if (DAG.getMachineFunction().getFunction().optForMinSize())
    return SDValue();

  // Only handle a (splat of a) constant divisor.
  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  std::vector<SDNode *> Built;
  SDValue S =
      TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);

  // Queue every node the target hook created for further combining.
  for (SDNode *N : Built)
    AddToWorklist(N);
  return S;
}

/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
  EVT VT = V.getValueType();
  unsigned EltBits = VT.getScalarSizeInBits();
  SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
  SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
  SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
  return LogBase2;
}

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
///   F(X) = A X - 1 [which has a zero at X = 1/A]
///     =>
///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
///     does not require additional intermediate precision]
SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
  // Estimates are only profitable before the DAG is fully legalized.
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getDivRefinementSteps(VT, MF);
  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      EVT VT = Op.getValueType();
      SDLoc DL(Op);
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);

      // Newton iterations: Est = Est + Est (1 - Arg * Est)
      for (int i = 0; i < Iterations; ++i) {
        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
        AddToWorklist(Est.getNode());
      }
    }
    return Est;
  }

  return SDValue();
}

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);

  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
  // this entire sequence requires only one FP constant.
  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
    AddToWorklist(Est.getNode());
  }

  // If non-reciprocal square root is requested, multiply the result by Arg.
  // (sqrt(A) = A * rsqrt(A))
  if (!Reciprocal) {
    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}

/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
///     =>
///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);

  // This routine must enter the loop below to work correctly
  // when (Reciprocal == false): the final-iteration special case inside the
  // loop is what multiplies the estimate back by Arg.
  assert(Iterations > 0);

  // Newton iterations for reciprocal square root:
  //   E = (E * -0.5) * ((A * E) * E + -3.0)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
    AddToWorklist(AE.getNode());

    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
    AddToWorklist(AEE.getNode());

    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
    AddToWorklist(RHS.getNode());

    // When calculating a square root at the last iteration build:
    //   S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice a common subexpression)
    SDValue LHS;
    if (Reciprocal || (i + 1) < Iterations) {
      // RSQRT: LHS = (E * -0.5)
      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
    } else {
      // SQRT: LHS = (A * E) * -0.5
      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
    }
    AddToWorklist(LHS.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}

/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
/// Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  // Estimates are only profitable before the DAG is fully legalized.
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  // The target picks the refinement formula (one- or two-constant NR).
  bool UseOneConstNR = false;
  if (SDValue Est =
      TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                          Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      Est = UseOneConstNR
            ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
            : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

      if (!Reciprocal) {
        // The estimate is now completely wrong if the input was exactly 0.0 or
        // possibly a denormal. Force the answer to 0.0 for those cases.
        EVT VT = Op.getValueType();
        SDLoc DL(Op);
        EVT CCVT = getSetCCResultType(VT);
        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
        const Function &F = DAG.getMachineFunction().getFunction();
        Attribute Denorms = F.getFnAttribute("denormal-fp-math");
        if (Denorms.getValueAsString().equals("ieee")) {
          // fabs(X) < SmallestNormal ? 0.0 : Est
          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
          AddToWorklist(Fabs.getNode());
          AddToWorklist(IsDenorm.getNode());
          AddToWorklist(Est.getNode());
        } else {
          // X == 0.0 ? 0.0 : Est
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
          AddToWorklist(IsZero.getNode());
          AddToWorklist(Est.getNode());
        }
      }
    }
    return Est;
  }

  return SDValue();
}

// Convenience wrapper: reciprocal square root estimate.
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, true);
}

// Convenience wrapper: square root estimate.
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, false);
}

/// Return true if there is any possibility that the two addresses overlap.
/// Conservative: returns true ("may alias") whenever it cannot prove
/// disjointness.
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
  // If they are the same then they must be aliases.
  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;

  // If they are both volatile then they cannot be reordered.
  if (Op0->isVolatile() && Op1->isVolatile()) return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load /store.
  if (Op0->isInvariant() && Op1->writeMem())
    return false;

  if (Op1->isInvariant() && Op0->writeMem())
    return false;

  unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
  unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();

  // Check for BaseIndexOffset matching.
  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
  int64_t PtrDiff;
  if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
    // Same base and index: a constant byte offset decides overlap exactly.
    if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
      return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));

    // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
    // able to calculate their relative offset if at least one arises
    // from an alloca. However, these allocas cannot overlap and we
    // can infer there is no alias.
    if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
      if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
        MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
        // If the bases are the same frame index but we couldn't find a
        // constant offset (the indices are different), be conservative.
        if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
                       !MFI.isFixedObjectIndex(B->getIndex())))
          return false;
      }

    bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
    bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
    bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
    bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
    bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
    bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());

    // If the bases are of mismatched kinds, or have checkable indices, we can
    // prove they do not alias: distinct frame objects, globals, and constant
    // pool entries occupy disjoint storage.
    if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
         (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
        (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
      return false;
  }

  // If we know required SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias. This check is conservative for now to catch
  // cases created by splitting vector types.
  int64_t SrcValOffset0 = Op0->getSrcValueOffset();
  int64_t SrcValOffset1 = Op1->getSrcValueOffset();
  unsigned OrigAlignment0 = Op0->getOriginalAlignment();
  unsigned OrigAlignment1 = Op1->getOriginalAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of
    // similar size. Return no alias.
    if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
        (OffAlign1 + NumBytes1) <= OffAlign0)
      return false;
  }

  // Fall back to IR-level alias analysis when enabled (by flag or by the
  // subtarget's preference).
  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif

  if (UseAA && AA &&
      Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
    AliasResult AAResult =
        AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}

/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias. If so, add it to the
  // aliases list. If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      // Give up: conservatively keep the original chain as the only alias.
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    // Don't bother if we've been before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If chain is alias then stop here.
      // (Two non-volatile loads never alias for chain purposes.)
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up. Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        // Too wide to be worth expanding; treat the whole factor as an alias.
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    case ISD::CopyFromReg:
      // Forward past CopyFromReg.
      Chains.push_back(Chain.getOperand(0));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}

/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
/// (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  if (OptLevel == CodeGenOpt::None)
    return OldChain;

  // Ops for replacing token factor.
  SmallVector<SDValue, 8> Aliases;

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  // If no operands then chain to entry token.
  if (Aliases.size() == 0)
    return DAG.getEntryNode();

  // If a single operand then chain to it. We don't need to revisit it.
  if (Aliases.size() == 1)
    return Aliases[0];

  // Construct a custom tailored token factor.
  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}

// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
// I believe this is mainly important because MergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
// before running MergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None)
    return false;

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
18209 if (BasePtr.getBase().isUndef()) 18210 return false; 18211 18212 SmallVector<StoreSDNode *, 8> ChainedStores; 18213 ChainedStores.push_back(St); 18214 18215 // Walk up the chain and look for nodes with offsets from the same 18216 // base pointer. Stop when reaching an instruction with a different kind 18217 // or instruction which has a different base pointer. 18218 StoreSDNode *Index = St; 18219 while (Index) { 18220 // If the chain has more than one use, then we can't reorder the mem ops. 18221 if (Index != St && !SDValue(Index, 0)->hasOneUse()) 18222 break; 18223 18224 if (Index->isVolatile() || Index->isIndexed()) 18225 break; 18226 18227 // Find the base pointer and offset for this memory node. 18228 BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG); 18229 18230 // Check that the base pointer is the same as the original one. 18231 if (!BasePtr.equalBaseIndex(Ptr, DAG)) 18232 break; 18233 18234 // Walk up the chain to find the next store node, ignoring any 18235 // intermediate loads. Any other kind of node will halt the loop. 18236 SDNode *NextInChain = Index->getChain().getNode(); 18237 while (true) { 18238 if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { 18239 // We found a store node. Use it for the next iteration. 18240 if (STn->isVolatile() || STn->isIndexed()) { 18241 Index = nullptr; 18242 break; 18243 } 18244 ChainedStores.push_back(STn); 18245 Index = STn; 18246 break; 18247 } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { 18248 NextInChain = Ldn->getChain().getNode(); 18249 continue; 18250 } else { 18251 Index = nullptr; 18252 break; 18253 } 18254 } // end while 18255 } 18256 18257 // At this point, ChainedStores lists all of the Store nodes 18258 // reachable by iterating up through chain nodes matching the above 18259 // conditions. For each such store identified, try to find an 18260 // earlier chain to attach the store to which won't violate the 18261 // required ordering. 
18262 bool MadeChangeToSt = false; 18263 SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; 18264 18265 for (StoreSDNode *ChainedStore : ChainedStores) { 18266 SDValue Chain = ChainedStore->getChain(); 18267 SDValue BetterChain = FindBetterChain(ChainedStore, Chain); 18268 18269 if (Chain != BetterChain) { 18270 if (ChainedStore == St) 18271 MadeChangeToSt = true; 18272 BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); 18273 } 18274 } 18275 18276 // Do all replacements after finding the replacements to make to avoid making 18277 // the chains more complicated by introducing new TokenFactors. 18278 for (auto Replacement : BetterChains) 18279 replaceStoreChain(Replacement.first, Replacement.second); 18280 18281 return MadeChangeToSt; 18282 } 18283 18284 /// This is the entry point for the file. 18285 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA, 18286 CodeGenOpt::Level OptLevel) { 18287 /// This is the main entry point to this class. 18288 DAGCombiner(*this, AA, OptLevel).Run(Level); 18289 } 18290