//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "dagcombine"

// Counters reported under the "dagcombine" debug type.
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");

namespace {
// Hidden command-line switches controlling the combiner.  Only UseTBAA and
// MaySplitLoadIndex are explicitly initialized to true; StressLoadSlicing is
// explicitly initialized to false; the others carry no explicit initializer.
static cl::opt<bool>
CombinerAA("combiner-alias-analysis", cl::Hidden,
           cl::desc("Enable DAG combiner alias-analysis heuristics"));

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

// This option only exists in builds with assertions enabled (no NDEBUG).
#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load "
                           "slicing"),
                  cl::init(false));

static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                  cl::desc("DAG combiner may split indexing from loads"));

//------------------------------ DAGCombiner ---------------------------------//

/// Worklist-driven combiner over a single SelectionDAG.  The class owns the
/// worklist bookkeeping and declares one visit* routine per node kind; most
/// member functions are defined out of line.
class DAGCombiner {
  // The DAG being combined and the lowering info obtained from it in the
  // constructor.
  SelectionDAG &DAG;
  const TargetLowering &TLI;
  // Starts at BeforeLegalizeTypes; NOTE(review): presumably advanced by
  // Run(AtLevel), whose definition is outside this section -- confirm.
  CombineLevel Level;
  CodeGenOpt::Level OptLevel;
  // Both flags start out false.  LegalTypes gates isTypeLegal() and the choice
  // of shift-amount type in getShiftAmountTy().
  bool LegalOperations;
  bool LegalTypes;
  // Set in the constructor from the current function's optForSize().
  bool ForCodeSize;

  /// \brief Worklist of all of the nodes that need to be simplified.
  ///
  /// This must behave as a stack -- new nodes to process are pushed onto the
  /// back and when processing we pop off of the back.
  ///
  /// The worklist will not contain duplicates but may contain null entries
  /// due to nodes being deleted from the underlying DAG.
  SmallVector<SDNode *, 64> Worklist;

  /// \brief Mapping from an SDNode to its position on the worklist.
  ///
  /// This is used to find and remove nodes from the worklist (by nulling
  /// them) when they are deleted from the underlying DAG. It relies on
  /// stable indices of nodes within the worklist.
  DenseMap<SDNode *, unsigned> WorklistMap;

  /// \brief Set of nodes which have been combined (at least once).
  ///
  /// This is used to allow us to reliably add any operands of a DAG node
  /// which have not yet been combined to the worklist.
  SmallPtrSet<SDNode *, 64> CombinedNodes;

  // AA - Used for DAG load/store alias analysis.
  AliasAnalysis &AA;

  /// When an instruction is simplified, add all users of the instruction to
  /// the work lists because they might get more simplified now.
  void AddUsersToWorklist(SDNode *N) {
    for (SDNode *Node : N->uses())
      AddToWorklist(Node);
  }

  /// Call the node-specific routine that folds each particular type of node.
  SDValue visit(SDNode *N);

public:
  /// Add N to the back of the worklist (next to be processed) unless it is
  /// already queued.  A node that is already present keeps its current
  /// position: the map insertion fails and nothing is pushed.
  void AddToWorklist(SDNode *N) {
    // Skip handle nodes as they can't usefully be combined and confuse the
    // zero-use deletion strategy.
    if (N->getOpcode() == ISD::HANDLENODE)
      return;

    // Record the index N will occupy; only push when N was not yet mapped.
    if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
      Worklist.push_back(N);
  }

  /// Remove N from the worklist (if present) and from the set of already
  /// combined nodes.
  void removeFromWorklist(SDNode *N) {
    CombinedNodes.erase(N);

    auto It = WorklistMap.find(N);
    if (It == WorklistMap.end())
      return; // Not in the worklist.

    // Null out the entry rather than erasing it to avoid a linear operation.
    Worklist[It->second] = nullptr;
    WorklistMap.erase(It);
  }

  void deleteAndRecombine(SDNode *N);
  bool recursivelyDeleteUnusedNodes(SDNode *N);

  /// Replaces all uses of the results of one DAG node with new values.
  /// \p To points at \p NumTo replacement values.
  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                    bool AddTo = true);

  /// Replaces all uses of the results of one DAG node with new values.
  /// Single-result convenience overload.
  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
    return CombineTo(N, &Res, 1, AddTo);
  }

  /// Replaces all uses of the results of one DAG node with new values.
  /// Two-result convenience overload.
  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                    bool AddTo = true) {
    SDValue To[] = { Res0, Res1 };
    return CombineTo(N, To, 2, AddTo);
  }

  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

private:

  /// Check the specified integer node value to see if it can be simplified or
  /// if things it uses can be simplified by bit propagation.
  /// If so, return true.  This overload demands every bit of the scalar type.
  bool SimplifyDemandedBits(SDValue Op) {
    unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
    APInt Demanded = APInt::getAllOnesValue(BitWidth);
    return SimplifyDemandedBits(Op, Demanded);
  }

  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

  bool CombineToPreIndexedLoadStore(SDNode *N);
  bool CombineToPostIndexedLoadStore(SDNode *N);
  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
  bool SliceUpLoad(SDNode *N);

  /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
  ///   load.
  ///
  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
  /// \param EltNo index of the vector element to load.
  /// \param OriginalLoad load that EVE came from to be replaced.
  /// \returns EVE on success SDValue() on failure.
  SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
      SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue PromoteIntBinOp(SDValue Op);
  SDValue PromoteIntShiftOp(SDValue Op);
  SDValue PromoteExtend(SDValue Op);
  bool PromoteLoad(SDValue Op);

  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                       SDValue Trunc, SDValue ExtLoad, SDLoc DL,
                       ISD::NodeType ExtType);

  /// Call the node-specific routine that knows how to fold each
  /// particular type of node. If that doesn't do anything, try the
  /// target-specific DAG combines.
  SDValue combine(SDNode *N);

  // Visitation implementation - Implement dag node combining for different
  // node types.  The semantics are as follows:
  // Return Value:
  //   SDValue.getNode() == 0 - No change was made
  //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
  //   otherwise              - N should be replaced by the returned Operand.
  //
  SDValue visitTokenFactor(SDNode *N);
  SDValue visitMERGE_VALUES(SDNode *N);
  SDValue visitADD(SDNode *N);
  SDValue visitSUB(SDNode *N);
  SDValue visitADDC(SDNode *N);
  SDValue visitSUBC(SDNode *N);
  SDValue visitADDE(SDNode *N);
  SDValue visitSUBE(SDNode *N);
  SDValue visitMUL(SDNode *N);
  SDValue useDivRem(SDNode *N);
  SDValue visitSDIV(SDNode *N);
  SDValue visitUDIV(SDNode *N);
  SDValue visitREM(SDNode *N);
  SDValue visitMULHU(SDNode *N);
  SDValue visitMULHS(SDNode *N);
  SDValue visitSMUL_LOHI(SDNode *N);
  SDValue visitUMUL_LOHI(SDNode *N);
  SDValue visitSMULO(SDNode *N);
  SDValue visitUMULO(SDNode *N);
  SDValue visitIMINMAX(SDNode *N);
  SDValue visitAND(SDNode *N);
  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
  SDValue visitOR(SDNode *N);
  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
  SDValue visitXOR(SDNode *N);
  SDValue SimplifyVBinOp(SDNode *N);
  SDValue visitSHL(SDNode *N);
  SDValue visitSRA(SDNode *N);
  SDValue visitSRL(SDNode *N);
  SDValue visitRotate(SDNode *N);
  SDValue visitBSWAP(SDNode *N);
  SDValue visitCTLZ(SDNode *N);
  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTTZ(SDNode *N);
  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTPOP(SDNode *N);
  SDValue visitSELECT(SDNode *N);
  SDValue visitVSELECT(SDNode *N);
  SDValue visitSELECT_CC(SDNode *N);
  SDValue visitSETCC(SDNode *N);
  SDValue visitSIGN_EXTEND(SDNode *N);
  SDValue visitZERO_EXTEND(SDNode *N);
  SDValue visitANY_EXTEND(SDNode *N);
  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitTRUNCATE(SDNode *N);
  SDValue visitBITCAST(SDNode *N);
  SDValue visitBUILD_PAIR(SDNode *N);
  SDValue visitFADD(SDNode *N);
  SDValue visitFSUB(SDNode *N);
  SDValue visitFMUL(SDNode *N);
  SDValue visitFMA(SDNode *N);
  SDValue visitFDIV(SDNode *N);
  SDValue visitFREM(SDNode *N);
  SDValue visitFSQRT(SDNode *N);
  SDValue visitFCOPYSIGN(SDNode *N);
  SDValue visitSINT_TO_FP(SDNode *N);
  SDValue visitUINT_TO_FP(SDNode *N);
  SDValue visitFP_TO_SINT(SDNode *N);
  SDValue visitFP_TO_UINT(SDNode *N);
  SDValue visitFP_ROUND(SDNode *N);
  SDValue visitFP_ROUND_INREG(SDNode *N);
  SDValue visitFP_EXTEND(SDNode *N);
  SDValue visitFNEG(SDNode *N);
  SDValue visitFABS(SDNode *N);
  SDValue visitFCEIL(SDNode *N);
  SDValue visitFTRUNC(SDNode *N);
  SDValue visitFFLOOR(SDNode *N);
  SDValue visitFMINNUM(SDNode *N);
  SDValue visitFMAXNUM(SDNode *N);
  SDValue visitBRCOND(SDNode *N);
  SDValue visitBR_CC(SDNode *N);
  SDValue visitLOAD(SDNode *N);

  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

  SDValue visitSTORE(SDNode *N);
  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
  SDValue visitBUILD_VECTOR(SDNode *N);
  SDValue visitCONCAT_VECTORS(SDNode *N);
  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
  SDValue visitVECTOR_SHUFFLE(SDNode *N);
  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
  SDValue visitINSERT_SUBVECTOR(SDNode *N);
  SDValue visitMLOAD(SDNode *N);
  SDValue visitMSTORE(SDNode *N);
  SDValue visitMGATHER(SDNode *N);
  SDValue visitMSCATTER(SDNode *N);
  SDValue visitFP_TO_FP16(SDNode *N);
  SDValue visitFP16_TO_FP(SDNode *N);

  SDValue visitFADDForFMACombine(SDNode *N);
  SDValue visitFSUBForFMACombine(SDNode *N);
  SDValue visitFMULForFMACombine(SDNode *N);

  SDValue XformToShuffleWithZero(SDNode *N);
  SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);

  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
  SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
  SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
  SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
                           SDValue N3, ISD::CondCode CC,
                           bool NotExtCompare = false);
  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                        SDLoc DL, bool foldBooleans = true);

  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                         SDValue &CC) const;
  bool isOneUseSetCC(SDValue N) const;

  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                     unsigned HiOp);
  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
  SDValue CombineExtLoad(SDNode *N);
  SDValue combineRepeatedFPDivisors(SDNode *N);
  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
  SDValue BuildSDIV(SDNode *N);
  SDValue BuildSDIVPow2(SDNode *N);
  SDValue BuildUDIV(SDNode *N);
  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
  SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
  SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
                               SDNodeFlags *Flags);
  SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
                               SDNodeFlags *Flags);
  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                             bool DemandHighBits = true);
  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                            SDValue InnerPos, SDValue InnerNeg,
                            unsigned PosOpcode, unsigned NegOpcode,
                            SDLoc DL);
  SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
  SDValue ReduceLoadWidth(SDNode *N);
  SDValue ReduceLoadOpStoreWidth(SDNode *N);
  SDValue TransformFPLoadStorePair(SDNode *N);
  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
  SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);

  SDValue GetDemandedBits(SDValue V, const APInt &Mask);

  /// Walk up chain skipping non-aliasing memory nodes,
  /// looking for aliasing nodes and adding them to the Aliases vector.
  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                        SmallVectorImpl<SDValue> &Aliases);

  /// Return true if there is any possibility that the two addresses overlap.
  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
  /// chain (aliasing node.)
  SDValue FindBetterChain(SDNode *N, SDValue Chain);

  /// Do FindBetterChain for a store and any possibly adjacent stores on
  /// consecutive chains.
  bool findBetterNeighborChains(StoreSDNode *St);

  /// Holds a pointer to an LSBaseSDNode as well as information on where it
  /// is located in a sequence of memory operations connected by a chain.
  struct MemOpLink {
    MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
      MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
    // Ptr to the mem node.
    LSBaseSDNode *MemNode;
    // Offset from the base ptr.
    int64_t OffsetFromBase;
    // What is the sequence number of this mem node.
    // Lowest mem operand in the DAG starts at zero.
    unsigned SequenceNum;
  };

  /// This is a helper function for visitMUL to check the profitability
  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
  /// MulNode is the original multiply, AddNode is (add x, c1),
  /// and ConstNode is c2.
  bool isMulAddWithConstProfitable(SDNode *MulNode,
                                   SDValue &AddNode,
                                   SDValue &ConstNode);

  /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
  /// constant build_vector of the stored constant values in Stores.
  SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
                                       SDLoc SL,
                                       ArrayRef<MemOpLink> Stores,
                                       SmallVectorImpl<SDValue> &Chains,
                                       EVT Ty) const;

  /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
  /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
  /// the type of the loaded value to be extended.  LoadedVT returns the type
  /// of the original loaded value.  NarrowLoad returns whether the load would
  /// need to be narrowed in order to match.
  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                        EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
                        bool &NarrowLoad);

  /// This is a helper function for MergeConsecutiveStores. When the source
  /// elements of the consecutive stores are all constants or all extracted
  /// vector elements, try to merge them into one larger store.
  /// \return True if a merged store was created.
  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                       EVT MemVT, unsigned NumStores,
                                       bool IsConstantSrc, bool UseVector);

  /// This is a helper function for MergeConsecutiveStores.
  /// Stores that may be merged are placed in StoreNodes.
  /// Loads that may alias with those stores are placed in AliasLoadNodes.
  void getStoreMergeAndAliasCandidates(
      StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
      SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);

  /// Merge consecutive store operations into a wide store.
  /// This optimization uses wide integers or vectors when possible.
  /// \return True if some memory operations were changed.
  bool MergeConsecutiveStores(StoreSDNode *N);

  /// \brief Try to transform a truncation where C is a constant:
  ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
  ///
  /// \p N needs to be a truncation and its first operand an AND. Other
  /// requirements are checked by the function (e.g. that trunc is
  /// single-use) and if missed an empty SDValue is returned.
  SDValue distributeTruncateThroughAnd(SDNode *N);

public:
  /// Bind the combiner to \p D.  The combiner starts at BeforeLegalizeTypes
  /// with both legality flags cleared; ForCodeSize is read from the function
  /// currently held by the DAG's MachineFunction.
  DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
        OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
    ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
  }

  /// Runs the dag combiner on all nodes in the work list
  void Run(CombineLevel AtLevel);

  SelectionDAG &getDAG() const { return DAG; }

  /// Returns a type large enough to hold any valid shift amount - before type
  /// legalization these can be huge.
  EVT getShiftAmountTy(EVT LHSTy) {
    assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
    // Vector shifts use the vector type itself for the amount.
    if (LHSTy.isVector())
      return LHSTy;
    auto &DL = DAG.getDataLayout();
    // With legal types ask the target; otherwise fall back to the pointer
    // type.
    return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
                      : TLI.getPointerTy(DL);
  }

  /// This method returns true if we are running before type legalization or
  /// if the specified VT is legal.
  bool isTypeLegal(const EVT &VT) {
    if (!LegalTypes) return true;
    return TLI.isTypeLegal(VT);
  }

  /// Convenience wrapper around TargetLowering::getSetCCResultType
  EVT getSetCCResultType(EVT VT) const {
    return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  }
};
}


namespace {
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
500 class WorklistRemover : public SelectionDAG::DAGUpdateListener { 501 DAGCombiner &DC; 502 public: 503 explicit WorklistRemover(DAGCombiner &dc) 504 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} 505 506 void NodeDeleted(SDNode *N, SDNode *E) override { 507 DC.removeFromWorklist(N); 508 } 509 }; 510 } 511 512 //===----------------------------------------------------------------------===// 513 // TargetLowering::DAGCombinerInfo implementation 514 //===----------------------------------------------------------------------===// 515 516 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { 517 ((DAGCombiner*)DC)->AddToWorklist(N); 518 } 519 520 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { 521 ((DAGCombiner*)DC)->removeFromWorklist(N); 522 } 523 524 SDValue TargetLowering::DAGCombinerInfo:: 525 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { 526 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); 527 } 528 529 SDValue TargetLowering::DAGCombinerInfo:: 530 CombineTo(SDNode *N, SDValue Res, bool AddTo) { 531 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); 532 } 533 534 535 SDValue TargetLowering::DAGCombinerInfo:: 536 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { 537 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); 538 } 539 540 void TargetLowering::DAGCombinerInfo:: 541 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 542 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); 543 } 544 545 //===----------------------------------------------------------------------===// 546 // Helper Functions 547 //===----------------------------------------------------------------------===// 548 549 void DAGCombiner::deleteAndRecombine(SDNode *N) { 550 removeFromWorklist(N); 551 552 // If the operands of this node are only used by the node, they will now be 553 // dead. Make sure to re-visit them and recursively delete dead nodes. 
554 for (const SDValue &Op : N->ops()) 555 // For an operand generating multiple values, one of the values may 556 // become dead allowing further simplification (e.g. split index 557 // arithmetic from an indexed load). 558 if (Op->hasOneUse() || Op->getNumValues() > 1) 559 AddToWorklist(Op.getNode()); 560 561 DAG.DeleteNode(N); 562 } 563 564 /// Return 1 if we can compute the negated form of the specified expression for 565 /// the same cost as the expression itself, or 2 if we can compute the negated 566 /// form more cheaply than the expression itself. 567 static char isNegatibleForFree(SDValue Op, bool LegalOperations, 568 const TargetLowering &TLI, 569 const TargetOptions *Options, 570 unsigned Depth = 0) { 571 // fneg is removable even if it has multiple uses. 572 if (Op.getOpcode() == ISD::FNEG) return 2; 573 574 // Don't allow anything with multiple uses. 575 if (!Op.hasOneUse()) return 0; 576 577 // Don't recurse exponentially. 578 if (Depth > 6) return 0; 579 580 switch (Op.getOpcode()) { 581 default: return false; 582 case ISD::ConstantFP: 583 // Don't invert constant FP values after legalize. The negated constant 584 // isn't necessarily legal. 585 return LegalOperations ? 0 : 1; 586 case ISD::FADD: 587 // FIXME: determine better conditions for this xform. 588 if (!Options->UnsafeFPMath) return 0; 589 590 // After operation legalization, it might not be legal to create new FSUBs. 591 if (LegalOperations && 592 !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) 593 return 0; 594 595 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 596 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 597 Options, Depth + 1)) 598 return V; 599 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 600 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 601 Depth + 1); 602 case ISD::FSUB: 603 // We can't turn -(A-B) into B-A when we honor signed zeros. 
604 if (!Options->UnsafeFPMath) return 0; 605 606 // fold (fneg (fsub A, B)) -> (fsub B, A) 607 return 1; 608 609 case ISD::FMUL: 610 case ISD::FDIV: 611 if (Options->HonorSignDependentRoundingFPMath()) return 0; 612 613 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 614 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 615 Options, Depth + 1)) 616 return V; 617 618 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 619 Depth + 1); 620 621 case ISD::FP_EXTEND: 622 case ISD::FP_ROUND: 623 case ISD::FSIN: 624 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, 625 Depth + 1); 626 } 627 } 628 629 /// If isNegatibleForFree returns true, return the newly negated expression. 630 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 631 bool LegalOperations, unsigned Depth = 0) { 632 const TargetOptions &Options = DAG.getTarget().Options; 633 // fneg is removable even if it has multiple uses. 634 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 635 636 // Don't allow anything with multiple uses. 637 assert(Op.hasOneUse() && "Unknown reuse!"); 638 639 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 640 641 const SDNodeFlags *Flags = Op.getNode()->getFlags(); 642 643 switch (Op.getOpcode()) { 644 default: llvm_unreachable("Unknown code"); 645 case ISD::ConstantFP: { 646 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 647 V.changeSign(); 648 return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); 649 } 650 case ISD::FADD: 651 // FIXME: determine better conditions for this xform. 
652 assert(Options.UnsafeFPMath); 653 654 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 655 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 656 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 657 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 658 GetNegatedExpression(Op.getOperand(0), DAG, 659 LegalOperations, Depth+1), 660 Op.getOperand(1), Flags); 661 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 662 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 663 GetNegatedExpression(Op.getOperand(1), DAG, 664 LegalOperations, Depth+1), 665 Op.getOperand(0), Flags); 666 case ISD::FSUB: 667 // We can't turn -(A-B) into B-A when we honor signed zeros. 668 assert(Options.UnsafeFPMath); 669 670 // fold (fneg (fsub 0, B)) -> B 671 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 672 if (N0CFP->isZero()) 673 return Op.getOperand(1); 674 675 // fold (fneg (fsub A, B)) -> (fsub B, A) 676 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 677 Op.getOperand(1), Op.getOperand(0), Flags); 678 679 case ISD::FMUL: 680 case ISD::FDIV: 681 assert(!Options.HonorSignDependentRoundingFPMath()); 682 683 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 684 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 685 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 686 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 687 GetNegatedExpression(Op.getOperand(0), DAG, 688 LegalOperations, Depth+1), 689 Op.getOperand(1), Flags); 690 691 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 692 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 693 Op.getOperand(0), 694 GetNegatedExpression(Op.getOperand(1), DAG, 695 LegalOperations, Depth+1), Flags); 696 697 case ISD::FP_EXTEND: 698 case ISD::FSIN: 699 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 700 GetNegatedExpression(Op.getOperand(0), DAG, 701 LegalOperations, Depth+1)); 702 case ISD::FP_ROUND: 703 return 
DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), 704 GetNegatedExpression(Op.getOperand(0), DAG, 705 LegalOperations, Depth+1), 706 Op.getOperand(1)); 707 } 708 } 709 710 // Return true if this node is a setcc, or is a select_cc 711 // that selects between the target values used for true and false, making it 712 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to 713 // the appropriate nodes based on the type of node we are checking. This 714 // simplifies life a bit for the callers. 715 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 716 SDValue &CC) const { 717 if (N.getOpcode() == ISD::SETCC) { 718 LHS = N.getOperand(0); 719 RHS = N.getOperand(1); 720 CC = N.getOperand(2); 721 return true; 722 } 723 724 if (N.getOpcode() != ISD::SELECT_CC || 725 !TLI.isConstTrueVal(N.getOperand(2).getNode()) || 726 !TLI.isConstFalseVal(N.getOperand(3).getNode())) 727 return false; 728 729 if (TLI.getBooleanContents(N.getValueType()) == 730 TargetLowering::UndefinedBooleanContent) 731 return false; 732 733 LHS = N.getOperand(0); 734 RHS = N.getOperand(1); 735 CC = N.getOperand(4); 736 return true; 737 } 738 739 /// Return true if this is a SetCC-equivalent operation with only one use. 740 /// If this is true, it allows the users to invert the operation for free when 741 /// it is profitable to do so. 742 bool DAGCombiner::isOneUseSetCC(SDValue N) const { 743 SDValue N0, N1, N2; 744 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 745 return true; 746 return false; 747 } 748 749 /// Returns true if N is a BUILD_VECTOR node whose 750 /// elements are all the same constant or undefined. 
751 static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { 752 BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); 753 if (!C) 754 return false; 755 756 APInt SplatUndef; 757 unsigned SplatBitSize; 758 bool HasAnyUndefs; 759 EVT EltVT = N->getValueType(0).getVectorElementType(); 760 return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 761 HasAnyUndefs) && 762 EltVT.getSizeInBits() >= SplatBitSize); 763 } 764 765 // \brief Returns the SDNode if it is a constant integer BuildVector 766 // or constant integer. 767 static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { 768 if (isa<ConstantSDNode>(N)) 769 return N.getNode(); 770 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) 771 return N.getNode(); 772 return nullptr; 773 } 774 775 // \brief Returns the SDNode if it is a constant float BuildVector 776 // or constant float. 777 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { 778 if (isa<ConstantFPSDNode>(N)) 779 return N.getNode(); 780 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) 781 return N.getNode(); 782 return nullptr; 783 } 784 785 // \brief Returns the SDNode if it is a constant splat BuildVector or constant 786 // int. 787 static ConstantSDNode *isConstOrConstSplat(SDValue N) { 788 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) 789 return CN; 790 791 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 792 BitVector UndefElements; 793 ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); 794 795 // BuildVectors can truncate their operands. Ignore that case here. 796 // FIXME: We blindly ignore splats which include undef which is overly 797 // pessimistic. 798 if (CN && UndefElements.none() && 799 CN->getValueType(0) == N.getValueType().getScalarType()) 800 return CN; 801 } 802 803 return nullptr; 804 } 805 806 // \brief Returns the SDNode if it is a constant splat BuildVector or constant 807 // float. 
static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
  // A plain FP constant is returned as-is.
  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
    return CN;

  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
    BitVector UndefElements;
    ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);

    // Splats that contain any undef element are rejected.
    if (CN && UndefElements.none())
      return CN;
  }

  return nullptr;
}

/// Try to reassociate (Opc N0, N1) when one side is itself an Opc node whose
/// second operand is a constant integer or a constant-integer build vector.
/// Two constants are folded together; otherwise the constant is moved to the
/// outer node, provided the inner node has a single use.  Returns an empty
/// SDValue when nothing was done (including when constant folding fails).
SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
                                    SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc) {
    if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        // Let the freshly built inner node be combined as well.
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  // Mirror image of the above with the Opc node on the right-hand side.
  if (N1.getOpcode() == Opc) {
    if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        // NOTE(review): the inner node is rebuilt from N1's operand but takes
        // its location from SDLoc(N0), same as the branch above -- confirm
        // this is intended.
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}

/// Replace every result of N with the corresponding entry of To (NumTo must
/// equal N's number of values), optionally queue the replacements and their
/// users, and delete N if it ended up without uses.  Always returns
/// SDValue(N, 0).
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  // NOTE(review): the dump dereferences To[0].getNode() unconditionally,
  // while the type check below tolerates entries with a null node -- confirm
  // To[0] is never null when debug output is enabled.
  DEBUG(dbgs() << "\nReplacing.1 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        To[0].getNode()->dump(&DAG);
        dbgs() << " and " << NumTo-1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // While DeadNodes is alive, nodes deleted by the replacement are also
  // removed from the worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

/// Apply a target-lowering simplification: replace TLO.Old with TLO.New,
/// queue the new node and its users, and delete the old node if it became
/// dead.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation.  If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownZero, KnownOne;
  // Nothing to do unless the target hook reports a simplification in TLO.
  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 ";
        TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: ";
        TLO.New.getNode()->dump(&DAG);
        dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Replace Load with ExtLoad: value users get a TRUNCATE of ExtLoad's result 0
/// back to Load's original type, chain users get ExtLoad's result 1.  Load is
/// then deleted and the truncate is queued for combining.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc dl(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));

  DEBUG(dbgs() << "\nReplacing.9 ";
        Load->dump(&DAG);
        dbgs() << "\nWith: ";
        Trunc.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc dl(Op);
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
    EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 967 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD 968 : ISD::EXTLOAD) 969 : LD->getExtensionType(); 970 Replace = true; 971 return DAG.getExtLoad(ExtType, dl, PVT, 972 LD->getChain(), LD->getBasePtr(), 973 MemVT, LD->getMemOperand()); 974 } 975 976 unsigned Opc = Op.getOpcode(); 977 switch (Opc) { 978 default: break; 979 case ISD::AssertSext: 980 return DAG.getNode(ISD::AssertSext, dl, PVT, 981 SExtPromoteOperand(Op.getOperand(0), PVT), 982 Op.getOperand(1)); 983 case ISD::AssertZext: 984 return DAG.getNode(ISD::AssertZext, dl, PVT, 985 ZExtPromoteOperand(Op.getOperand(0), PVT), 986 Op.getOperand(1)); 987 case ISD::Constant: { 988 unsigned ExtOpc = 989 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 990 return DAG.getNode(ExtOpc, dl, PVT, Op); 991 } 992 } 993 994 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) 995 return SDValue(); 996 return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); 997 } 998 999 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { 1000 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) 1001 return SDValue(); 1002 EVT OldVT = Op.getValueType(); 1003 SDLoc dl(Op); 1004 bool Replace = false; 1005 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 1006 if (!NewOp.getNode()) 1007 return SDValue(); 1008 AddToWorklist(NewOp.getNode()); 1009 1010 if (Replace) 1011 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 1012 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, 1013 DAG.getValueType(OldVT)); 1014 } 1015 1016 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { 1017 EVT OldVT = Op.getValueType(); 1018 SDLoc dl(Op); 1019 bool Replace = false; 1020 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 1021 if (!NewOp.getNode()) 1022 return SDValue(); 1023 AddToWorklist(NewOp.getNode()); 1024 1025 if (Replace) 1026 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 1027 return 
DAG.getZeroExtendInReg(NewOp, dl, OldVT); 1028 } 1029 1030 /// Promote the specified integer binary operation if the target indicates it is 1031 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to 1032 /// i32 since i16 instructions are longer. 1033 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { 1034 if (!LegalOperations) 1035 return SDValue(); 1036 1037 EVT VT = Op.getValueType(); 1038 if (VT.isVector() || !VT.isInteger()) 1039 return SDValue(); 1040 1041 // If operation type is 'undesirable', e.g. i16 on x86, consider 1042 // promoting it. 1043 unsigned Opc = Op.getOpcode(); 1044 if (TLI.isTypeDesirableForOp(Opc, VT)) 1045 return SDValue(); 1046 1047 EVT PVT = VT; 1048 // Consult target whether it is a good idea to promote this operation and 1049 // what's the right type to promote it to. 1050 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1051 assert(PVT != VT && "Don't know what type to promote to!"); 1052 1053 bool Replace0 = false; 1054 SDValue N0 = Op.getOperand(0); 1055 SDValue NN0 = PromoteOperand(N0, PVT, Replace0); 1056 if (!NN0.getNode()) 1057 return SDValue(); 1058 1059 bool Replace1 = false; 1060 SDValue N1 = Op.getOperand(1); 1061 SDValue NN1; 1062 if (N0 == N1) 1063 NN1 = NN0; 1064 else { 1065 NN1 = PromoteOperand(N1, PVT, Replace1); 1066 if (!NN1.getNode()) 1067 return SDValue(); 1068 } 1069 1070 AddToWorklist(NN0.getNode()); 1071 if (NN1.getNode()) 1072 AddToWorklist(NN1.getNode()); 1073 1074 if (Replace0) 1075 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); 1076 if (Replace1) 1077 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); 1078 1079 DEBUG(dbgs() << "\nPromoting "; 1080 Op.getNode()->dump(&DAG)); 1081 SDLoc dl(Op); 1082 return DAG.getNode(ISD::TRUNCATE, dl, VT, 1083 DAG.getNode(Opc, dl, PVT, NN0, NN1)); 1084 } 1085 return SDValue(); 1086 } 1087 1088 /// Promote the specified integer shift operation if the target indicates it is 1089 /// beneficial. e.g. 
On x86, it's usually better to promote i16 operations to 1090 /// i32 since i16 instructions are longer. 1091 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { 1092 if (!LegalOperations) 1093 return SDValue(); 1094 1095 EVT VT = Op.getValueType(); 1096 if (VT.isVector() || !VT.isInteger()) 1097 return SDValue(); 1098 1099 // If operation type is 'undesirable', e.g. i16 on x86, consider 1100 // promoting it. 1101 unsigned Opc = Op.getOpcode(); 1102 if (TLI.isTypeDesirableForOp(Opc, VT)) 1103 return SDValue(); 1104 1105 EVT PVT = VT; 1106 // Consult target whether it is a good idea to promote this operation and 1107 // what's the right type to promote it to. 1108 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1109 assert(PVT != VT && "Don't know what type to promote to!"); 1110 1111 bool Replace = false; 1112 SDValue N0 = Op.getOperand(0); 1113 if (Opc == ISD::SRA) 1114 N0 = SExtPromoteOperand(Op.getOperand(0), PVT); 1115 else if (Opc == ISD::SRL) 1116 N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); 1117 else 1118 N0 = PromoteOperand(N0, PVT, Replace); 1119 if (!N0.getNode()) 1120 return SDValue(); 1121 1122 AddToWorklist(N0.getNode()); 1123 if (Replace) 1124 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); 1125 1126 DEBUG(dbgs() << "\nPromoting "; 1127 Op.getNode()->dump(&DAG)); 1128 SDLoc dl(Op); 1129 return DAG.getNode(ISD::TRUNCATE, dl, VT, 1130 DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); 1131 } 1132 return SDValue(); 1133 } 1134 1135 SDValue DAGCombiner::PromoteExtend(SDValue Op) { 1136 if (!LegalOperations) 1137 return SDValue(); 1138 1139 EVT VT = Op.getValueType(); 1140 if (VT.isVector() || !VT.isInteger()) 1141 return SDValue(); 1142 1143 // If operation type is 'undesirable', e.g. i16 on x86, consider 1144 // promoting it. 
1145 unsigned Opc = Op.getOpcode(); 1146 if (TLI.isTypeDesirableForOp(Opc, VT)) 1147 return SDValue(); 1148 1149 EVT PVT = VT; 1150 // Consult target whether it is a good idea to promote this operation and 1151 // what's the right type to promote it to. 1152 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1153 assert(PVT != VT && "Don't know what type to promote to!"); 1154 // fold (aext (aext x)) -> (aext x) 1155 // fold (aext (zext x)) -> (zext x) 1156 // fold (aext (sext x)) -> (sext x) 1157 DEBUG(dbgs() << "\nPromoting "; 1158 Op.getNode()->dump(&DAG)); 1159 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); 1160 } 1161 return SDValue(); 1162 } 1163 1164 bool DAGCombiner::PromoteLoad(SDValue Op) { 1165 if (!LegalOperations) 1166 return false; 1167 1168 EVT VT = Op.getValueType(); 1169 if (VT.isVector() || !VT.isInteger()) 1170 return false; 1171 1172 // If operation type is 'undesirable', e.g. i16 on x86, consider 1173 // promoting it. 1174 unsigned Opc = Op.getOpcode(); 1175 if (TLI.isTypeDesirableForOp(Opc, VT)) 1176 return false; 1177 1178 EVT PVT = VT; 1179 // Consult target whether it is a good idea to promote this operation and 1180 // what's the right type to promote it to. 1181 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1182 assert(PVT != VT && "Don't know what type to promote to!"); 1183 1184 SDLoc dl(Op); 1185 SDNode *N = Op.getNode(); 1186 LoadSDNode *LD = cast<LoadSDNode>(N); 1187 EVT MemVT = LD->getMemoryVT(); 1188 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 1189 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? 
ISD::ZEXTLOAD 1190 : ISD::EXTLOAD) 1191 : LD->getExtensionType(); 1192 SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, 1193 LD->getChain(), LD->getBasePtr(), 1194 MemVT, LD->getMemOperand()); 1195 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); 1196 1197 DEBUG(dbgs() << "\nPromoting "; 1198 N->dump(&DAG); 1199 dbgs() << "\nTo: "; 1200 Result.getNode()->dump(&DAG); 1201 dbgs() << '\n'); 1202 WorklistRemover DeadNodes(*this); 1203 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 1204 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); 1205 deleteAndRecombine(N); 1206 AddToWorklist(Result.getNode()); 1207 return true; 1208 } 1209 return false; 1210 } 1211 1212 /// \brief Recursively delete a node which has no uses and any operands for 1213 /// which it is the only use. 1214 /// 1215 /// Note that this both deletes the nodes and removes them from the worklist. 1216 /// It also adds any nodes who have had a user deleted to the worklist as they 1217 /// may now have only one use and subject to other combines. 1218 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { 1219 if (!N->use_empty()) 1220 return false; 1221 1222 SmallSetVector<SDNode *, 16> Nodes; 1223 Nodes.insert(N); 1224 do { 1225 N = Nodes.pop_back_val(); 1226 if (!N) 1227 continue; 1228 1229 if (N->use_empty()) { 1230 for (const SDValue &ChildN : N->op_values()) 1231 Nodes.insert(ChildN.getNode()); 1232 1233 removeFromWorklist(N); 1234 DAG.DeleteNode(N); 1235 } else { 1236 AddToWorklist(N); 1237 } 1238 } while (!Nodes.empty()); 1239 return true; 1240 } 1241 1242 //===----------------------------------------------------------------------===// 1243 // Main DAG Combiner implementation 1244 //===----------------------------------------------------------------------===// 1245 1246 void DAGCombiner::Run(CombineLevel AtLevel) { 1247 // set the instance variables, so that the various visit routines may use it. 
1248 Level = AtLevel; 1249 LegalOperations = Level >= AfterLegalizeVectorOps; 1250 LegalTypes = Level >= AfterLegalizeTypes; 1251 1252 // Add all the dag nodes to the worklist. 1253 for (SDNode &Node : DAG.allnodes()) 1254 AddToWorklist(&Node); 1255 1256 // Create a dummy node (which is not added to allnodes), that adds a reference 1257 // to the root node, preventing it from being deleted, and tracking any 1258 // changes of the root. 1259 HandleSDNode Dummy(DAG.getRoot()); 1260 1261 // while the worklist isn't empty, find a node and 1262 // try and combine it. 1263 while (!WorklistMap.empty()) { 1264 SDNode *N; 1265 // The Worklist holds the SDNodes in order, but it may contain null entries. 1266 do { 1267 N = Worklist.pop_back_val(); 1268 } while (!N); 1269 1270 bool GoodWorklistEntry = WorklistMap.erase(N); 1271 (void)GoodWorklistEntry; 1272 assert(GoodWorklistEntry && 1273 "Found a worklist entry without a corresponding map entry!"); 1274 1275 // If N has no uses, it is dead. Make sure to revisit all N's operands once 1276 // N is deleted from the DAG, since they too may now be dead or may have a 1277 // reduced number of uses, allowing other xforms. 1278 if (recursivelyDeleteUnusedNodes(N)) 1279 continue; 1280 1281 WorklistRemover DeadNodes(*this); 1282 1283 // If this combine is running after legalizing the DAG, re-legalize any 1284 // nodes pulled off the worklist. 1285 if (Level == AfterLegalizeDAG) { 1286 SmallSetVector<SDNode *, 16> UpdatedNodes; 1287 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); 1288 1289 for (SDNode *LN : UpdatedNodes) { 1290 AddToWorklist(LN); 1291 AddUsersToWorklist(LN); 1292 } 1293 if (!NIsValid) 1294 continue; 1295 } 1296 1297 DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); 1298 1299 // Add any operands of the new node which have not yet been combined to the 1300 // worklist as well. Because the worklist uniques things already, this 1301 // won't repeatedly process the same operand. 
1302 CombinedNodes.insert(N); 1303 for (const SDValue &ChildN : N->op_values()) 1304 if (!CombinedNodes.count(ChildN.getNode())) 1305 AddToWorklist(ChildN.getNode()); 1306 1307 SDValue RV = combine(N); 1308 1309 if (!RV.getNode()) 1310 continue; 1311 1312 ++NodesCombined; 1313 1314 // If we get back the same node we passed in, rather than a new node or 1315 // zero, we know that the node must have defined multiple values and 1316 // CombineTo was used. Since CombineTo takes care of the worklist 1317 // mechanics for us, we have no work to do in this case. 1318 if (RV.getNode() == N) 1319 continue; 1320 1321 assert(N->getOpcode() != ISD::DELETED_NODE && 1322 RV.getNode()->getOpcode() != ISD::DELETED_NODE && 1323 "Node was deleted but visit returned new node!"); 1324 1325 DEBUG(dbgs() << " ... into: "; 1326 RV.getNode()->dump(&DAG)); 1327 1328 // Transfer debug value. 1329 DAG.TransferDbgValues(SDValue(N, 0), RV); 1330 if (N->getNumValues() == RV.getNode()->getNumValues()) 1331 DAG.ReplaceAllUsesWith(N, RV.getNode()); 1332 else { 1333 assert(N->getValueType(0) == RV.getValueType() && 1334 N->getNumValues() == 1 && "Type mismatch"); 1335 SDValue OpV = RV; 1336 DAG.ReplaceAllUsesWith(N, &OpV); 1337 } 1338 1339 // Push the new node and any users onto the worklist 1340 AddToWorklist(RV.getNode()); 1341 AddUsersToWorklist(RV.getNode()); 1342 1343 // Finally, if the node is now dead, remove it from the graph. The node 1344 // may not be dead if the replacement process recursively simplified to 1345 // something else needing this node. This will also take care of adding any 1346 // operands which have lost a user to the worklist. 1347 recursivelyDeleteUnusedNodes(N); 1348 } 1349 1350 // If the root changed (e.g. it was a dead load, update the root). 
1351 DAG.setRoot(Dummy.getValue()); 1352 DAG.RemoveDeadNodes(); 1353 } 1354 1355 SDValue DAGCombiner::visit(SDNode *N) { 1356 switch (N->getOpcode()) { 1357 default: break; 1358 case ISD::TokenFactor: return visitTokenFactor(N); 1359 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); 1360 case ISD::ADD: return visitADD(N); 1361 case ISD::SUB: return visitSUB(N); 1362 case ISD::ADDC: return visitADDC(N); 1363 case ISD::SUBC: return visitSUBC(N); 1364 case ISD::ADDE: return visitADDE(N); 1365 case ISD::SUBE: return visitSUBE(N); 1366 case ISD::MUL: return visitMUL(N); 1367 case ISD::SDIV: return visitSDIV(N); 1368 case ISD::UDIV: return visitUDIV(N); 1369 case ISD::SREM: 1370 case ISD::UREM: return visitREM(N); 1371 case ISD::MULHU: return visitMULHU(N); 1372 case ISD::MULHS: return visitMULHS(N); 1373 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); 1374 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); 1375 case ISD::SMULO: return visitSMULO(N); 1376 case ISD::UMULO: return visitUMULO(N); 1377 case ISD::SMIN: 1378 case ISD::SMAX: 1379 case ISD::UMIN: 1380 case ISD::UMAX: return visitIMINMAX(N); 1381 case ISD::AND: return visitAND(N); 1382 case ISD::OR: return visitOR(N); 1383 case ISD::XOR: return visitXOR(N); 1384 case ISD::SHL: return visitSHL(N); 1385 case ISD::SRA: return visitSRA(N); 1386 case ISD::SRL: return visitSRL(N); 1387 case ISD::ROTR: 1388 case ISD::ROTL: return visitRotate(N); 1389 case ISD::BSWAP: return visitBSWAP(N); 1390 case ISD::CTLZ: return visitCTLZ(N); 1391 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); 1392 case ISD::CTTZ: return visitCTTZ(N); 1393 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); 1394 case ISD::CTPOP: return visitCTPOP(N); 1395 case ISD::SELECT: return visitSELECT(N); 1396 case ISD::VSELECT: return visitVSELECT(N); 1397 case ISD::SELECT_CC: return visitSELECT_CC(N); 1398 case ISD::SETCC: return visitSETCC(N); 1399 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); 1400 case ISD::ZERO_EXTEND: return 
visitZERO_EXTEND(N); 1401 case ISD::ANY_EXTEND: return visitANY_EXTEND(N); 1402 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); 1403 case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); 1404 case ISD::TRUNCATE: return visitTRUNCATE(N); 1405 case ISD::BITCAST: return visitBITCAST(N); 1406 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); 1407 case ISD::FADD: return visitFADD(N); 1408 case ISD::FSUB: return visitFSUB(N); 1409 case ISD::FMUL: return visitFMUL(N); 1410 case ISD::FMA: return visitFMA(N); 1411 case ISD::FDIV: return visitFDIV(N); 1412 case ISD::FREM: return visitFREM(N); 1413 case ISD::FSQRT: return visitFSQRT(N); 1414 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); 1415 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); 1416 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); 1417 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); 1418 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); 1419 case ISD::FP_ROUND: return visitFP_ROUND(N); 1420 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); 1421 case ISD::FP_EXTEND: return visitFP_EXTEND(N); 1422 case ISD::FNEG: return visitFNEG(N); 1423 case ISD::FABS: return visitFABS(N); 1424 case ISD::FFLOOR: return visitFFLOOR(N); 1425 case ISD::FMINNUM: return visitFMINNUM(N); 1426 case ISD::FMAXNUM: return visitFMAXNUM(N); 1427 case ISD::FCEIL: return visitFCEIL(N); 1428 case ISD::FTRUNC: return visitFTRUNC(N); 1429 case ISD::BRCOND: return visitBRCOND(N); 1430 case ISD::BR_CC: return visitBR_CC(N); 1431 case ISD::LOAD: return visitLOAD(N); 1432 case ISD::STORE: return visitSTORE(N); 1433 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); 1434 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); 1435 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); 1436 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); 1437 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); 1438 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); 1439 case 
ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); 1440 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); 1441 case ISD::MGATHER: return visitMGATHER(N); 1442 case ISD::MLOAD: return visitMLOAD(N); 1443 case ISD::MSCATTER: return visitMSCATTER(N); 1444 case ISD::MSTORE: return visitMSTORE(N); 1445 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); 1446 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); 1447 } 1448 return SDValue(); 1449 } 1450 1451 SDValue DAGCombiner::combine(SDNode *N) { 1452 SDValue RV = visit(N); 1453 1454 // If nothing happened, try a target-specific DAG combine. 1455 if (!RV.getNode()) { 1456 assert(N->getOpcode() != ISD::DELETED_NODE && 1457 "Node was deleted but visit returned NULL!"); 1458 1459 if (N->getOpcode() >= ISD::BUILTIN_OP_END || 1460 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { 1461 1462 // Expose the DAG combiner to the target combiner impls. 1463 TargetLowering::DAGCombinerInfo 1464 DagCombineInfo(DAG, Level, false, this); 1465 1466 RV = TLI.PerformDAGCombine(N, DagCombineInfo); 1467 } 1468 } 1469 1470 // If nothing happened still, try promoting the operation. 1471 if (!RV.getNode()) { 1472 switch (N->getOpcode()) { 1473 default: break; 1474 case ISD::ADD: 1475 case ISD::SUB: 1476 case ISD::MUL: 1477 case ISD::AND: 1478 case ISD::OR: 1479 case ISD::XOR: 1480 RV = PromoteIntBinOp(SDValue(N, 0)); 1481 break; 1482 case ISD::SHL: 1483 case ISD::SRA: 1484 case ISD::SRL: 1485 RV = PromoteIntShiftOp(SDValue(N, 0)); 1486 break; 1487 case ISD::SIGN_EXTEND: 1488 case ISD::ZERO_EXTEND: 1489 case ISD::ANY_EXTEND: 1490 RV = PromoteExtend(SDValue(N, 0)); 1491 break; 1492 case ISD::LOAD: 1493 if (PromoteLoad(SDValue(N, 0))) 1494 RV = SDValue(N, 0); 1495 break; 1496 } 1497 } 1498 1499 // If N is a commutative binary node, try commuting it to enable more 1500 // sdisel CSE. 
1501 if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) && 1502 N->getNumValues() == 1) { 1503 SDValue N0 = N->getOperand(0); 1504 SDValue N1 = N->getOperand(1); 1505 1506 // Constant operands are canonicalized to RHS. 1507 if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { 1508 SDValue Ops[] = {N1, N0}; 1509 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, 1510 N->getFlags()); 1511 if (CSENode) 1512 return SDValue(CSENode, 0); 1513 } 1514 } 1515 1516 return RV; 1517 } 1518 1519 /// Given a node, return its input chain if it has one, otherwise return a null 1520 /// sd operand. 1521 static SDValue getInputChainForNode(SDNode *N) { 1522 if (unsigned NumOps = N->getNumOperands()) { 1523 if (N->getOperand(0).getValueType() == MVT::Other) 1524 return N->getOperand(0); 1525 if (N->getOperand(NumOps-1).getValueType() == MVT::Other) 1526 return N->getOperand(NumOps-1); 1527 for (unsigned i = 1; i < NumOps-1; ++i) 1528 if (N->getOperand(i).getValueType() == MVT::Other) 1529 return N->getOperand(i); 1530 } 1531 return SDValue(); 1532 } 1533 1534 SDValue DAGCombiner::visitTokenFactor(SDNode *N) { 1535 // If N has two operands, where one has an input chain equal to the other, 1536 // the 'other' chain is redundant. 1537 if (N->getNumOperands() == 2) { 1538 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) 1539 return N->getOperand(0); 1540 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) 1541 return N->getOperand(1); 1542 } 1543 1544 SmallVector<SDNode *, 8> TFs; // List of token factors to visit. 1545 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. 1546 SmallPtrSet<SDNode*, 16> SeenOps; 1547 bool Changed = false; // If we should replace this token factor. 1548 1549 // Start out with this token factor. 1550 TFs.push_back(N); 1551 1552 // Iterate through token factors. The TFs grows when new token factors are 1553 // encountered. 
1554 for (unsigned i = 0; i < TFs.size(); ++i) { 1555 SDNode *TF = TFs[i]; 1556 1557 // Check each of the operands. 1558 for (const SDValue &Op : TF->op_values()) { 1559 1560 switch (Op.getOpcode()) { 1561 case ISD::EntryToken: 1562 // Entry tokens don't need to be added to the list. They are 1563 // redundant. 1564 Changed = true; 1565 break; 1566 1567 case ISD::TokenFactor: 1568 if (Op.hasOneUse() && 1569 std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { 1570 // Queue up for processing. 1571 TFs.push_back(Op.getNode()); 1572 // Clean up in case the token factor is removed. 1573 AddToWorklist(Op.getNode()); 1574 Changed = true; 1575 break; 1576 } 1577 // Fall thru 1578 1579 default: 1580 // Only add if it isn't already in the list. 1581 if (SeenOps.insert(Op.getNode()).second) 1582 Ops.push_back(Op); 1583 else 1584 Changed = true; 1585 break; 1586 } 1587 } 1588 } 1589 1590 SDValue Result; 1591 1592 // If we've changed things around then replace token factor. 1593 if (Changed) { 1594 if (Ops.empty()) { 1595 // The entry token is the only possible outcome. 1596 Result = DAG.getEntryNode(); 1597 } else { 1598 // New and improved token factor. 1599 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); 1600 } 1601 1602 // Add users to worklist if AA is enabled, since it may introduce 1603 // a lot of new chained token factors while removing memory deps. 1604 bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA 1605 : DAG.getSubtarget().useAA(); 1606 return CombineTo(N, Result, UseAA /*add to worklist*/); 1607 } 1608 1609 return Result; 1610 } 1611 1612 /// MERGE_VALUES can always be eliminated. 1613 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { 1614 WorklistRemover DeadNodes(*this); 1615 // Replacing results may cause a different MERGE_VALUES to suddenly 1616 // be CSE'd with N, and carry its uses with it. Iterate until no 1617 // uses remain, to ensure that the node can be safely deleted. 
1618 // First add the users of this node to the work list so that they 1619 // can be tried again once they have new operands. 1620 AddUsersToWorklist(N); 1621 do { 1622 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1623 DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); 1624 } while (!N->use_empty()); 1625 deleteAndRecombine(N); 1626 return SDValue(N, 0); // Return N so it doesn't get rechecked! 1627 } 1628 1629 static bool isNullConstant(SDValue V) { 1630 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 1631 return Const != nullptr && Const->isNullValue(); 1632 } 1633 1634 static bool isNullFPConstant(SDValue V) { 1635 ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); 1636 return Const != nullptr && Const->isZero() && !Const->isNegative(); 1637 } 1638 1639 static bool isAllOnesConstant(SDValue V) { 1640 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 1641 return Const != nullptr && Const->isAllOnesValue(); 1642 } 1643 1644 static bool isOneConstant(SDValue V) { 1645 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 1646 return Const != nullptr && Const->isOne(); 1647 } 1648 1649 /// If \p N is a ContantSDNode with isOpaque() == false return it casted to a 1650 /// ContantSDNode pointer else nullptr. 1651 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { 1652 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); 1653 return Const != nullptr && !Const->isOpaque() ? 
Const : nullptr;
}

/// Try to simplify the ISD::ADD node \p N.
///
/// The folds below are tried strictly in the order written; the first one that
/// matches produces the return value. Returns an empty SDValue when nothing
/// applies, and SDValue(N, 0) when SimplifyDemandedBits reports a change.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (add c1, c2) -> c1+c2
  // N0C/N1C are null unless the operand is a non-opaque constant.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
     !isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (add Sym, c) -> Sym+c
  // Only before operation legalization, and only when the target reports that
  // offset folding is legal for this global address.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
        GA->getOpcode() == ISD::GlobalAddress)
      return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                  GA->getOffset() +
                                    (uint64_t)N1C->getSExtValue());
  // fold ((c1-A)+c2) -> (c1+c2)-A
  // NOTE(review): the N0C declared in this 'if' shadows the function-level
  // N0C above; here it refers to the constant c1 inside the SUB.
  if (N1C && N0.getOpcode() == ISD::SUB)
    if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getConstant(N1C->getAPIntValue()+
                                         N0C->getAPIntValue(), DL, VT),
                         N0.getOperand(1));
    }
  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
    return RADD;
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);
  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));
  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));
  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // Scalar only: if SimplifyDemandedBits reports a change, hand back N itself.
  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstant(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
                       DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));
  // Same fold with the operands commuted: (add shl(0 - y, n), x).
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstant(N0.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
                       DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                   N0.getOperand(0).getOperand(1),
                                   N0.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarType().getSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    // NumSignBits == DestBits means every bit of AndOp0 is a sign bit.
    if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDLoc DL(N);
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Try to simplify the ISD::ADDC node \p N (result 0 is the sum, result 1 is
/// the carry flag). Returns an empty SDValue when no fold applies.
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 SDLoc(N), MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        SDLoc(N), MVT::Glue));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  DAG.computeKnownBits(N0, LHSZero, LHSOne);

  // Only query the RHS when at least one LHS bit is known to be zero.
  if (LHSZero.getBoolValue()) {
    DAG.computeKnownBits(N1, RHSZero, RHSOne);

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
    if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
      return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
                       DAG.getNode(ISD::CARRY_FALSE,
                                   SDLoc(N), MVT::Glue));
  }

  return SDValue();
}

/// Try to simplify the ISD::ADDE node \p N, whose third operand is the
/// incoming carry. Returns an empty SDValue when no fold applies.
SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG,
                             bool LegalOperations, bool LegalTypes) {
  // NOTE(review): LegalTypes is accepted but never read in this function.
  // Scalars can always be folded to a zero constant.
  if (!VT.isVector())
    return DAG.getConstant(0, DL, VT);
  // Vectors: only before operation legalization, or when BUILD_VECTOR is
  // legal for this type.
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, DL, VT);
  return SDValue();
}

/// Try to simplify the ISD::SUB node \p N. Folds are tried in the order
/// written; returns an empty SDValue when none applies.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
  // fold (sub c1, c2) -> c1-c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C);
  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstant(N0))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold C2-(A+C1) -> (C2-C1)-A
  // N1C1 is the constant RHS of the inner ADD, or null if N1 is not such an ADD.
  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
    dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDLoc DL(N);
    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
                                   DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, NewC,
                       N1.getOperand(0));
  }
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                      (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 SDLoc(N), VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Try to simplify the ISD::SUBC node \p N (result 0 is the difference,
/// result 1 is the borrow flag). Returns an empty SDValue when no fold applies.
SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (isAllOnesConstant(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

/// Try to simplify the ISD::SUBE node \p N, whose third operand is the
/// incoming borrow. Returns an empty SDValue when no fold applies.
SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

/// Try to simplify the ISD::MUL node \p N. Handles both scalar constants and
/// constant splat vectors; returns an empty SDValue when no fold applies.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);

  // Constant-ness of each operand. For vectors this means "constant splat";
  // the opaque flags are only ever set on the scalar path below.
  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
     !isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1 == 0)
    return N1;
  // We require a splat of the entire scalar bit width for non-contiguous
  // bit patterns.
  bool IsFullSplat =
    ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
    return N0;
  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
      IsFullSplat) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SHL, DL, VT, N0,
                       DAG.getConstant(ConstValue1.logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
      IsFullSplat) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // Val only receives the splat value from isConstantSplatVector below; it is
  // not read afterwards.
  APInt Val;
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1IsConst && N0.getOpcode() == ISD::SHL &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
                     isa<ConstantSDNode>(N0.getOperand(1)))) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
                             N1, N0.getOperand(1));
    AddToWorklist(C3.getNode());
    return DAG.getNode(ISD::MUL, SDLoc(N), VT,
                       N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr,0), Y(nullptr,0);
    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    // NOTE(review): the N0 form accepts a constant splat shift amount, the N1
    // form only a scalar ConstantSDNode.
    if (N0.getOpcode() == ISD::SHL &&
        (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
                       isa<ConstantSDNode>(N0.getOperand(1))) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT,
                         Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

  return SDValue();
}

/// Return true if divmod libcall is available.
///
/// The libcall is selected from the simple value type of result 0 of \p Node
/// and \p isSigned; any type other than i8/i16/i32/i64/i128 yields false.
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
                                     const TargetLowering &TLI) {
  RTLIB::Libcall LC;
  switch (Node->getSimpleValueType(0).SimpleTy) {
  default: return false; // No libcall for vector types.
  case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
  case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
  case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
  case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
  }

  return TLI.getLibcallName(LC) != nullptr;
}

/// Issue divrem if both quotient and remainder are needed.
///
/// \p Node is a [SU]DIV or [SU]REM. Other users of the same two operands with
/// a matching div/rem/divrem opcode are rewritten via CombineTo to use a single
/// DIVREM node. Returns that DIVREM value, or an empty SDValue if none was
/// formed or found.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  EVT VT = Node->getValueType(0);
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);

  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  // OtherOpcode is the counterpart of Opcode: REM for a DIV, DIV for a REM.
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  // Walk every user of the dividend looking for matching div/rem/divrem nodes.
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == Node || User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        if (UserOpc == OtherOpcode) {
          // Found the counterpart operation: create the two-result DIVREM.
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          // An existing DIVREM on the same operands: reuse it.
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Result 0 of the DIVREM replaces a DIV user, result 1 a REM user.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}

/// Try to simplify the ISD::SDIV node \p N. Returns an empty SDValue when no
/// fold applies.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
  }

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
      !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
      (N1C->getAPIntValue().isPowerOf2() ||
       (-N1C->getAPIntValue()).isPowerOf2())) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // log2 of the divisor's magnitude (the divisors 1 and -1 were handled
    // above).
    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, DL, VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue SRL =
        DAG.getNode(ISD::SRL, DL, VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
                  DAG.getConstant(lg2, DL,
                                  getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
  // Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, DL, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Try to simplify the ISD::UDIV node \p N. Returns an empty SDValue when no
/// fold applies.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SRL, DL, VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        // The new shift amount is computed in the type of the original one.
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                                  .logBase2(),
                                                  DL, ADDVT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
      }
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
  // Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, DL, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

// handles ISD::SREM and ISD::UREM
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (!VT.isVector()) {
      if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
        return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
    }
  } else {
    // fold (urem x, pow2) -> (and x, pow2-1)
    if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
        N1C->getAPIntValue().isPowerOf2()) {
      return DAG.getNode(ISD::AND, DL, VT, N0,
                         DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
    }
    // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
    if (N1.getOpcode() == ISD::SHL) {
      if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
        if (SHC->getAPIntValue().isPowerOf2()) {
          // NOTE(review): the all-ones constant uses VT.getSizeInBits(), the
          // whole-type width; presumably VT is always scalar here because SHC
          // matched a scalar constant -- confirm.
          SDValue Add =
            DAG.getNode(ISD::ADD, DL, VT, N1,
                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
                                 VT));
          AddToWorklist(Add.getNode());
          return DAG.getNode(ISD::AND, DL, VT, N0, Add);
        }
      }
    }
  }

  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // To avoid mangling nodes, this simplification requires that the combine()
  // call for the speculative DIV must not cause a DIVREM conversion.  We guard
  // against this by skipping the simplification if isIntDivCheap().  When
  // div is not cheap, combine will not return a DIVREM.  Regardless,
  // checking cheapness here makes sense since the simplification results in
  // fatter code.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    // Proceed only if combine() produced a node different from Div.
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
             (OptimizedDiv.getOpcode() != ISD::SDIVREM));
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem (likewise udiv, urem -> udivrem)
  // Result 1 of the DIVREM node is the remainder.
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, DL, VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Try to simplify the ISD::MULHS node \p N. Returns an empty SDValue when no
/// fold applies.
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  if (isOneConstant(N1)) {
    // NOTE(review): this DL shadows the function-level DL declared above; both
    // are constructed from N.
    SDLoc DL(N);
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
                                       DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
  // fold (mulhs x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // sext both operands, multiply wide, shift the high half down, truncate.
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// Try to simplify the ISD::MULHU node \p N. Returns an empty SDValue when no
/// fold applies.
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhu x, 1) -> 0
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // zext both operands, multiply wide, shift the high half down, truncate.
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// Perform optimizations common to nodes that compute two values.
/// LoOp and HiOp give the opcodes for the two computations that are being
/// performed. Returns the value produced by CombineTo() if a simplification
/// was made, or a null SDValue otherwise.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  // Result #1 has no uses here, so the same node is handed to CombineTo for
  // both results.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  // NOTE(review): this path requires isOperationLegal while the low-half path
  // above accepts isOperationLegalOrCustom -- confirm the asymmetry is
  // intentional.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  // Each half is built speculatively and run through combine(); it is only
  // used when combine() produced a different node that is acceptable at the
  // current legalization level.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}

/// Combine an SMUL_LOHI node. First try to reduce it to a single MUL or MULHS
/// via SimplifyNodeWithTwoResults; otherwise, when a MUL on the integer type
/// of twice the width is legal, compute both halves from one wide multiply.
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type that is twice as wide is legal, transform the smul_lohi into
  // a wider multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Lo/Hi are reused as scratch names: first the two sign-extended
      // operands, then the wide product and its two halves.
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result #1): shift the wide product right by
      // the original width and truncate.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result #0): truncate the wide product.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

/// Combine a UMUL_LOHI node. Same strategy as visitSMUL_LOHI, but using MULHU
/// and zero extension of the operands.
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type that is twice as wide is legal, transform the umul_lohi into
  // a wider multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Lo/Hi are reused as scratch names: first the two zero-extended
      // operands, then the wide product and its two halves.
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result #1): shift the wide product right by
      // the original width and truncate.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result #0): truncate the wide product.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

/// Combine an SMULO node. Only a constant right-hand operand equal to 2 is
/// handled; anything else yields a null SDValue.
SDValue DAGCombiner::visitSMULO(SDNode *N) {
  // (smulo x, 2) -> (saddo x, x)
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

/// Combine a UMULO node. Only a constant right-hand operand equal to 2 is
/// handled; anything else yields a null SDValue.
SDValue DAGCombiner::visitUMULO(SDNode *N) {
  // (umulo x, 2) -> (uaddo x, x)
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

/// Generic combines for the two-operand node N; the opcode is always taken
/// from N itself (judging by the name, callers pass integer min/max nodes --
/// TODO confirm). Performs vector folding, constant folding and
/// canonicalization of a constant operand to the right-hand side.
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (op c1, c2) -> constant, for two non-opaque constant operands
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
     !isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  return SDValue();
}

/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it. Callers must guarantee that both operands of N have the same
/// opcode (asserted below). Returns the replacement value, or a null SDValue
/// if no transform applied.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  // (Everything below reads operand 0 of N0/N1, so leaf nodes are excluded.)
  if (N0.getNode()->getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    // Despite the name, ORNode carries N's own opcode (AND/OR/XOR).
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization after type legalization and before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
      Level == AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      // (For XOR the shared operand cancels out, so it is replaced by a zero
      // vector before type legalization, or the fold is disabled by clearing
      // ShOp afterwards.)
      if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    &SVN0->getMask()[0]);
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    &SVN0->getMask()[0]);
      }
    }
  }

  return SDValue();
}

/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value.  This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
2883 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, 2884 SDNode *LocReference) { 2885 EVT VT = N1.getValueType(); 2886 2887 // fold (and x, undef) -> 0 2888 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2889 return DAG.getConstant(0, SDLoc(LocReference), VT); 2890 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2891 SDValue LL, LR, RL, RR, CC0, CC1; 2892 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2893 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2894 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2895 2896 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2897 LL.getValueType().isInteger()) { 2898 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2899 if (isNullConstant(LR) && Op1 == ISD::SETEQ) { 2900 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2901 LR.getValueType(), LL, RL); 2902 AddToWorklist(ORNode.getNode()); 2903 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 2904 } 2905 if (isAllOnesConstant(LR)) { 2906 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2907 if (Op1 == ISD::SETEQ) { 2908 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), 2909 LR.getValueType(), LL, RL); 2910 AddToWorklist(ANDNode.getNode()); 2911 return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); 2912 } 2913 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2914 if (Op1 == ISD::SETGT) { 2915 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2916 LR.getValueType(), LL, RL); 2917 AddToWorklist(ORNode.getNode()); 2918 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 2919 } 2920 } 2921 } 2922 // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) 2923 if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && 2924 Op0 == Op1 && LL.getValueType().isInteger() && 2925 Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || 2926 (isAllOnesConstant(LR) && 
isNullConstant(RR)))) { 2927 SDLoc DL(N0); 2928 SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), 2929 LL, DAG.getConstant(1, DL, 2930 LL.getValueType())); 2931 AddToWorklist(ADDNode.getNode()); 2932 return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, 2933 DAG.getConstant(2, DL, LL.getValueType()), 2934 ISD::SETUGE); 2935 } 2936 // canonicalize equivalent to ll == rl 2937 if (LL == RR && LR == RL) { 2938 Op1 = ISD::getSetCCSwappedOperands(Op1); 2939 std::swap(RL, RR); 2940 } 2941 if (LL == RL && LR == RR) { 2942 bool isInteger = LL.getValueType().isInteger(); 2943 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2944 if (Result != ISD::SETCC_INVALID && 2945 (!LegalOperations || 2946 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 2947 TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { 2948 EVT CCVT = getSetCCResultType(LL.getValueType()); 2949 if (N0.getValueType() == CCVT || 2950 (!LegalOperations && N0.getValueType() == MVT::i1)) 2951 return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), 2952 LL, LR, Result); 2953 } 2954 } 2955 } 2956 2957 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 2958 VT.getSizeInBits() <= 64) { 2959 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 2960 APInt ADDC = ADDI->getAPIntValue(); 2961 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2962 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 2963 // immediate for an add, but it is legal if its top c2 bits are set, 2964 // transform the ADD so the immediate doesn't need to be materialized 2965 // in a register. 
2966 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { 2967 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 2968 SRLI->getZExtValue()); 2969 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { 2970 ADDC |= Mask; 2971 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2972 SDLoc DL(N0); 2973 SDValue NewAdd = 2974 DAG.getNode(ISD::ADD, DL, VT, 2975 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); 2976 CombineTo(N0.getNode(), NewAdd); 2977 // Return N so it doesn't get rechecked! 2978 return SDValue(LocReference, 0); 2979 } 2980 } 2981 } 2982 } 2983 } 2984 } 2985 2986 return SDValue(); 2987 } 2988 2989 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, 2990 EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, 2991 bool &NarrowLoad) { 2992 uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits(); 2993 2994 if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue())) 2995 return false; 2996 2997 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 2998 LoadedVT = LoadN->getMemoryVT(); 2999 3000 if (ExtVT == LoadedVT && 3001 (!LegalOperations || 3002 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) { 3003 // ZEXTLOAD will match without needing to change the size of the value being 3004 // loaded. 3005 NarrowLoad = false; 3006 return true; 3007 } 3008 3009 // Do not change the width of a volatile load. 3010 if (LoadN->isVolatile()) 3011 return false; 3012 3013 // Do not generate loads of non-round integer types since these can 3014 // be expensive (and would be wrong if the type is not byte sized). 
3015 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound()) 3016 return false; 3017 3018 if (LegalOperations && 3019 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT)) 3020 return false; 3021 3022 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) 3023 return false; 3024 3025 NarrowLoad = true; 3026 return true; 3027 } 3028 3029 SDValue DAGCombiner::visitAND(SDNode *N) { 3030 SDValue N0 = N->getOperand(0); 3031 SDValue N1 = N->getOperand(1); 3032 EVT VT = N1.getValueType(); 3033 3034 // fold vector ops 3035 if (VT.isVector()) { 3036 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 3037 return FoldedVOp; 3038 3039 // fold (and x, 0) -> 0, vector edition 3040 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3041 // do not return N0, because undef node may exist in N0 3042 return DAG.getConstant( 3043 APInt::getNullValue( 3044 N0.getValueType().getScalarType().getSizeInBits()), 3045 SDLoc(N), N0.getValueType()); 3046 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3047 // do not return N1, because undef node may exist in N1 3048 return DAG.getConstant( 3049 APInt::getNullValue( 3050 N1.getValueType().getScalarType().getSizeInBits()), 3051 SDLoc(N), N1.getValueType()); 3052 3053 // fold (and x, -1) -> x, vector edition 3054 if (ISD::isBuildVectorAllOnes(N0.getNode())) 3055 return N1; 3056 if (ISD::isBuildVectorAllOnes(N1.getNode())) 3057 return N0; 3058 } 3059 3060 // fold (and c1, c2) -> c1&c2 3061 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 3062 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3063 if (N0C && N1C && !N1C->isOpaque()) 3064 return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); 3065 // canonicalize constant to RHS 3066 if (isConstantIntBuildVectorOrConstantInt(N0) && 3067 !isConstantIntBuildVectorOrConstantInt(N1)) 3068 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); 3069 // fold (and x, -1) -> x 3070 if (isAllOnesConstant(N1)) 3071 return N0; 3072 // if (and x, c) is known to be zero, return 0 3073 unsigned BitWidth = 
VT.getScalarType().getSizeInBits(); 3074 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 3075 APInt::getAllOnesValue(BitWidth))) 3076 return DAG.getConstant(0, SDLoc(N), VT); 3077 // reassociate and 3078 if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) 3079 return RAND; 3080 // fold (and (or x, C), D) -> D if (C & D) == D 3081 if (N1C && N0.getOpcode() == ISD::OR) 3082 if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 3083 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 3084 return N1; 3085 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 3086 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 3087 SDValue N0Op0 = N0.getOperand(0); 3088 APInt Mask = ~N1C->getAPIntValue(); 3089 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 3090 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 3091 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 3092 N0.getValueType(), N0Op0); 3093 3094 // Replace uses of the AND with uses of the Zero extend node. 3095 CombineTo(N, Zext); 3096 3097 // We actually want to replace all uses of the any_extend with the 3098 // zero_extend, to avoid duplicating things. This will later cause this 3099 // AND to be folded. 3100 CombineTo(N0.getNode(), Zext); 3101 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3102 } 3103 } 3104 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> 3105 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must 3106 // already be zero by virtue of the width of the base type of the load. 3107 // 3108 // the 'X' node here can either be nothing or an extract_vector_elt to catch 3109 // more cases. 3110 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 3111 N0.getOperand(0).getOpcode() == ISD::LOAD) || 3112 N0.getOpcode() == ISD::LOAD) { 3113 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? 
3114 N0 : N0.getOperand(0) ); 3115 3116 // Get the constant (if applicable) the zero'th operand is being ANDed with. 3117 // This can be a pure constant or a vector splat, in which case we treat the 3118 // vector as a scalar and use the splat value. 3119 APInt Constant = APInt::getNullValue(1); 3120 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 3121 Constant = C->getAPIntValue(); 3122 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { 3123 APInt SplatValue, SplatUndef; 3124 unsigned SplatBitSize; 3125 bool HasAnyUndefs; 3126 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, 3127 SplatBitSize, HasAnyUndefs); 3128 if (IsSplat) { 3129 // Undef bits can contribute to a possible optimisation if set, so 3130 // set them. 3131 SplatValue |= SplatUndef; 3132 3133 // The splat value may be something like "0x00FFFFFF", which means 0 for 3134 // the first vector value and FF for the rest, repeating. We need a mask 3135 // that will apply equally to all members of the vector, so AND all the 3136 // lanes of the constant together. 3137 EVT VT = Vector->getValueType(0); 3138 unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); 3139 3140 // If the splat value has been compressed to a bitlength lower 3141 // than the size of the vector lane, we need to re-expand it to 3142 // the lane size. 3143 if (BitWidth > SplatBitSize) 3144 for (SplatValue = SplatValue.zextOrTrunc(BitWidth); 3145 SplatBitSize < BitWidth; 3146 SplatBitSize = SplatBitSize * 2) 3147 SplatValue |= SplatValue.shl(SplatBitSize); 3148 3149 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a 3150 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. 
3151 if (SplatBitSize % BitWidth == 0) { 3152 Constant = APInt::getAllOnesValue(BitWidth); 3153 for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) 3154 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); 3155 } 3156 } 3157 } 3158 3159 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is 3160 // actually legal and isn't going to get expanded, else this is a false 3161 // optimisation. 3162 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, 3163 Load->getValueType(0), 3164 Load->getMemoryVT()); 3165 3166 // Resize the constant to the same size as the original memory access before 3167 // extension. If it is still the AllOnesValue then this AND is completely 3168 // unneeded. 3169 Constant = 3170 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); 3171 3172 bool B; 3173 switch (Load->getExtensionType()) { 3174 default: B = false; break; 3175 case ISD::EXTLOAD: B = CanZextLoadProfitably; break; 3176 case ISD::ZEXTLOAD: 3177 case ISD::NON_EXTLOAD: B = true; break; 3178 } 3179 3180 if (B && Constant.isAllOnesValue()) { 3181 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to 3182 // preserve semantics once we get rid of the AND. 3183 SDValue NewLoad(Load, 0); 3184 if (Load->getExtensionType() == ISD::EXTLOAD) { 3185 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, 3186 Load->getValueType(0), SDLoc(Load), 3187 Load->getChain(), Load->getBasePtr(), 3188 Load->getOffset(), Load->getMemoryVT(), 3189 Load->getMemOperand()); 3190 // Replace uses of the EXTLOAD with the new ZEXTLOAD. 3191 if (Load->getNumValues() == 3) { 3192 // PRE/POST_INC loads have 3 values. 
3193 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), 3194 NewLoad.getValue(2) }; 3195 CombineTo(Load, To, 3, true); 3196 } else { 3197 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); 3198 } 3199 } 3200 3201 // Fold the AND away, taking care not to fold to the old load node if we 3202 // replaced it. 3203 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); 3204 3205 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3206 } 3207 } 3208 3209 // fold (and (load x), 255) -> (zextload x, i8) 3210 // fold (and (extload x, i16), 255) -> (zextload x, i8) 3211 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 3212 if (N1C && (N0.getOpcode() == ISD::LOAD || 3213 (N0.getOpcode() == ISD::ANY_EXTEND && 3214 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 3215 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 3216 LoadSDNode *LN0 = HasAnyExt 3217 ? cast<LoadSDNode>(N0.getOperand(0)) 3218 : cast<LoadSDNode>(N0); 3219 if (LN0->getExtensionType() != ISD::SEXTLOAD && 3220 LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { 3221 auto NarrowLoad = false; 3222 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 3223 EVT ExtVT, LoadedVT; 3224 if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT, 3225 NarrowLoad)) { 3226 if (!NarrowLoad) { 3227 SDValue NewLoad = 3228 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 3229 LN0->getChain(), LN0->getBasePtr(), ExtVT, 3230 LN0->getMemOperand()); 3231 AddToWorklist(N); 3232 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 3233 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3234 } else { 3235 EVT PtrType = LN0->getOperand(1).getValueType(); 3236 3237 unsigned Alignment = LN0->getAlignment(); 3238 SDValue NewPtr = LN0->getBasePtr(); 3239 3240 // For big endian targets, we need to add an offset to the pointer 3241 // to load the correct bytes. For little endian systems, we merely 3242 // need to read fewer bytes from the same pointer. 
3243 if (DAG.getDataLayout().isBigEndian()) { 3244 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 3245 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 3246 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 3247 SDLoc DL(LN0); 3248 NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, 3249 NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); 3250 Alignment = MinAlign(Alignment, PtrOff); 3251 } 3252 3253 AddToWorklist(NewPtr.getNode()); 3254 3255 SDValue Load = 3256 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 3257 LN0->getChain(), NewPtr, 3258 LN0->getPointerInfo(), 3259 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 3260 LN0->isInvariant(), Alignment, LN0->getAAInfo()); 3261 AddToWorklist(N); 3262 CombineTo(LN0, Load, Load.getValue(1)); 3263 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3264 } 3265 } 3266 } 3267 } 3268 3269 if (SDValue Combined = visitANDLike(N0, N1, N)) 3270 return Combined; 3271 3272 // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 3273 if (N0.getOpcode() == N1.getOpcode()) 3274 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) 3275 return Tmp; 3276 3277 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 3278 // fold (and (sra)) -> (and (srl)) when possible. 3279 if (!VT.isVector() && 3280 SimplifyDemandedBits(SDValue(N, 0))) 3281 return SDValue(N, 0); 3282 3283 // fold (zext_inreg (extload x)) -> (zextload x) 3284 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 3285 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 3286 EVT MemVT = LN0->getMemoryVT(); 3287 // If we zero all the possible extended bits, then we can turn this into 3288 // a zextload if we are running before legalize or the operation is legal. 
3289 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 3290 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 3291 BitWidth - MemVT.getScalarType().getSizeInBits())) && 3292 ((!LegalOperations && !LN0->isVolatile()) || 3293 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 3294 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 3295 LN0->getChain(), LN0->getBasePtr(), 3296 MemVT, LN0->getMemOperand()); 3297 AddToWorklist(N); 3298 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 3299 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3300 } 3301 } 3302 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 3303 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 3304 N0.hasOneUse()) { 3305 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 3306 EVT MemVT = LN0->getMemoryVT(); 3307 // If we zero all the possible extended bits, then we can turn this into 3308 // a zextload if we are running before legalize or the operation is legal. 3309 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 3310 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 3311 BitWidth - MemVT.getScalarType().getSizeInBits())) && 3312 ((!LegalOperations && !LN0->isVolatile()) || 3313 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 3314 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 3315 LN0->getChain(), LN0->getBasePtr(), 3316 MemVT, LN0->getMemOperand()); 3317 AddToWorklist(N); 3318 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 3319 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
3320 } 3321 } 3322 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) 3323 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { 3324 SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), 3325 N0.getOperand(1), false); 3326 if (BSwap.getNode()) 3327 return BSwap; 3328 } 3329 3330 return SDValue(); 3331 } 3332 3333 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16. 3334 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 3335 bool DemandHighBits) { 3336 if (!LegalOperations) 3337 return SDValue(); 3338 3339 EVT VT = N->getValueType(0); 3340 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) 3341 return SDValue(); 3342 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 3343 return SDValue(); 3344 3345 // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) 3346 bool LookPassAnd0 = false; 3347 bool LookPassAnd1 = false; 3348 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) 3349 std::swap(N0, N1); 3350 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) 3351 std::swap(N0, N1); 3352 if (N0.getOpcode() == ISD::AND) { 3353 if (!N0.getNode()->hasOneUse()) 3354 return SDValue(); 3355 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3356 if (!N01C || N01C->getZExtValue() != 0xFF00) 3357 return SDValue(); 3358 N0 = N0.getOperand(0); 3359 LookPassAnd0 = true; 3360 } 3361 3362 if (N1.getOpcode() == ISD::AND) { 3363 if (!N1.getNode()->hasOneUse()) 3364 return SDValue(); 3365 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 3366 if (!N11C || N11C->getZExtValue() != 0xFF) 3367 return SDValue(); 3368 N1 = N1.getOperand(0); 3369 LookPassAnd1 = true; 3370 } 3371 3372 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) 3373 std::swap(N0, N1); 3374 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) 3375 return SDValue(); 3376 if (!N0.getNode()->hasOneUse() || 3377 !N1.getNode()->hasOneUse()) 3378 
return SDValue(); 3379 3380 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3381 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 3382 if (!N01C || !N11C) 3383 return SDValue(); 3384 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8) 3385 return SDValue(); 3386 3387 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) 3388 SDValue N00 = N0->getOperand(0); 3389 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { 3390 if (!N00.getNode()->hasOneUse()) 3391 return SDValue(); 3392 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); 3393 if (!N001C || N001C->getZExtValue() != 0xFF) 3394 return SDValue(); 3395 N00 = N00.getOperand(0); 3396 LookPassAnd0 = true; 3397 } 3398 3399 SDValue N10 = N1->getOperand(0); 3400 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { 3401 if (!N10.getNode()->hasOneUse()) 3402 return SDValue(); 3403 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); 3404 if (!N101C || N101C->getZExtValue() != 0xFF00) 3405 return SDValue(); 3406 N10 = N10.getOperand(0); 3407 LookPassAnd1 = true; 3408 } 3409 3410 if (N00 != N10) 3411 return SDValue(); 3412 3413 // Make sure everything beyond the low halfword gets set to zero since the SRL 3414 // 16 will clear the top bits. 3415 unsigned OpSizeInBits = VT.getSizeInBits(); 3416 if (DemandHighBits && OpSizeInBits > 16) { 3417 // If the left-shift isn't masked out then the only way this is a bswap is 3418 // if all bits beyond the low 8 are 0. In that case the entire pattern 3419 // reduces to a left shift anyway: leave it for other parts of the combiner. 3420 if (!LookPassAnd0) 3421 return SDValue(); 3422 3423 // However, if the right shift isn't masked out then it might be because 3424 // it's not needed. See if we can spot that too. 
3425 if (!LookPassAnd1 && 3426 !DAG.MaskedValueIsZero( 3427 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) 3428 return SDValue(); 3429 } 3430 3431 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); 3432 if (OpSizeInBits > 16) { 3433 SDLoc DL(N); 3434 Res = DAG.getNode(ISD::SRL, DL, VT, Res, 3435 DAG.getConstant(OpSizeInBits - 16, DL, 3436 getShiftAmountTy(VT))); 3437 } 3438 return Res; 3439 } 3440 3441 /// Return true if the specified node is an element that makes up a 32-bit 3442 /// packed halfword byteswap. 3443 /// ((x & 0x000000ff) << 8) | 3444 /// ((x & 0x0000ff00) >> 8) | 3445 /// ((x & 0x00ff0000) << 8) | 3446 /// ((x & 0xff000000) >> 8) 3447 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { 3448 if (!N.getNode()->hasOneUse()) 3449 return false; 3450 3451 unsigned Opc = N.getOpcode(); 3452 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) 3453 return false; 3454 3455 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3456 if (!N1C) 3457 return false; 3458 3459 unsigned Num; 3460 switch (N1C->getZExtValue()) { 3461 default: 3462 return false; 3463 case 0xFF: Num = 0; break; 3464 case 0xFF00: Num = 1; break; 3465 case 0xFF0000: Num = 2; break; 3466 case 0xFF000000: Num = 3; break; 3467 } 3468 3469 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 
3470 SDValue N0 = N.getOperand(0); 3471 if (Opc == ISD::AND) { 3472 if (Num == 0 || Num == 2) { 3473 // (x >> 8) & 0xff 3474 // (x >> 8) & 0xff0000 3475 if (N0.getOpcode() != ISD::SRL) 3476 return false; 3477 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3478 if (!C || C->getZExtValue() != 8) 3479 return false; 3480 } else { 3481 // (x << 8) & 0xff00 3482 // (x << 8) & 0xff000000 3483 if (N0.getOpcode() != ISD::SHL) 3484 return false; 3485 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3486 if (!C || C->getZExtValue() != 8) 3487 return false; 3488 } 3489 } else if (Opc == ISD::SHL) { 3490 // (x & 0xff) << 8 3491 // (x & 0xff0000) << 8 3492 if (Num != 0 && Num != 2) 3493 return false; 3494 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3495 if (!C || C->getZExtValue() != 8) 3496 return false; 3497 } else { // Opc == ISD::SRL 3498 // (x & 0xff00) >> 8 3499 // (x & 0xff000000) >> 8 3500 if (Num != 1 && Num != 3) 3501 return false; 3502 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3503 if (!C || C->getZExtValue() != 8) 3504 return false; 3505 } 3506 3507 if (Parts[Num]) 3508 return false; 3509 3510 Parts[Num] = N0.getOperand(0).getNode(); 3511 return true; 3512 } 3513 3514 /// Match a 32-bit packed halfword bswap. 
That is 3515 /// ((x & 0x000000ff) << 8) | 3516 /// ((x & 0x0000ff00) >> 8) | 3517 /// ((x & 0x00ff0000) << 8) | 3518 /// ((x & 0xff000000) >> 8) 3519 /// => (rotl (bswap x), 16) 3520 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { 3521 if (!LegalOperations) 3522 return SDValue(); 3523 3524 EVT VT = N->getValueType(0); 3525 if (VT != MVT::i32) 3526 return SDValue(); 3527 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 3528 return SDValue(); 3529 3530 // Look for either 3531 // (or (or (and), (and)), (or (and), (and))) 3532 // (or (or (or (and), (and)), (and)), (and)) 3533 if (N0.getOpcode() != ISD::OR) 3534 return SDValue(); 3535 SDValue N00 = N0.getOperand(0); 3536 SDValue N01 = N0.getOperand(1); 3537 SDNode *Parts[4] = {}; 3538 3539 if (N1.getOpcode() == ISD::OR && 3540 N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { 3541 // (or (or (and), (and)), (or (and), (and))) 3542 SDValue N000 = N00.getOperand(0); 3543 if (!isBSwapHWordElement(N000, Parts)) 3544 return SDValue(); 3545 3546 SDValue N001 = N00.getOperand(1); 3547 if (!isBSwapHWordElement(N001, Parts)) 3548 return SDValue(); 3549 SDValue N010 = N01.getOperand(0); 3550 if (!isBSwapHWordElement(N010, Parts)) 3551 return SDValue(); 3552 SDValue N011 = N01.getOperand(1); 3553 if (!isBSwapHWordElement(N011, Parts)) 3554 return SDValue(); 3555 } else { 3556 // (or (or (or (and), (and)), (and)), (and)) 3557 if (!isBSwapHWordElement(N1, Parts)) 3558 return SDValue(); 3559 if (!isBSwapHWordElement(N01, Parts)) 3560 return SDValue(); 3561 if (N00.getOpcode() != ISD::OR) 3562 return SDValue(); 3563 SDValue N000 = N00.getOperand(0); 3564 if (!isBSwapHWordElement(N000, Parts)) 3565 return SDValue(); 3566 SDValue N001 = N00.getOperand(1); 3567 if (!isBSwapHWordElement(N001, Parts)) 3568 return SDValue(); 3569 } 3570 3571 // Make sure the parts are all coming from the same node. 
3572 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) 3573 return SDValue(); 3574 3575 SDLoc DL(N); 3576 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, 3577 SDValue(Parts[0], 0)); 3578 3579 // Result of the bswap should be rotated by 16. If it's not legal, then 3580 // do (x << 16) | (x >> 16). 3581 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); 3582 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) 3583 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); 3584 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) 3585 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); 3586 return DAG.getNode(ISD::OR, DL, VT, 3587 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt), 3588 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt)); 3589 } 3590 3591 /// This contains all DAGCombine rules which reduce two values combined by 3592 /// an Or operation to a single value \see visitANDLike(). 3593 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { 3594 EVT VT = N1.getValueType(); 3595 // fold (or x, undef) -> -1 3596 if (!LegalOperations && 3597 (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { 3598 EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; 3599 return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), 3600 SDLoc(LocReference), VT); 3601 } 3602 // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) 3603 SDValue LL, LR, RL, RR, CC0, CC1; 3604 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 3605 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 3606 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 3607 3608 if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) { 3609 // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) 3610 // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) 3611 if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { 3612 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), 3613 LR.getValueType(), LL, RL); 3614 AddToWorklist(ORNode.getNode()); 3615 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 3616 } 3617 // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) 3618 // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) 3619 if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { 3620 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), 3621 LR.getValueType(), LL, RL); 3622 AddToWorklist(ANDNode.getNode()); 3623 return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); 3624 } 3625 } 3626 // canonicalize equivalent to ll == rl 3627 if (LL == RR && LR == RL) { 3628 Op1 = ISD::getSetCCSwappedOperands(Op1); 3629 std::swap(RL, RR); 3630 } 3631 if (LL == RL && LR == RR) { 3632 bool isInteger = LL.getValueType().isInteger(); 3633 ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); 3634 if (Result != ISD::SETCC_INVALID && 3635 (!LegalOperations || 3636 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 3637 TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { 3638 EVT CCVT = getSetCCResultType(LL.getValueType()); 3639 if (N0.getValueType() == CCVT || 3640 (!LegalOperations && 
N0.getValueType() == MVT::i1)) 3641 return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), 3642 LL, LR, Result); 3643 } 3644 } 3645 } 3646 3647 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. 3648 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && 3649 // Don't increase # computations. 3650 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { 3651 // We can only do this xform if we know that bits from X that are set in C2 3652 // but not in C1 are already zero. Likewise for Y. 3653 if (const ConstantSDNode *N0O1C = 3654 getAsNonOpaqueConstant(N0.getOperand(1))) { 3655 if (const ConstantSDNode *N1O1C = 3656 getAsNonOpaqueConstant(N1.getOperand(1))) { 3657 // We can only do this xform if we know that bits from X that are set in 3658 // C2 but not in C1 are already zero. Likewise for Y. 3659 const APInt &LHSMask = N0O1C->getAPIntValue(); 3660 const APInt &RHSMask = N1O1C->getAPIntValue(); 3661 3662 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && 3663 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { 3664 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, 3665 N0.getOperand(0), N1.getOperand(0)); 3666 SDLoc DL(LocReference); 3667 return DAG.getNode(ISD::AND, DL, VT, X, 3668 DAG.getConstant(LHSMask | RHSMask, DL, VT)); 3669 } 3670 } 3671 } 3672 } 3673 3674 // (or (and X, M), (and X, N)) -> (and X, (or M, N)) 3675 if (N0.getOpcode() == ISD::AND && 3676 N1.getOpcode() == ISD::AND && 3677 N0.getOperand(0) == N1.getOperand(0) && 3678 // Don't increase # computations. 
3679 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { 3680 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, 3681 N0.getOperand(1), N1.getOperand(1)); 3682 return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X); 3683 } 3684 3685 return SDValue(); 3686 } 3687 3688 SDValue DAGCombiner::visitOR(SDNode *N) { 3689 SDValue N0 = N->getOperand(0); 3690 SDValue N1 = N->getOperand(1); 3691 EVT VT = N1.getValueType(); 3692 3693 // fold vector ops 3694 if (VT.isVector()) { 3695 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 3696 return FoldedVOp; 3697 3698 // fold (or x, 0) -> x, vector edition 3699 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3700 return N1; 3701 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3702 return N0; 3703 3704 // fold (or x, -1) -> -1, vector edition 3705 if (ISD::isBuildVectorAllOnes(N0.getNode())) 3706 // do not return N0, because undef node may exist in N0 3707 return DAG.getConstant( 3708 APInt::getAllOnesValue( 3709 N0.getValueType().getScalarType().getSizeInBits()), 3710 SDLoc(N), N0.getValueType()); 3711 if (ISD::isBuildVectorAllOnes(N1.getNode())) 3712 // do not return N1, because undef node may exist in N1 3713 return DAG.getConstant( 3714 APInt::getAllOnesValue( 3715 N1.getValueType().getScalarType().getSizeInBits()), 3716 SDLoc(N), N1.getValueType()); 3717 3718 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) 3719 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) 3720 // Do this only if the resulting shuffle is legal. 3721 if (isa<ShuffleVectorSDNode>(N0) && 3722 isa<ShuffleVectorSDNode>(N1) && 3723 // Avoid folding a node with illegal type. 
3724 TLI.isTypeLegal(VT) && 3725 N0->getOperand(1) == N1->getOperand(1) && 3726 ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { 3727 bool CanFold = true; 3728 unsigned NumElts = VT.getVectorNumElements(); 3729 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); 3730 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); 3731 // We construct two shuffle masks: 3732 // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand 3733 // and N1 as the second operand. 3734 // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand 3735 // and N0 as the second operand. 3736 // We do this because OR is commutable and therefore there might be 3737 // two ways to fold this node into a shuffle. 3738 SmallVector<int,4> Mask1; 3739 SmallVector<int,4> Mask2; 3740 3741 for (unsigned i = 0; i != NumElts && CanFold; ++i) { 3742 int M0 = SV0->getMaskElt(i); 3743 int M1 = SV1->getMaskElt(i); 3744 3745 // Both shuffle indexes are undef. Propagate Undef. 3746 if (M0 < 0 && M1 < 0) { 3747 Mask1.push_back(M0); 3748 Mask2.push_back(M0); 3749 continue; 3750 } 3751 3752 if (M0 < 0 || M1 < 0 || 3753 (M0 < (int)NumElts && M1 < (int)NumElts) || 3754 (M0 >= (int)NumElts && M1 >= (int)NumElts)) { 3755 CanFold = false; 3756 break; 3757 } 3758 3759 Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); 3760 Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); 3761 } 3762 3763 if (CanFold) { 3764 // Fold this sequence only if the resulting shuffle is 'legal'. 
3765 if (TLI.isShuffleMaskLegal(Mask1, VT)) 3766 return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), 3767 N1->getOperand(0), &Mask1[0]); 3768 if (TLI.isShuffleMaskLegal(Mask2, VT)) 3769 return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), 3770 N0->getOperand(0), &Mask2[0]); 3771 } 3772 } 3773 } 3774 3775 // fold (or c1, c2) -> c1|c2 3776 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 3777 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3778 if (N0C && N1C && !N1C->isOpaque()) 3779 return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); 3780 // canonicalize constant to RHS 3781 if (isConstantIntBuildVectorOrConstantInt(N0) && 3782 !isConstantIntBuildVectorOrConstantInt(N1)) 3783 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); 3784 // fold (or x, 0) -> x 3785 if (isNullConstant(N1)) 3786 return N0; 3787 // fold (or x, -1) -> -1 3788 if (isAllOnesConstant(N1)) 3789 return N1; 3790 // fold (or x, c) -> c iff (x & ~c) == 0 3791 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) 3792 return N1; 3793 3794 if (SDValue Combined = visitORLike(N0, N1, N)) 3795 return Combined; 3796 3797 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) 3798 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1)) 3799 return BSwap; 3800 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1)) 3801 return BSwap; 3802 3803 // reassociate or 3804 if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) 3805 return ROR; 3806 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) 3807 // iff (c1 & c2) == 0. 
3808 if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 3809 isa<ConstantSDNode>(N0.getOperand(1))) { 3810 ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); 3811 if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { 3812 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, 3813 N1C, C1)) 3814 return DAG.getNode( 3815 ISD::AND, SDLoc(N), VT, 3816 DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); 3817 return SDValue(); 3818 } 3819 } 3820 // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) 3821 if (N0.getOpcode() == N1.getOpcode()) 3822 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) 3823 return Tmp; 3824 3825 // See if this is some rotate idiom. 3826 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) 3827 return SDValue(Rot, 0); 3828 3829 // Simplify the operands using demanded-bits information. 3830 if (!VT.isVector() && 3831 SimplifyDemandedBits(SDValue(N, 0))) 3832 return SDValue(N, 0); 3833 3834 return SDValue(); 3835 } 3836 3837 /// Match "(X shl/srl V1) & V2" where V2 may not be present. 3838 static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { 3839 if (Op.getOpcode() == ISD::AND) { 3840 if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { 3841 Mask = Op.getOperand(1); 3842 Op = Op.getOperand(0); 3843 } else { 3844 return false; 3845 } 3846 } 3847 3848 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { 3849 Shift = Op; 3850 return true; 3851 } 3852 3853 return false; 3854 } 3855 3856 // Return true if we can prove that, whenever Neg and Pos are both in the 3857 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that 3858 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: 3859 // 3860 // (or (shift1 X, Neg), (shift2 X, Pos)) 3861 // 3862 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate 3863 // in direction shift1 by Neg. 
The range [0, EltSize) means that we only need 3864 // to consider shift amounts with defined behavior. 3865 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { 3866 // If EltSize is a power of 2 then: 3867 // 3868 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) 3869 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize). 3870 // 3871 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check 3872 // for the stronger condition: 3873 // 3874 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A] 3875 // 3876 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1) 3877 // we can just replace Neg with Neg' for the rest of the function. 3878 // 3879 // In other cases we check for the even stronger condition: 3880 // 3881 // Neg == EltSize - Pos [B] 3882 // 3883 // for all Neg and Pos. Note that the (or ...) then invokes undefined 3884 // behavior if Pos == 0 (and consequently Neg == EltSize). 3885 // 3886 // We could actually use [A] whenever EltSize is a power of 2, but the 3887 // only extra cases that it would match are those uninteresting ones 3888 // where Neg and Pos are never in range at the same time. E.g. for 3889 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) 3890 // as well as (sub 32, Pos), but: 3891 // 3892 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) 3893 // 3894 // always invokes undefined behavior for 32-bit X. 3895 // 3896 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. 3897 unsigned MaskLoBits = 0; 3898 if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { 3899 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { 3900 if (NegC->getAPIntValue() == EltSize - 1) { 3901 Neg = Neg.getOperand(0); 3902 MaskLoBits = Log2_64(EltSize); 3903 } 3904 } 3905 } 3906 3907 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. 
3908 if (Neg.getOpcode() != ISD::SUB) 3909 return 0; 3910 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); 3911 if (!NegC) 3912 return 0; 3913 SDValue NegOp1 = Neg.getOperand(1); 3914 3915 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with 3916 // Pos'. The truncation is redundant for the purpose of the equality. 3917 if (MaskLoBits && Pos.getOpcode() == ISD::AND) 3918 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) 3919 if (PosC->getAPIntValue() == EltSize - 1) 3920 Pos = Pos.getOperand(0); 3921 3922 // The condition we need is now: 3923 // 3924 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask 3925 // 3926 // If NegOp1 == Pos then we need: 3927 // 3928 // EltSize & Mask == NegC & Mask 3929 // 3930 // (because "x & Mask" is a truncation and distributes through subtraction). 3931 APInt Width; 3932 if (Pos == NegOp1) 3933 Width = NegC->getAPIntValue(); 3934 3935 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 3936 // Then the condition we want to prove becomes: 3937 // 3938 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask 3939 // 3940 // which, again because "x & Mask" is a truncation, becomes: 3941 // 3942 // NegC & Mask == (EltSize - PosC) & Mask 3943 // EltSize & Mask == (NegC + PosC) & Mask 3944 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) { 3945 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) 3946 Width = PosC->getAPIntValue() + NegC->getAPIntValue(); 3947 else 3948 return false; 3949 } else 3950 return false; 3951 3952 // Now we just need to check that EltSize & Mask == Width & Mask. 3953 if (MaskLoBits) 3954 // EltSize & Mask is 0 since Mask is EltSize - 1. 3955 return Width.getLoBits(MaskLoBits) == 0; 3956 return Width == EltSize; 3957 } 3958 3959 // A subroutine of MatchRotate used once we have found an OR of two opposite 3960 // shifts of Shifted. 
If Neg == <operand size> - Pos then the OR reduces 3961 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the 3962 // former being preferred if supported. InnerPos and InnerNeg are Pos and 3963 // Neg with outer conversions stripped away. 3964 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, 3965 SDValue Neg, SDValue InnerPos, 3966 SDValue InnerNeg, unsigned PosOpcode, 3967 unsigned NegOpcode, SDLoc DL) { 3968 // fold (or (shl x, (*ext y)), 3969 // (srl x, (*ext (sub 32, y)))) -> 3970 // (rotl x, y) or (rotr x, (sub 32, y)) 3971 // 3972 // fold (or (shl x, (*ext (sub 32, y))), 3973 // (srl x, (*ext y))) -> 3974 // (rotr x, y) or (rotl x, (sub 32, y)) 3975 EVT VT = Shifted.getValueType(); 3976 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) { 3977 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); 3978 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, 3979 HasPos ? Pos : Neg).getNode(); 3980 } 3981 3982 return nullptr; 3983 } 3984 3985 // MatchRotate - Handle an 'or' of two operands. If this is one of the many 3986 // idioms for rotate, and if the target supports rotation instructions, generate 3987 // a rot[lr]. 3988 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { 3989 // Must be a legal type. Expanded 'n promoted things won't work with rotates. 3990 EVT VT = LHS.getValueType(); 3991 if (!TLI.isTypeLegal(VT)) return nullptr; 3992 3993 // The target must have at least one rotate flavor. 3994 bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); 3995 bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); 3996 if (!HasROTL && !HasROTR) return nullptr; 3997 3998 // Match "(X shl/srl V1) & V2" where V2 may not be present. 3999 SDValue LHSShift; // The shift. 4000 SDValue LHSMask; // AND value if any. 4001 if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) 4002 return nullptr; // Not part of a rotate. 4003 4004 SDValue RHSShift; // The shift. 
4005 SDValue RHSMask; // AND value if any. 4006 if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) 4007 return nullptr; // Not part of a rotate. 4008 4009 if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) 4010 return nullptr; // Not shifting the same value. 4011 4012 if (LHSShift.getOpcode() == RHSShift.getOpcode()) 4013 return nullptr; // Shifts must disagree. 4014 4015 // Canonicalize shl to left side in a shl/srl pair. 4016 if (RHSShift.getOpcode() == ISD::SHL) { 4017 std::swap(LHS, RHS); 4018 std::swap(LHSShift, RHSShift); 4019 std::swap(LHSMask, RHSMask); 4020 } 4021 4022 unsigned EltSizeInBits = VT.getScalarSizeInBits(); 4023 SDValue LHSShiftArg = LHSShift.getOperand(0); 4024 SDValue LHSShiftAmt = LHSShift.getOperand(1); 4025 SDValue RHSShiftArg = RHSShift.getOperand(0); 4026 SDValue RHSShiftAmt = RHSShift.getOperand(1); 4027 4028 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) 4029 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) 4030 if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) { 4031 uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue(); 4032 uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue(); 4033 if ((LShVal + RShVal) != EltSizeInBits) 4034 return nullptr; 4035 4036 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, 4037 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); 4038 4039 // If there is an AND of either shifted operand, apply it to the result. 
4040 if (LHSMask.getNode() || RHSMask.getNode()) { 4041 APInt AllBits = APInt::getAllOnesValue(EltSizeInBits); 4042 SDValue Mask = DAG.getConstant(AllBits, DL, VT); 4043 4044 if (LHSMask.getNode()) { 4045 APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal); 4046 Mask = DAG.getNode(ISD::AND, DL, VT, Mask, 4047 DAG.getNode(ISD::OR, DL, VT, LHSMask, 4048 DAG.getConstant(RHSBits, DL, VT))); 4049 } 4050 if (RHSMask.getNode()) { 4051 APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal); 4052 Mask = DAG.getNode(ISD::AND, DL, VT, Mask, 4053 DAG.getNode(ISD::OR, DL, VT, RHSMask, 4054 DAG.getConstant(LHSBits, DL, VT))); 4055 } 4056 4057 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); 4058 } 4059 4060 return Rot.getNode(); 4061 } 4062 4063 // If there is a mask here, and we have a variable shift, we can't be sure 4064 // that we're masking out the right stuff. 4065 if (LHSMask.getNode() || RHSMask.getNode()) 4066 return nullptr; 4067 4068 // If the shift amount is sign/zext/any-extended just peel it off. 
4069 SDValue LExtOp0 = LHSShiftAmt; 4070 SDValue RExtOp0 = RHSShiftAmt; 4071 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 4072 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 4073 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 4074 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && 4075 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 4076 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 4077 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 4078 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { 4079 LExtOp0 = LHSShiftAmt.getOperand(0); 4080 RExtOp0 = RHSShiftAmt.getOperand(0); 4081 } 4082 4083 SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, 4084 LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); 4085 if (TryL) 4086 return TryL; 4087 4088 SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, 4089 RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); 4090 if (TryR) 4091 return TryR; 4092 4093 return nullptr; 4094 } 4095 4096 SDValue DAGCombiner::visitXOR(SDNode *N) { 4097 SDValue N0 = N->getOperand(0); 4098 SDValue N1 = N->getOperand(1); 4099 EVT VT = N0.getValueType(); 4100 4101 // fold vector ops 4102 if (VT.isVector()) { 4103 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 4104 return FoldedVOp; 4105 4106 // fold (xor x, 0) -> x, vector edition 4107 if (ISD::isBuildVectorAllZeros(N0.getNode())) 4108 return N1; 4109 if (ISD::isBuildVectorAllZeros(N1.getNode())) 4110 return N0; 4111 } 4112 4113 // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 
4114 if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) 4115 return DAG.getConstant(0, SDLoc(N), VT); 4116 // fold (xor x, undef) -> undef 4117 if (N0.getOpcode() == ISD::UNDEF) 4118 return N0; 4119 if (N1.getOpcode() == ISD::UNDEF) 4120 return N1; 4121 // fold (xor c1, c2) -> c1^c2 4122 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 4123 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 4124 if (N0C && N1C) 4125 return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); 4126 // canonicalize constant to RHS 4127 if (isConstantIntBuildVectorOrConstantInt(N0) && 4128 !isConstantIntBuildVectorOrConstantInt(N1)) 4129 return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); 4130 // fold (xor x, 0) -> x 4131 if (isNullConstant(N1)) 4132 return N0; 4133 // reassociate xor 4134 if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) 4135 return RXOR; 4136 4137 // fold !(x cc y) -> (x !cc y) 4138 SDValue LHS, RHS, CC; 4139 if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { 4140 bool isInt = LHS.getValueType().isInteger(); 4141 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), 4142 isInt); 4143 4144 if (!LegalOperations || 4145 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { 4146 switch (N0.getOpcode()) { 4147 default: 4148 llvm_unreachable("Unhandled SetCC Equivalent!"); 4149 case ISD::SETCC: 4150 return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC); 4151 case ISD::SELECT_CC: 4152 return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2), 4153 N0.getOperand(3), NotCC); 4154 } 4155 } 4156 } 4157 4158 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) 4159 if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND && 4160 N0.getNode()->hasOneUse() && 4161 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ 4162 SDValue V = N0.getOperand(0); 4163 SDLoc DL(N0); 4164 V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V, 4165 DAG.getConstant(1, DL, V.getValueType())); 4166 
AddToWorklist(V.getNode()); 4167 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); 4168 } 4169 4170 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc 4171 if (isOneConstant(N1) && VT == MVT::i1 && 4172 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { 4173 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 4174 if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { 4175 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; 4176 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS 4177 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS 4178 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); 4179 return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); 4180 } 4181 } 4182 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants 4183 if (isAllOnesConstant(N1) && 4184 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { 4185 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 4186 if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { 4187 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? 
ISD::OR : ISD::AND; 4188 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS 4189 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS 4190 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); 4191 return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); 4192 } 4193 } 4194 // fold (xor (and x, y), y) -> (and (not x), y) 4195 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 4196 N0->getOperand(1) == N1) { 4197 SDValue X = N0->getOperand(0); 4198 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); 4199 AddToWorklist(NotX.getNode()); 4200 return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); 4201 } 4202 // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) 4203 if (N1C && N0.getOpcode() == ISD::XOR) { 4204 if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) { 4205 SDLoc DL(N); 4206 return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), 4207 DAG.getConstant(N1C->getAPIntValue() ^ 4208 N00C->getAPIntValue(), DL, VT)); 4209 } 4210 if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) { 4211 SDLoc DL(N); 4212 return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), 4213 DAG.getConstant(N1C->getAPIntValue() ^ 4214 N01C->getAPIntValue(), DL, VT)); 4215 } 4216 } 4217 // fold (xor x, x) -> 0 4218 if (N0 == N1) 4219 return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); 4220 4221 // fold (xor (shl 1, x), -1) -> (rotl ~1, x) 4222 // Here is a concrete example of this equivalence: 4223 // i16 x == 14 4224 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 4225 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 4226 // 4227 // => 4228 // 4229 // i16 ~1 == 0b1111111111111110 4230 // i16 rol(~1, 14) == 0b1011111111111111 4231 // 4232 // Some additional tips to help conceptualize this transform: 4233 // - Try to see the operation as placing a single zero in a value of all ones. 4234 // - There exists no value for x which would allow the result to contain zero. 
4235 // - Values of x larger than the bitwidth are undefined and do not require a 4236 // consistent result. 4237 // - Pushing the zero left requires shifting one bits in from the right. 4238 // A rotate left of ~1 is a nice way of achieving the desired result. 4239 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL 4240 && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { 4241 SDLoc DL(N); 4242 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), 4243 N0.getOperand(1)); 4244 } 4245 4246 // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) 4247 if (N0.getOpcode() == N1.getOpcode()) 4248 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) 4249 return Tmp; 4250 4251 // Simplify the expression using non-local knowledge. 4252 if (!VT.isVector() && 4253 SimplifyDemandedBits(SDValue(N, 0))) 4254 return SDValue(N, 0); 4255 4256 return SDValue(); 4257 } 4258 4259 /// Handle transforms common to the three shifts, when the shift amount is a 4260 /// constant. 4261 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { 4262 SDNode *LHS = N->getOperand(0).getNode(); 4263 if (!LHS->hasOneUse()) return SDValue(); 4264 4265 // We want to pull some binops through shifts, so that we have (and (shift)) 4266 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of 4267 // thing happens with address calculations, so it's important to canonicalize 4268 // it. 4269 bool HighBitSet = false; // Can we transform this if the high bit is set? 4270 4271 switch (LHS->getOpcode()) { 4272 default: return SDValue(); 4273 case ISD::OR: 4274 case ISD::XOR: 4275 HighBitSet = false; // We can only transform sra if the high bit is clear. 4276 break; 4277 case ISD::AND: 4278 HighBitSet = true; // We can only transform sra if the high bit is set. 4279 break; 4280 case ISD::ADD: 4281 if (N->getOpcode() != ISD::SHL) 4282 return SDValue(); // only shl(add) not sr[al](add). 
4283 HighBitSet = false; // We can only transform sra if the high bit is clear. 4284 break; 4285 } 4286 4287 // We require the RHS of the binop to be a constant and not opaque as well. 4288 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); 4289 if (!BinOpCst) return SDValue(); 4290 4291 // FIXME: disable this unless the input to the binop is a shift by a constant. 4292 // If it is not a shift, it pessimizes some common cases like: 4293 // 4294 // void foo(int *X, int i) { X[i & 1235] = 1; } 4295 // int bar(int *X, int i) { return X[i & 255]; } 4296 SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); 4297 if ((BinOpLHSVal->getOpcode() != ISD::SHL && 4298 BinOpLHSVal->getOpcode() != ISD::SRA && 4299 BinOpLHSVal->getOpcode() != ISD::SRL) || 4300 !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) 4301 return SDValue(); 4302 4303 EVT VT = N->getValueType(0); 4304 4305 // If this is a signed shift right, and the high bit is modified by the 4306 // logical operation, do not perform the transformation. The highBitSet 4307 // boolean indicates the value of the high bit of the constant which would 4308 // cause it to be modified for this operation. 4309 if (N->getOpcode() == ISD::SRA) { 4310 bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); 4311 if (BinOpRHSSignSet != HighBitSet) 4312 return SDValue(); 4313 } 4314 4315 if (!TLI.isDesirableToCommuteWithShift(LHS)) 4316 return SDValue(); 4317 4318 // Fold the constants, shifting the binop RHS by the shift amount. 4319 SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), 4320 N->getValueType(0), 4321 LHS->getOperand(1), N->getOperand(1)); 4322 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); 4323 4324 // Create the new shift. 4325 SDValue NewShift = DAG.getNode(N->getOpcode(), 4326 SDLoc(LHS->getOperand(0)), 4327 VT, LHS->getOperand(0), N->getOperand(1)); 4328 4329 // Create the new binop. 
4330 return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); 4331 } 4332 4333 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { 4334 assert(N->getOpcode() == ISD::TRUNCATE); 4335 assert(N->getOperand(0).getOpcode() == ISD::AND); 4336 4337 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) 4338 if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { 4339 SDValue N01 = N->getOperand(0).getOperand(1); 4340 4341 if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { 4342 if (!N01C->isOpaque()) { 4343 EVT TruncVT = N->getValueType(0); 4344 SDValue N00 = N->getOperand(0).getOperand(0); 4345 APInt TruncC = N01C->getAPIntValue(); 4346 TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); 4347 SDLoc DL(N); 4348 4349 return DAG.getNode(ISD::AND, DL, TruncVT, 4350 DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), 4351 DAG.getConstant(TruncC, DL, TruncVT)); 4352 } 4353 } 4354 } 4355 4356 return SDValue(); 4357 } 4358 4359 SDValue DAGCombiner::visitRotate(SDNode *N) { 4360 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). 
4361 if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && 4362 N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { 4363 SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode()); 4364 if (NewOp1.getNode()) 4365 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), 4366 N->getOperand(0), NewOp1); 4367 } 4368 return SDValue(); 4369 } 4370 4371 SDValue DAGCombiner::visitSHL(SDNode *N) { 4372 SDValue N0 = N->getOperand(0); 4373 SDValue N1 = N->getOperand(1); 4374 EVT VT = N0.getValueType(); 4375 unsigned OpSizeInBits = VT.getScalarSizeInBits(); 4376 4377 // fold vector ops 4378 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4379 if (VT.isVector()) { 4380 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 4381 return FoldedVOp; 4382 4383 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); 4384 // If setcc produces all-one true value then: 4385 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) 4386 if (N1CV && N1CV->isConstant()) { 4387 if (N0.getOpcode() == ISD::AND) { 4388 SDValue N00 = N0->getOperand(0); 4389 SDValue N01 = N0->getOperand(1); 4390 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01); 4391 4392 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && 4393 TLI.getBooleanContents(N00.getOperand(0).getValueType()) == 4394 TargetLowering::ZeroOrNegativeOneBooleanContent) { 4395 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, 4396 N01CV, N1CV)) 4397 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); 4398 } 4399 } else { 4400 N1C = isConstOrConstSplat(N1); 4401 } 4402 } 4403 } 4404 4405 // fold (shl c1, c2) -> c1<<c2 4406 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 4407 if (N0C && N1C && !N1C->isOpaque()) 4408 return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); 4409 // fold (shl 0, x) -> 0 4410 if (isNullConstant(N0)) 4411 return N0; 4412 // fold (shl x, c >= size(x)) -> undef 4413 if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) 4414 return 
DAG.getUNDEF(VT); 4415 // fold (shl x, 0) -> x 4416 if (N1C && N1C->isNullValue()) 4417 return N0; 4418 // fold (shl undef, x) -> 0 4419 if (N0.getOpcode() == ISD::UNDEF) 4420 return DAG.getConstant(0, SDLoc(N), VT); 4421 // if (shl x, c) is known to be zero, return 0 4422 if (DAG.MaskedValueIsZero(SDValue(N, 0), 4423 APInt::getAllOnesValue(OpSizeInBits))) 4424 return DAG.getConstant(0, SDLoc(N), VT); 4425 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 4426 if (N1.getOpcode() == ISD::TRUNCATE && 4427 N1.getOperand(0).getOpcode() == ISD::AND) { 4428 SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); 4429 if (NewOp1.getNode()) 4430 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); 4431 } 4432 4433 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 4434 return SDValue(N, 0); 4435 4436 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) 4437 if (N1C && N0.getOpcode() == ISD::SHL) { 4438 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { 4439 uint64_t c1 = N0C1->getZExtValue(); 4440 uint64_t c2 = N1C->getZExtValue(); 4441 SDLoc DL(N); 4442 if (c1 + c2 >= OpSizeInBits) 4443 return DAG.getConstant(0, DL, VT); 4444 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), 4445 DAG.getConstant(c1 + c2, DL, N1.getValueType())); 4446 } 4447 } 4448 4449 // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) 4450 // For this to be valid, the second form must not preserve any of the bits 4451 // that are shifted out by the inner shift in the first form. This means 4452 // the outer shift size must be >= the number of bits added by the ext. 4453 // As a corollary, we don't care what kind of ext it is. 
4454 if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || 4455 N0.getOpcode() == ISD::ANY_EXTEND || 4456 N0.getOpcode() == ISD::SIGN_EXTEND) && 4457 N0.getOperand(0).getOpcode() == ISD::SHL) { 4458 SDValue N0Op0 = N0.getOperand(0); 4459 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { 4460 uint64_t c1 = N0Op0C1->getZExtValue(); 4461 uint64_t c2 = N1C->getZExtValue(); 4462 EVT InnerShiftVT = N0Op0.getValueType(); 4463 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); 4464 if (c2 >= OpSizeInBits - InnerShiftSize) { 4465 SDLoc DL(N0); 4466 if (c1 + c2 >= OpSizeInBits) 4467 return DAG.getConstant(0, DL, VT); 4468 return DAG.getNode(ISD::SHL, DL, VT, 4469 DAG.getNode(N0.getOpcode(), DL, VT, 4470 N0Op0->getOperand(0)), 4471 DAG.getConstant(c1 + c2, DL, N1.getValueType())); 4472 } 4473 } 4474 } 4475 4476 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) 4477 // Only fold this if the inner zext has no other uses to avoid increasing 4478 // the total number of instructions. 
4479 if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && 4480 N0.getOperand(0).getOpcode() == ISD::SRL) { 4481 SDValue N0Op0 = N0.getOperand(0); 4482 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { 4483 uint64_t c1 = N0Op0C1->getZExtValue(); 4484 if (c1 < VT.getScalarSizeInBits()) { 4485 uint64_t c2 = N1C->getZExtValue(); 4486 if (c1 == c2) { 4487 SDValue NewOp0 = N0.getOperand(0); 4488 EVT CountVT = NewOp0.getOperand(1).getValueType(); 4489 SDLoc DL(N); 4490 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(), 4491 NewOp0, 4492 DAG.getConstant(c2, DL, CountVT)); 4493 AddToWorklist(NewSHL.getNode()); 4494 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); 4495 } 4496 } 4497 } 4498 } 4499 4500 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 4501 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 4502 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && 4503 cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) { 4504 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { 4505 uint64_t C1 = N0C1->getZExtValue(); 4506 uint64_t C2 = N1C->getZExtValue(); 4507 SDLoc DL(N); 4508 if (C1 <= C2) 4509 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), 4510 DAG.getConstant(C2 - C1, DL, N1.getValueType())); 4511 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), 4512 DAG.getConstant(C1 - C2, DL, N1.getValueType())); 4513 } 4514 } 4515 4516 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or 4517 // (and (srl x, (sub c1, c2), MASK) 4518 // Only fold this if the inner shift has no other uses -- if it does, folding 4519 // this will increase the total number of instructions. 
4520 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { 4521 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { 4522 uint64_t c1 = N0C1->getZExtValue(); 4523 if (c1 < OpSizeInBits) { 4524 uint64_t c2 = N1C->getZExtValue(); 4525 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); 4526 SDValue Shift; 4527 if (c2 > c1) { 4528 Mask = Mask.shl(c2 - c1); 4529 SDLoc DL(N); 4530 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), 4531 DAG.getConstant(c2 - c1, DL, N1.getValueType())); 4532 } else { 4533 Mask = Mask.lshr(c1 - c2); 4534 SDLoc DL(N); 4535 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), 4536 DAG.getConstant(c1 - c2, DL, N1.getValueType())); 4537 } 4538 SDLoc DL(N0); 4539 return DAG.getNode(ISD::AND, DL, VT, Shift, 4540 DAG.getConstant(Mask, DL, VT)); 4541 } 4542 } 4543 } 4544 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) 4545 if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { 4546 unsigned BitSize = VT.getScalarSizeInBits(); 4547 SDLoc DL(N); 4548 SDValue HiBitsMask = 4549 DAG.getConstant(APInt::getHighBitsSet(BitSize, 4550 BitSize - N1C->getZExtValue()), 4551 DL, VT); 4552 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), 4553 HiBitsMask); 4554 } 4555 4556 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 4557 // Variant of version done on multiply, except mul by a power of 2 is turned 4558 // into a shift. 
4559 APInt Val; 4560 if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && 4561 (isa<ConstantSDNode>(N0.getOperand(1)) || 4562 isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { 4563 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); 4564 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); 4565 return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); 4566 } 4567 4568 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) 4569 if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) { 4570 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { 4571 if (SDValue Folded = 4572 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C)) 4573 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded); 4574 } 4575 } 4576 4577 if (N1C && !N1C->isOpaque()) 4578 if (SDValue NewSHL = visitShiftByConstant(N, N1C)) 4579 return NewSHL; 4580 4581 return SDValue(); 4582 } 4583 4584 SDValue DAGCombiner::visitSRA(SDNode *N) { 4585 SDValue N0 = N->getOperand(0); 4586 SDValue N1 = N->getOperand(1); 4587 EVT VT = N0.getValueType(); 4588 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 4589 4590 // fold vector ops 4591 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4592 if (VT.isVector()) { 4593 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 4594 return FoldedVOp; 4595 4596 N1C = isConstOrConstSplat(N1); 4597 } 4598 4599 // fold (sra c1, c2) -> (sra c1, c2) 4600 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 4601 if (N0C && N1C && !N1C->isOpaque()) 4602 return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); 4603 // fold (sra 0, x) -> 0 4604 if (isNullConstant(N0)) 4605 return N0; 4606 // fold (sra -1, x) -> -1 4607 if (isAllOnesConstant(N0)) 4608 return N0; 4609 // fold (sra x, (setge c, size(x))) -> undef 4610 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 4611 return DAG.getUNDEF(VT); 4612 // fold (sra x, 0) -> x 4613 if (N1C && N1C->isNullValue()) 4614 
return N0; 4615 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports 4616 // sext_inreg. 4617 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { 4618 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); 4619 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); 4620 if (VT.isVector()) 4621 ExtVT = EVT::getVectorVT(*DAG.getContext(), 4622 ExtVT, VT.getVectorNumElements()); 4623 if ((!LegalOperations || 4624 TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) 4625 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 4626 N0.getOperand(0), DAG.getValueType(ExtVT)); 4627 } 4628 4629 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) 4630 if (N1C && N0.getOpcode() == ISD::SRA) { 4631 if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) { 4632 unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); 4633 if (Sum >= OpSizeInBits) 4634 Sum = OpSizeInBits - 1; 4635 SDLoc DL(N); 4636 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), 4637 DAG.getConstant(Sum, DL, N1.getValueType())); 4638 } 4639 } 4640 4641 // fold (sra (shl X, m), (sub result_size, n)) 4642 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for 4643 // result_size - n != m. 4644 // If truncate is free for the target sext(shl) is likely to result in better 4645 // code. 4646 if (N0.getOpcode() == ISD::SHL && N1C) { 4647 // Get the two constanst of the shifts, CN0 = m, CN = n. 4648 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); 4649 if (N01C) { 4650 LLVMContext &Ctx = *DAG.getContext(); 4651 // Determine what the truncate's result bitsize and type would be. 4652 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()); 4653 4654 if (VT.isVector()) 4655 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); 4656 4657 // Determine the residual right-shift amount. 
4658 signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); 4659 4660 // If the shift is not a no-op (in which case this should be just a sign 4661 // extend already), the truncated to type is legal, sign_extend is legal 4662 // on that type, and the truncate to that type is both legal and free, 4663 // perform the transform. 4664 if ((ShiftAmt > 0) && 4665 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && 4666 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && 4667 TLI.isTruncateFree(VT, TruncVT)) { 4668 4669 SDLoc DL(N); 4670 SDValue Amt = DAG.getConstant(ShiftAmt, DL, 4671 getShiftAmountTy(N0.getOperand(0).getValueType())); 4672 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, 4673 N0.getOperand(0), Amt); 4674 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, 4675 Shift); 4676 return DAG.getNode(ISD::SIGN_EXTEND, DL, 4677 N->getValueType(0), Trunc); 4678 } 4679 } 4680 } 4681 4682 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 4683 if (N1.getOpcode() == ISD::TRUNCATE && 4684 N1.getOperand(0).getOpcode() == ISD::AND) { 4685 SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); 4686 if (NewOp1.getNode()) 4687 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); 4688 } 4689 4690 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2)) 4691 // if c1 is equal to the number of bits the trunc removes 4692 if (N0.getOpcode() == ISD::TRUNCATE && 4693 (N0.getOperand(0).getOpcode() == ISD::SRL || 4694 N0.getOperand(0).getOpcode() == ISD::SRA) && 4695 N0.getOperand(0).hasOneUse() && 4696 N0.getOperand(0).getOperand(1).hasOneUse() && 4697 N1C) { 4698 SDValue N0Op0 = N0.getOperand(0); 4699 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) { 4700 unsigned LargeShiftVal = LargeShift->getZExtValue(); 4701 EVT LargeVT = N0Op0.getValueType(); 4702 4703 if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { 4704 SDLoc DL(N); 4705 SDValue Amt = 4706 DAG.getConstant(LargeShiftVal 
+ N1C->getZExtValue(), DL, 4707 getShiftAmountTy(N0Op0.getOperand(0).getValueType())); 4708 SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, 4709 N0Op0.getOperand(0), Amt); 4710 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); 4711 } 4712 } 4713 } 4714 4715 // Simplify, based on bits shifted out of the LHS. 4716 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 4717 return SDValue(N, 0); 4718 4719 4720 // If the sign bit is known to be zero, switch this to a SRL. 4721 if (DAG.SignBitIsZero(N0)) 4722 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); 4723 4724 if (N1C && !N1C->isOpaque()) 4725 if (SDValue NewSRA = visitShiftByConstant(N, N1C)) 4726 return NewSRA; 4727 4728 return SDValue(); 4729 } 4730 4731 SDValue DAGCombiner::visitSRL(SDNode *N) { 4732 SDValue N0 = N->getOperand(0); 4733 SDValue N1 = N->getOperand(1); 4734 EVT VT = N0.getValueType(); 4735 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 4736 4737 // fold vector ops 4738 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4739 if (VT.isVector()) { 4740 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 4741 return FoldedVOp; 4742 4743 N1C = isConstOrConstSplat(N1); 4744 } 4745 4746 // fold (srl c1, c2) -> c1 >>u c2 4747 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 4748 if (N0C && N1C && !N1C->isOpaque()) 4749 return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); 4750 // fold (srl 0, x) -> 0 4751 if (isNullConstant(N0)) 4752 return N0; 4753 // fold (srl x, c >= size(x)) -> undef 4754 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 4755 return DAG.getUNDEF(VT); 4756 // fold (srl x, 0) -> x 4757 if (N1C && N1C->isNullValue()) 4758 return N0; 4759 // if (srl x, c) is known to be zero, return 0 4760 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 4761 APInt::getAllOnesValue(OpSizeInBits))) 4762 return DAG.getConstant(0, SDLoc(N), VT); 4763 4764 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) 4765 if (N1C && N0.getOpcode() == ISD::SRL) { 4766 if (ConstantSDNode *N01C = 
isConstOrConstSplat(N0.getOperand(1))) { 4767 uint64_t c1 = N01C->getZExtValue(); 4768 uint64_t c2 = N1C->getZExtValue(); 4769 SDLoc DL(N); 4770 if (c1 + c2 >= OpSizeInBits) 4771 return DAG.getConstant(0, DL, VT); 4772 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), 4773 DAG.getConstant(c1 + c2, DL, N1.getValueType())); 4774 } 4775 } 4776 4777 // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) 4778 if (N1C && N0.getOpcode() == ISD::TRUNCATE && 4779 N0.getOperand(0).getOpcode() == ISD::SRL && 4780 isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { 4781 uint64_t c1 = 4782 cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); 4783 uint64_t c2 = N1C->getZExtValue(); 4784 EVT InnerShiftVT = N0.getOperand(0).getValueType(); 4785 EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); 4786 uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); 4787 // This is only valid if the OpSizeInBits + c1 = size of inner shift. 4788 if (c1 + OpSizeInBits == InnerShiftSize) { 4789 SDLoc DL(N0); 4790 if (c1 + c2 >= InnerShiftSize) 4791 return DAG.getConstant(0, DL, VT); 4792 return DAG.getNode(ISD::TRUNCATE, DL, VT, 4793 DAG.getNode(ISD::SRL, DL, InnerShiftVT, 4794 N0.getOperand(0)->getOperand(0), 4795 DAG.getConstant(c1 + c2, DL, 4796 ShiftCountVT))); 4797 } 4798 } 4799 4800 // fold (srl (shl x, c), c) -> (and x, cst2) 4801 if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) { 4802 unsigned BitSize = N0.getScalarValueSizeInBits(); 4803 if (BitSize <= 64) { 4804 uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; 4805 SDLoc DL(N); 4806 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), 4807 DAG.getConstant(~0ULL >> ShAmt, DL, VT)); 4808 } 4809 } 4810 4811 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) 4812 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 4813 // Shifting in all undef bits? 
4814 EVT SmallVT = N0.getOperand(0).getValueType(); 4815 unsigned BitSize = SmallVT.getScalarSizeInBits(); 4816 if (N1C->getZExtValue() >= BitSize) 4817 return DAG.getUNDEF(VT); 4818 4819 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { 4820 uint64_t ShiftAmt = N1C->getZExtValue(); 4821 SDLoc DL0(N0); 4822 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT, 4823 N0.getOperand(0), 4824 DAG.getConstant(ShiftAmt, DL0, 4825 getShiftAmountTy(SmallVT))); 4826 AddToWorklist(SmallShift.getNode()); 4827 APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); 4828 SDLoc DL(N); 4829 return DAG.getNode(ISD::AND, DL, VT, 4830 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), 4831 DAG.getConstant(Mask, DL, VT)); 4832 } 4833 } 4834 4835 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign 4836 // bit, which is unmodified by sra. 4837 if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) { 4838 if (N0.getOpcode() == ISD::SRA) 4839 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); 4840 } 4841 4842 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). 4843 if (N1C && N0.getOpcode() == ISD::CTLZ && 4844 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { 4845 APInt KnownZero, KnownOne; 4846 DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne); 4847 4848 // If any of the input bits are KnownOne, then the input couldn't be all 4849 // zeros, thus the result of the srl will always be zero. 4850 if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); 4851 4852 // If all of the bits input the to ctlz node are known to be zero, then 4853 // the result of the ctlz is "32" and the result of the shift is one. 4854 APInt UnknownBits = ~KnownZero; 4855 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT); 4856 4857 // Otherwise, check to see if there is exactly one bit input to the ctlz. 
4858 if ((UnknownBits & (UnknownBits - 1)) == 0) { 4859 // Okay, we know that only that the single bit specified by UnknownBits 4860 // could be set on input to the CTLZ node. If this bit is set, the SRL 4861 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair 4862 // to an SRL/XOR pair, which is likely to simplify more. 4863 unsigned ShAmt = UnknownBits.countTrailingZeros(); 4864 SDValue Op = N0.getOperand(0); 4865 4866 if (ShAmt) { 4867 SDLoc DL(N0); 4868 Op = DAG.getNode(ISD::SRL, DL, VT, Op, 4869 DAG.getConstant(ShAmt, DL, 4870 getShiftAmountTy(Op.getValueType()))); 4871 AddToWorklist(Op.getNode()); 4872 } 4873 4874 SDLoc DL(N); 4875 return DAG.getNode(ISD::XOR, DL, VT, 4876 Op, DAG.getConstant(1, DL, VT)); 4877 } 4878 } 4879 4880 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). 4881 if (N1.getOpcode() == ISD::TRUNCATE && 4882 N1.getOperand(0).getOpcode() == ISD::AND) { 4883 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) 4884 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); 4885 } 4886 4887 // fold operands of srl based on knowledge that the low bits are not 4888 // demanded. 4889 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 4890 return SDValue(N, 0); 4891 4892 if (N1C && !N1C->isOpaque()) 4893 if (SDValue NewSRL = visitShiftByConstant(N, N1C)) 4894 return NewSRL; 4895 4896 // Attempt to convert a srl of a load into a narrower zero-extending load. 4897 if (SDValue NarrowLoad = ReduceLoadWidth(N)) 4898 return NarrowLoad; 4899 4900 // Here is a common situation. We want to optimize: 4901 // 4902 // %a = ... 4903 // %b = and i32 %a, 2 4904 // %c = srl i32 %b, 1 4905 // brcond i32 %c ... 4906 // 4907 // into 4908 // 4909 // %a = ... 4910 // %b = and %a, 2 4911 // %c = setcc eq %b, 0 4912 // brcond %c ... 4913 // 4914 // However when after the source operand of SRL is optimized into AND, the SRL 4915 // itself may not be optimized further. 
Look for it and add the BRCOND into 4916 // the worklist. 4917 if (N->hasOneUse()) { 4918 SDNode *Use = *N->use_begin(); 4919 if (Use->getOpcode() == ISD::BRCOND) 4920 AddToWorklist(Use); 4921 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { 4922 // Also look pass the truncate. 4923 Use = *Use->use_begin(); 4924 if (Use->getOpcode() == ISD::BRCOND) 4925 AddToWorklist(Use); 4926 } 4927 } 4928 4929 return SDValue(); 4930 } 4931 4932 SDValue DAGCombiner::visitBSWAP(SDNode *N) { 4933 SDValue N0 = N->getOperand(0); 4934 EVT VT = N->getValueType(0); 4935 4936 // fold (bswap c1) -> c2 4937 if (isConstantIntBuildVectorOrConstantInt(N0)) 4938 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); 4939 // fold (bswap (bswap x)) -> x 4940 if (N0.getOpcode() == ISD::BSWAP) 4941 return N0->getOperand(0); 4942 return SDValue(); 4943 } 4944 4945 SDValue DAGCombiner::visitCTLZ(SDNode *N) { 4946 SDValue N0 = N->getOperand(0); 4947 EVT VT = N->getValueType(0); 4948 4949 // fold (ctlz c1) -> c2 4950 if (isConstantIntBuildVectorOrConstantInt(N0)) 4951 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); 4952 return SDValue(); 4953 } 4954 4955 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { 4956 SDValue N0 = N->getOperand(0); 4957 EVT VT = N->getValueType(0); 4958 4959 // fold (ctlz_zero_undef c1) -> c2 4960 if (isConstantIntBuildVectorOrConstantInt(N0)) 4961 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); 4962 return SDValue(); 4963 } 4964 4965 SDValue DAGCombiner::visitCTTZ(SDNode *N) { 4966 SDValue N0 = N->getOperand(0); 4967 EVT VT = N->getValueType(0); 4968 4969 // fold (cttz c1) -> c2 4970 if (isConstantIntBuildVectorOrConstantInt(N0)) 4971 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); 4972 return SDValue(); 4973 } 4974 4975 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { 4976 SDValue N0 = N->getOperand(0); 4977 EVT VT = N->getValueType(0); 4978 4979 // fold (cttz_zero_undef c1) -> c2 4980 if (isConstantIntBuildVectorOrConstantInt(N0)) 4981 
return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); 4982 return SDValue(); 4983 } 4984 4985 SDValue DAGCombiner::visitCTPOP(SDNode *N) { 4986 SDValue N0 = N->getOperand(0); 4987 EVT VT = N->getValueType(0); 4988 4989 // fold (ctpop c1) -> c2 4990 if (isConstantIntBuildVectorOrConstantInt(N0)) 4991 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); 4992 return SDValue(); 4993 } 4994 4995 4996 /// \brief Generate Min/Max node 4997 static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, 4998 SDValue True, SDValue False, 4999 ISD::CondCode CC, const TargetLowering &TLI, 5000 SelectionDAG &DAG) { 5001 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) 5002 return SDValue(); 5003 5004 switch (CC) { 5005 case ISD::SETOLT: 5006 case ISD::SETOLE: 5007 case ISD::SETLT: 5008 case ISD::SETLE: 5009 case ISD::SETULT: 5010 case ISD::SETULE: { 5011 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM; 5012 if (TLI.isOperationLegal(Opcode, VT)) 5013 return DAG.getNode(Opcode, DL, VT, LHS, RHS); 5014 return SDValue(); 5015 } 5016 case ISD::SETOGT: 5017 case ISD::SETOGE: 5018 case ISD::SETGT: 5019 case ISD::SETGE: 5020 case ISD::SETUGT: 5021 case ISD::SETUGE: { 5022 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM; 5023 if (TLI.isOperationLegal(Opcode, VT)) 5024 return DAG.getNode(Opcode, DL, VT, LHS, RHS); 5025 return SDValue(); 5026 } 5027 default: 5028 return SDValue(); 5029 } 5030 } 5031 5032 SDValue DAGCombiner::visitSELECT(SDNode *N) { 5033 SDValue N0 = N->getOperand(0); 5034 SDValue N1 = N->getOperand(1); 5035 SDValue N2 = N->getOperand(2); 5036 EVT VT = N->getValueType(0); 5037 EVT VT0 = N0.getValueType(); 5038 5039 // fold (select C, X, X) -> X 5040 if (N1 == N2) 5041 return N1; 5042 if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) { 5043 // fold (select true, X, Y) -> X 5044 // fold (select false, X, Y) -> Y 5045 return !N0C->isNullValue() ? 
N1 : N2; 5046 } 5047 // fold (select C, 1, X) -> (or C, X) 5048 if (VT == MVT::i1 && isOneConstant(N1)) 5049 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); 5050 // fold (select C, 0, 1) -> (xor C, 1) 5051 // We can't do this reliably if integer based booleans have different contents 5052 // to floating point based booleans. This is because we can't tell whether we 5053 // have an integer-based boolean or a floating-point-based boolean unless we 5054 // can find the SETCC that produced it and inspect its operands. This is 5055 // fairly easy if C is the SETCC node, but it can potentially be 5056 // undiscoverable (or not reasonably discoverable). For example, it could be 5057 // in another basic block or it could require searching a complicated 5058 // expression. 5059 if (VT.isInteger() && 5060 (VT0 == MVT::i1 || (VT0.isInteger() && 5061 TLI.getBooleanContents(false, false) == 5062 TLI.getBooleanContents(false, true) && 5063 TLI.getBooleanContents(false, false) == 5064 TargetLowering::ZeroOrOneBooleanContent)) && 5065 isNullConstant(N1) && isOneConstant(N2)) { 5066 SDValue XORNode; 5067 if (VT == VT0) { 5068 SDLoc DL(N); 5069 return DAG.getNode(ISD::XOR, DL, VT0, 5070 N0, DAG.getConstant(1, DL, VT0)); 5071 } 5072 SDLoc DL0(N0); 5073 XORNode = DAG.getNode(ISD::XOR, DL0, VT0, 5074 N0, DAG.getConstant(1, DL0, VT0)); 5075 AddToWorklist(XORNode.getNode()); 5076 if (VT.bitsGT(VT0)) 5077 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); 5078 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); 5079 } 5080 // fold (select C, 0, X) -> (and (not C), X) 5081 if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { 5082 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); 5083 AddToWorklist(NOTNode.getNode()); 5084 return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); 5085 } 5086 // fold (select C, X, 1) -> (or (not C), X) 5087 if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) { 5088 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); 5089 
AddToWorklist(NOTNode.getNode()); 5090 return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); 5091 } 5092 // fold (select C, X, 0) -> (and C, X) 5093 if (VT == MVT::i1 && isNullConstant(N2)) 5094 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); 5095 // fold (select X, X, Y) -> (or X, Y) 5096 // fold (select X, 1, Y) -> (or X, Y) 5097 if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) 5098 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); 5099 // fold (select X, Y, X) -> (and X, Y) 5100 // fold (select X, Y, 0) -> (and X, Y) 5101 if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) 5102 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); 5103 5104 // If we can fold this based on the true/false value, do so. 5105 if (SimplifySelectOps(N, N1, N2)) 5106 return SDValue(N, 0); // Don't revisit N. 5107 5108 if (VT0 == MVT::i1) { 5109 // The code in this block deals with the following 2 equivalences: 5110 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) 5111 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) 5112 // The target can specify its prefered form with the 5113 // shouldNormalizeToSelectSequence() callback. However we always transform 5114 // to the right anyway if we find the inner select exists in the DAG anyway 5115 // and we always transform to the left side if we know that we can further 5116 // optimize the combination of the conditions. 
5117 bool normalizeToSequence 5118 = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); 5119 // select (and Cond0, Cond1), X, Y 5120 // -> select Cond0, (select Cond1, X, Y), Y 5121 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { 5122 SDValue Cond0 = N0->getOperand(0); 5123 SDValue Cond1 = N0->getOperand(1); 5124 SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), 5125 N1.getValueType(), Cond1, N1, N2); 5126 if (normalizeToSequence || !InnerSelect.use_empty()) 5127 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, 5128 InnerSelect, N2); 5129 } 5130 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) 5131 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { 5132 SDValue Cond0 = N0->getOperand(0); 5133 SDValue Cond1 = N0->getOperand(1); 5134 SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), 5135 N1.getValueType(), Cond1, N1, N2); 5136 if (normalizeToSequence || !InnerSelect.use_empty()) 5137 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, 5138 InnerSelect); 5139 } 5140 5141 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y 5142 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) { 5143 SDValue N1_0 = N1->getOperand(0); 5144 SDValue N1_1 = N1->getOperand(1); 5145 SDValue N1_2 = N1->getOperand(2); 5146 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { 5147 // Create the actual and node if we can generate good code for it. 5148 if (!normalizeToSequence) { 5149 SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), 5150 N0, N1_0); 5151 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, 5152 N1_1, N2); 5153 } 5154 // Otherwise see if we can optimize the "and" to a better pattern. 
5155 if (SDValue Combined = visitANDLike(N0, N1_0, N)) 5156 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, 5157 N1_1, N2); 5158 } 5159 } 5160 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y 5161 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) { 5162 SDValue N2_0 = N2->getOperand(0); 5163 SDValue N2_1 = N2->getOperand(1); 5164 SDValue N2_2 = N2->getOperand(2); 5165 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { 5166 // Create the actual or node if we can generate good code for it. 5167 if (!normalizeToSequence) { 5168 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), 5169 N0, N2_0); 5170 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, 5171 N1, N2_2); 5172 } 5173 // Otherwise see if we can optimize to a better pattern. 5174 if (SDValue Combined = visitORLike(N0, N2_0, N)) 5175 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, 5176 N1, N2_2); 5177 } 5178 } 5179 } 5180 5181 // fold selects based on a setcc into other things, such as min/max/abs 5182 if (N0.getOpcode() == ISD::SETCC) { 5183 // select x, y (fcmp lt x, y) -> fminnum x, y 5184 // select x, y (fcmp gt x, y) -> fmaxnum x, y 5185 // 5186 // This is OK if we don't care about what happens if either operand is a 5187 // NaN. 5188 // 5189 5190 // FIXME: Instead of testing for UnsafeFPMath, this should be checking for 5191 // no signed zeros as well as no nans. 
5192 const TargetOptions &Options = DAG.getTarget().Options; 5193 if (Options.UnsafeFPMath && 5194 VT.isFloatingPoint() && N0.hasOneUse() && 5195 DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { 5196 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 5197 5198 if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), 5199 N0.getOperand(1), N1, N2, CC, 5200 TLI, DAG)) 5201 return FMinMax; 5202 } 5203 5204 if ((!LegalOperations && 5205 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || 5206 TLI.isOperationLegal(ISD::SELECT_CC, VT)) 5207 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, 5208 N0.getOperand(0), N0.getOperand(1), 5209 N1, N2, N0.getOperand(2)); 5210 return SimplifySelect(SDLoc(N), N0, N1, N2); 5211 } 5212 5213 return SDValue(); 5214 } 5215 5216 static 5217 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { 5218 SDLoc DL(N); 5219 EVT LoVT, HiVT; 5220 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 5221 5222 // Split the inputs. 5223 SDValue Lo, Hi, LL, LH, RL, RH; 5224 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); 5225 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); 5226 5227 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); 5228 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); 5229 5230 return std::make_pair(Lo, Hi); 5231 } 5232 5233 // This function assumes all the vselect's arguments are CONCAT_VECTOR 5234 // nodes and that the condition is a BV of ConstantSDNodes (or undefs). 
5235 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { 5236 SDLoc dl(N); 5237 SDValue Cond = N->getOperand(0); 5238 SDValue LHS = N->getOperand(1); 5239 SDValue RHS = N->getOperand(2); 5240 EVT VT = N->getValueType(0); 5241 int NumElems = VT.getVectorNumElements(); 5242 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && 5243 RHS.getOpcode() == ISD::CONCAT_VECTORS && 5244 Cond.getOpcode() == ISD::BUILD_VECTOR); 5245 5246 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about 5247 // binary ones here. 5248 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2) 5249 return SDValue(); 5250 5251 // We're sure we have an even number of elements due to the 5252 // concat_vectors we have as arguments to vselect. 5253 // Skip BV elements until we find one that's not an UNDEF 5254 // After we find an UNDEF element, keep looping until we get to half the 5255 // length of the BV and see if all the non-undef nodes are the same. 5256 ConstantSDNode *BottomHalf = nullptr; 5257 for (int i = 0; i < NumElems / 2; ++i) { 5258 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 5259 continue; 5260 5261 if (BottomHalf == nullptr) 5262 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 5263 else if (Cond->getOperand(i).getNode() != BottomHalf) 5264 return SDValue(); 5265 } 5266 5267 // Do the same for the second half of the BuildVector 5268 ConstantSDNode *TopHalf = nullptr; 5269 for (int i = NumElems / 2; i < NumElems; ++i) { 5270 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 5271 continue; 5272 5273 if (TopHalf == nullptr) 5274 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 5275 else if (Cond->getOperand(i).getNode() != TopHalf) 5276 return SDValue(); 5277 } 5278 5279 assert(TopHalf && BottomHalf && 5280 "One half of the selector was all UNDEFs and the other was all the " 5281 "same value. 
This should have been addressed before this function."); 5282 return DAG.getNode( 5283 ISD::CONCAT_VECTORS, dl, VT, 5284 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), 5285 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); 5286 } 5287 5288 SDValue DAGCombiner::visitMSCATTER(SDNode *N) { 5289 5290 if (Level >= AfterLegalizeTypes) 5291 return SDValue(); 5292 5293 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); 5294 SDValue Mask = MSC->getMask(); 5295 SDValue Data = MSC->getValue(); 5296 SDLoc DL(N); 5297 5298 // If the MSCATTER data type requires splitting and the mask is provided by a 5299 // SETCC, then split both nodes and its operands before legalization. This 5300 // prevents the type legalizer from unrolling SETCC into scalar comparisons 5301 // and enables future optimizations (e.g. min/max pattern matching on X86). 5302 if (Mask.getOpcode() != ISD::SETCC) 5303 return SDValue(); 5304 5305 // Check if any splitting is required. 5306 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != 5307 TargetLowering::TypeSplitVector) 5308 return SDValue(); 5309 SDValue MaskLo, MaskHi, Lo, Hi; 5310 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 5311 5312 EVT LoVT, HiVT; 5313 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); 5314 5315 SDValue Chain = MSC->getChain(); 5316 5317 EVT MemoryVT = MSC->getMemoryVT(); 5318 unsigned Alignment = MSC->getOriginalAlignment(); 5319 5320 EVT LoMemVT, HiMemVT; 5321 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 5322 5323 SDValue DataLo, DataHi; 5324 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 5325 5326 SDValue BasePtr = MSC->getBasePtr(); 5327 SDValue IndexLo, IndexHi; 5328 std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); 5329 5330 MachineMemOperand *MMO = DAG.getMachineFunction(). 
5331 getMachineMemOperand(MSC->getPointerInfo(), 5332 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 5333 Alignment, MSC->getAAInfo(), MSC->getRanges()); 5334 5335 SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo }; 5336 Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), 5337 DL, OpsLo, MMO); 5338 5339 SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi}; 5340 Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), 5341 DL, OpsHi, MMO); 5342 5343 AddToWorklist(Lo.getNode()); 5344 AddToWorklist(Hi.getNode()); 5345 5346 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 5347 } 5348 5349 SDValue DAGCombiner::visitMSTORE(SDNode *N) { 5350 5351 if (Level >= AfterLegalizeTypes) 5352 return SDValue(); 5353 5354 MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); 5355 SDValue Mask = MST->getMask(); 5356 SDValue Data = MST->getValue(); 5357 SDLoc DL(N); 5358 5359 // If the MSTORE data type requires splitting and the mask is provided by a 5360 // SETCC, then split both nodes and its operands before legalization. This 5361 // prevents the type legalizer from unrolling SETCC into scalar comparisons 5362 // and enables future optimizations (e.g. min/max pattern matching on X86). 5363 if (Mask.getOpcode() == ISD::SETCC) { 5364 5365 // Check if any splitting is required. 
5366 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != 5367 TargetLowering::TypeSplitVector) 5368 return SDValue(); 5369 5370 SDValue MaskLo, MaskHi, Lo, Hi; 5371 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 5372 5373 EVT LoVT, HiVT; 5374 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); 5375 5376 SDValue Chain = MST->getChain(); 5377 SDValue Ptr = MST->getBasePtr(); 5378 5379 EVT MemoryVT = MST->getMemoryVT(); 5380 unsigned Alignment = MST->getOriginalAlignment(); 5381 5382 // if Alignment is equal to the vector size, 5383 // take the half of it for the second part 5384 unsigned SecondHalfAlignment = 5385 (Alignment == Data->getValueType(0).getSizeInBits()/8) ? 5386 Alignment/2 : Alignment; 5387 5388 EVT LoMemVT, HiMemVT; 5389 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 5390 5391 SDValue DataLo, DataHi; 5392 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 5393 5394 MachineMemOperand *MMO = DAG.getMachineFunction(). 5395 getMachineMemOperand(MST->getPointerInfo(), 5396 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 5397 Alignment, MST->getAAInfo(), MST->getRanges()); 5398 5399 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, 5400 MST->isTruncatingStore()); 5401 5402 unsigned IncrementSize = LoMemVT.getSizeInBits()/8; 5403 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 5404 DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); 5405 5406 MMO = DAG.getMachineFunction(). 
5407 getMachineMemOperand(MST->getPointerInfo(), 5408 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), 5409 SecondHalfAlignment, MST->getAAInfo(), 5410 MST->getRanges()); 5411 5412 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, 5413 MST->isTruncatingStore()); 5414 5415 AddToWorklist(Lo.getNode()); 5416 AddToWorklist(Hi.getNode()); 5417 5418 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 5419 } 5420 return SDValue(); 5421 } 5422 5423 SDValue DAGCombiner::visitMGATHER(SDNode *N) { 5424 5425 if (Level >= AfterLegalizeTypes) 5426 return SDValue(); 5427 5428 MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N); 5429 SDValue Mask = MGT->getMask(); 5430 SDLoc DL(N); 5431 5432 // If the MGATHER result requires splitting and the mask is provided by a 5433 // SETCC, then split both nodes and its operands before legalization. This 5434 // prevents the type legalizer from unrolling SETCC into scalar comparisons 5435 // and enables future optimizations (e.g. min/max pattern matching on X86). 5436 5437 if (Mask.getOpcode() != ISD::SETCC) 5438 return SDValue(); 5439 5440 EVT VT = N->getValueType(0); 5441 5442 // Check if any splitting is required. 
5443 if (TLI.getTypeAction(*DAG.getContext(), VT) != 5444 TargetLowering::TypeSplitVector) 5445 return SDValue(); 5446 5447 SDValue MaskLo, MaskHi, Lo, Hi; 5448 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 5449 5450 SDValue Src0 = MGT->getValue(); 5451 SDValue Src0Lo, Src0Hi; 5452 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); 5453 5454 EVT LoVT, HiVT; 5455 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); 5456 5457 SDValue Chain = MGT->getChain(); 5458 EVT MemoryVT = MGT->getMemoryVT(); 5459 unsigned Alignment = MGT->getOriginalAlignment(); 5460 5461 EVT LoMemVT, HiMemVT; 5462 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 5463 5464 SDValue BasePtr = MGT->getBasePtr(); 5465 SDValue Index = MGT->getIndex(); 5466 SDValue IndexLo, IndexHi; 5467 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); 5468 5469 MachineMemOperand *MMO = DAG.getMachineFunction(). 5470 getMachineMemOperand(MGT->getPointerInfo(), 5471 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), 5472 Alignment, MGT->getAAInfo(), MGT->getRanges()); 5473 5474 SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo }; 5475 Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, 5476 MMO); 5477 5478 SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi}; 5479 Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, 5480 MMO); 5481 5482 AddToWorklist(Lo.getNode()); 5483 AddToWorklist(Hi.getNode()); 5484 5485 // Build a factor node to remember that this load is independent of the 5486 // other one. 5487 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), 5488 Hi.getValue(1)); 5489 5490 // Legalized the chain result - switch anything that used the old chain to 5491 // use the new one. 
5492 DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); 5493 5494 SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 5495 5496 SDValue RetOps[] = { GatherRes, Chain }; 5497 return DAG.getMergeValues(RetOps, DL); 5498 } 5499 5500 SDValue DAGCombiner::visitMLOAD(SDNode *N) { 5501 5502 if (Level >= AfterLegalizeTypes) 5503 return SDValue(); 5504 5505 MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); 5506 SDValue Mask = MLD->getMask(); 5507 SDLoc DL(N); 5508 5509 // If the MLOAD result requires splitting and the mask is provided by a 5510 // SETCC, then split both nodes and its operands before legalization. This 5511 // prevents the type legalizer from unrolling SETCC into scalar comparisons 5512 // and enables future optimizations (e.g. min/max pattern matching on X86). 5513 5514 if (Mask.getOpcode() == ISD::SETCC) { 5515 EVT VT = N->getValueType(0); 5516 5517 // Check if any splitting is required. 5518 if (TLI.getTypeAction(*DAG.getContext(), VT) != 5519 TargetLowering::TypeSplitVector) 5520 return SDValue(); 5521 5522 SDValue MaskLo, MaskHi, Lo, Hi; 5523 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 5524 5525 SDValue Src0 = MLD->getSrc0(); 5526 SDValue Src0Lo, Src0Hi; 5527 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); 5528 5529 EVT LoVT, HiVT; 5530 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); 5531 5532 SDValue Chain = MLD->getChain(); 5533 SDValue Ptr = MLD->getBasePtr(); 5534 EVT MemoryVT = MLD->getMemoryVT(); 5535 unsigned Alignment = MLD->getOriginalAlignment(); 5536 5537 // if Alignment is equal to the vector size, 5538 // take the half of it for the second part 5539 unsigned SecondHalfAlignment = 5540 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 5541 Alignment/2 : Alignment; 5542 5543 EVT LoMemVT, HiMemVT; 5544 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 5545 5546 MachineMemOperand *MMO = DAG.getMachineFunction(). 
5547 getMachineMemOperand(MLD->getPointerInfo(), 5548 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), 5549 Alignment, MLD->getAAInfo(), MLD->getRanges()); 5550 5551 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, 5552 ISD::NON_EXTLOAD); 5553 5554 unsigned IncrementSize = LoMemVT.getSizeInBits()/8; 5555 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 5556 DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); 5557 5558 MMO = DAG.getMachineFunction(). 5559 getMachineMemOperand(MLD->getPointerInfo(), 5560 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), 5561 SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); 5562 5563 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, 5564 ISD::NON_EXTLOAD); 5565 5566 AddToWorklist(Lo.getNode()); 5567 AddToWorklist(Hi.getNode()); 5568 5569 // Build a factor node to remember that this load is independent of the 5570 // other one. 5571 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), 5572 Hi.getValue(1)); 5573 5574 // Legalized the chain result - switch anything that used the old chain to 5575 // use the new one. 5576 DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); 5577 5578 SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 5579 5580 SDValue RetOps[] = { LoadRes, Chain }; 5581 return DAG.getMergeValues(RetOps, DL); 5582 } 5583 return SDValue(); 5584 } 5585 5586 SDValue DAGCombiner::visitVSELECT(SDNode *N) { 5587 SDValue N0 = N->getOperand(0); 5588 SDValue N1 = N->getOperand(1); 5589 SDValue N2 = N->getOperand(2); 5590 SDLoc DL(N); 5591 5592 // Canonicalize integer abs. 
5593 // vselect (setg[te] X, 0), X, -X -> 5594 // vselect (setgt X, -1), X, -X -> 5595 // vselect (setl[te] X, 0), -X, X -> 5596 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 5597 if (N0.getOpcode() == ISD::SETCC) { 5598 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 5599 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 5600 bool isAbs = false; 5601 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); 5602 5603 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || 5604 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && 5605 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) 5606 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); 5607 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && 5608 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) 5609 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); 5610 5611 if (isAbs) { 5612 EVT VT = LHS.getValueType(); 5613 SDValue Shift = DAG.getNode( 5614 ISD::SRA, DL, VT, LHS, 5615 DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT)); 5616 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); 5617 AddToWorklist(Shift.getNode()); 5618 AddToWorklist(Add.getNode()); 5619 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); 5620 } 5621 } 5622 5623 if (SimplifySelectOps(N, N1, N2)) 5624 return SDValue(N, 0); // Don't revisit N. 5625 5626 // If the VSELECT result requires splitting and the mask is provided by a 5627 // SETCC, then split both nodes and its operands before legalization. This 5628 // prevents the type legalizer from unrolling SETCC into scalar comparisons 5629 // and enables future optimizations (e.g. min/max pattern matching on X86). 5630 if (N0.getOpcode() == ISD::SETCC) { 5631 EVT VT = N->getValueType(0); 5632 5633 // Check if any splitting is required. 
5634 if (TLI.getTypeAction(*DAG.getContext(), VT) != 5635 TargetLowering::TypeSplitVector) 5636 return SDValue(); 5637 5638 SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH; 5639 std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); 5640 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); 5641 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); 5642 5643 Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL); 5644 Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH); 5645 5646 // Add the new VSELECT nodes to the work list in case they need to be split 5647 // again. 5648 AddToWorklist(Lo.getNode()); 5649 AddToWorklist(Hi.getNode()); 5650 5651 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 5652 } 5653 5654 // Fold (vselect (build_vector all_ones), N1, N2) -> N1 5655 if (ISD::isBuildVectorAllOnes(N0.getNode())) 5656 return N1; 5657 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2 5658 if (ISD::isBuildVectorAllZeros(N0.getNode())) 5659 return N2; 5660 5661 // The ConvertSelectToConcatVector function is assuming both the above 5662 // checks for (vselect (build_vector all{ones,zeros) ...) have been made 5663 // and addressed. 
5664 if (N1.getOpcode() == ISD::CONCAT_VECTORS && 5665 N2.getOpcode() == ISD::CONCAT_VECTORS && 5666 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { 5667 if (SDValue CV = ConvertSelectToConcatVector(N, DAG)) 5668 return CV; 5669 } 5670 5671 return SDValue(); 5672 } 5673 5674 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { 5675 SDValue N0 = N->getOperand(0); 5676 SDValue N1 = N->getOperand(1); 5677 SDValue N2 = N->getOperand(2); 5678 SDValue N3 = N->getOperand(3); 5679 SDValue N4 = N->getOperand(4); 5680 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); 5681 5682 // fold select_cc lhs, rhs, x, x, cc -> x 5683 if (N2 == N3) 5684 return N2; 5685 5686 // Determine if the condition we're dealing with is constant 5687 SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), 5688 N0, N1, CC, SDLoc(N), false); 5689 if (SCC.getNode()) { 5690 AddToWorklist(SCC.getNode()); 5691 5692 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { 5693 if (!SCCC->isNullValue()) 5694 return N2; // cond always true -> true val 5695 else 5696 return N3; // cond always false -> false val 5697 } else if (SCC->getOpcode() == ISD::UNDEF) { 5698 // When the condition is UNDEF, just return the first operand. This is 5699 // coherent the DAG creation, no setcc node is created in this case 5700 return N2; 5701 } else if (SCC.getOpcode() == ISD::SETCC) { 5702 // Fold to a simpler select_cc 5703 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), 5704 SCC.getOperand(0), SCC.getOperand(1), N2, N3, 5705 SCC.getOperand(2)); 5706 } 5707 } 5708 5709 // If we can fold this based on the true/false value, do so. 5710 if (SimplifySelectOps(N, N2, N3)) 5711 return SDValue(N, 0); // Don't revisit N. 
5712 5713 // fold select_cc into other things, such as min/max/abs 5714 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); 5715 } 5716 5717 SDValue DAGCombiner::visitSETCC(SDNode *N) { 5718 return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), 5719 cast<CondCodeSDNode>(N->getOperand(2))->get(), 5720 SDLoc(N)); 5721 } 5722 5723 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or 5724 /// a build_vector of constants. 5725 /// This function is called by the DAGCombiner when visiting sext/zext/aext 5726 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). 5727 /// Vector extends are not folded if operations are legal; this is to 5728 /// avoid introducing illegal build_vector dag nodes. 5729 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, 5730 SelectionDAG &DAG, bool LegalTypes, 5731 bool LegalOperations) { 5732 unsigned Opcode = N->getOpcode(); 5733 SDValue N0 = N->getOperand(0); 5734 EVT VT = N->getValueType(0); 5735 5736 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || 5737 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) 5738 && "Expected EXTEND dag node in input!"); 5739 5740 // fold (sext c1) -> c1 5741 // fold (zext c1) -> c1 5742 // fold (aext c1) -> c1 5743 if (isa<ConstantSDNode>(N0)) 5744 return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); 5745 5746 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) 5747 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) 5748 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) 5749 EVT SVT = VT.getScalarType(); 5750 if (!(VT.isVector() && 5751 (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && 5752 ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) 5753 return nullptr; 5754 5755 // We can fold this node into a build_vector. 
5756 unsigned VTBits = SVT.getSizeInBits(); 5757 unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); 5758 SmallVector<SDValue, 8> Elts; 5759 unsigned NumElts = VT.getVectorNumElements(); 5760 SDLoc DL(N); 5761 5762 for (unsigned i=0; i != NumElts; ++i) { 5763 SDValue Op = N0->getOperand(i); 5764 if (Op->getOpcode() == ISD::UNDEF) { 5765 Elts.push_back(DAG.getUNDEF(SVT)); 5766 continue; 5767 } 5768 5769 SDLoc DL(Op); 5770 // Get the constant value and if needed trunc it to the size of the type. 5771 // Nodes like build_vector might have constants wider than the scalar type. 5772 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits); 5773 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) 5774 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT)); 5775 else 5776 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); 5777 } 5778 5779 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); 5780 } 5781 5782 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: 5783 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" 5784 // transformation. Returns true if extension are possible and the above 5785 // mentioned transformation is profitable. 5786 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, 5787 unsigned ExtOpc, 5788 SmallVectorImpl<SDNode *> &ExtendNodes, 5789 const TargetLowering &TLI) { 5790 bool HasCopyToRegUses = false; 5791 bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); 5792 for (SDNode::use_iterator UI = N0.getNode()->use_begin(), 5793 UE = N0.getNode()->use_end(); 5794 UI != UE; ++UI) { 5795 SDNode *User = *UI; 5796 if (User == N) 5797 continue; 5798 if (UI.getUse().getResNo() != N0.getResNo()) 5799 continue; 5800 // FIXME: Only extend SETCC N, N and SETCC N, c for now. 
5801 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 5802 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 5803 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 5804 // Sign bits will be lost after a zext. 5805 return false; 5806 bool Add = false; 5807 for (unsigned i = 0; i != 2; ++i) { 5808 SDValue UseOp = User->getOperand(i); 5809 if (UseOp == N0) 5810 continue; 5811 if (!isa<ConstantSDNode>(UseOp)) 5812 return false; 5813 Add = true; 5814 } 5815 if (Add) 5816 ExtendNodes.push_back(User); 5817 continue; 5818 } 5819 // If truncates aren't free and there are users we can't 5820 // extend, it isn't worthwhile. 5821 if (!isTruncFree) 5822 return false; 5823 // Remember if this value is live-out. 5824 if (User->getOpcode() == ISD::CopyToReg) 5825 HasCopyToRegUses = true; 5826 } 5827 5828 if (HasCopyToRegUses) { 5829 bool BothLiveOut = false; 5830 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 5831 UI != UE; ++UI) { 5832 SDUse &Use = UI.getUse(); 5833 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 5834 BothLiveOut = true; 5835 break; 5836 } 5837 } 5838 if (BothLiveOut) 5839 // Both unextended and extended values are live out. There had better be 5840 // a good reason for the transformation. 5841 return ExtendNodes.size(); 5842 } 5843 return true; 5844 } 5845 5846 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, 5847 SDValue Trunc, SDValue ExtLoad, SDLoc DL, 5848 ISD::NodeType ExtType) { 5849 // Extend SetCC uses if necessary. 
5850 for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { 5851 SDNode *SetCC = SetCCs[i]; 5852 SmallVector<SDValue, 4> Ops; 5853 5854 for (unsigned j = 0; j != 2; ++j) { 5855 SDValue SOp = SetCC->getOperand(j); 5856 if (SOp == Trunc) 5857 Ops.push_back(ExtLoad); 5858 else 5859 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); 5860 } 5861 5862 Ops.push_back(SetCC->getOperand(2)); 5863 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); 5864 } 5865 } 5866 5867 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). 5868 SDValue DAGCombiner::CombineExtLoad(SDNode *N) { 5869 SDValue N0 = N->getOperand(0); 5870 EVT DstVT = N->getValueType(0); 5871 EVT SrcVT = N0.getValueType(); 5872 5873 assert((N->getOpcode() == ISD::SIGN_EXTEND || 5874 N->getOpcode() == ISD::ZERO_EXTEND) && 5875 "Unexpected node type (not an extend)!"); 5876 5877 // fold (sext (load x)) to multiple smaller sextloads; same for zext. 5878 // For example, on a target with legal v4i32, but illegal v8i32, turn: 5879 // (v8i32 (sext (v8i16 (load x)))) 5880 // into: 5881 // (v8i32 (concat_vectors (v4i32 (sextload x)), 5882 // (v4i32 (sextload (x + 16))))) 5883 // Where uses of the original load, i.e.: 5884 // (v8i16 (load x)) 5885 // are replaced with: 5886 // (v8i16 (truncate 5887 // (v8i32 (concat_vectors (v4i32 (sextload x)), 5888 // (v4i32 (sextload (x + 16))))))) 5889 // 5890 // This combine is only applicable to illegal, but splittable, vectors. 5891 // All legal types, and illegal non-vector types, are handled elsewhere. 5892 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. 
5893 // 5894 if (N0->getOpcode() != ISD::LOAD) 5895 return SDValue(); 5896 5897 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 5898 5899 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || 5900 !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || 5901 !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) 5902 return SDValue(); 5903 5904 SmallVector<SDNode *, 4> SetCCs; 5905 if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI)) 5906 return SDValue(); 5907 5908 ISD::LoadExtType ExtType = 5909 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; 5910 5911 // Try to split the vector types to get down to legal types. 5912 EVT SplitSrcVT = SrcVT; 5913 EVT SplitDstVT = DstVT; 5914 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && 5915 SplitSrcVT.getVectorNumElements() > 1) { 5916 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; 5917 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; 5918 } 5919 5920 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) 5921 return SDValue(); 5922 5923 SDLoc DL(N); 5924 const unsigned NumSplits = 5925 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); 5926 const unsigned Stride = SplitSrcVT.getStoreSize(); 5927 SmallVector<SDValue, 4> Loads; 5928 SmallVector<SDValue, 4> Chains; 5929 5930 SDValue BasePtr = LN0->getBasePtr(); 5931 for (unsigned Idx = 0; Idx < NumSplits; Idx++) { 5932 const unsigned Offset = Idx * Stride; 5933 const unsigned Align = MinAlign(LN0->getAlignment(), Offset); 5934 5935 SDValue SplitLoad = DAG.getExtLoad( 5936 ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, 5937 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, 5938 LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), 5939 Align, LN0->getAAInfo()); 5940 5941 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, 5942 DAG.getConstant(Stride, DL, BasePtr.getValueType())); 5943 5944 Loads.push_back(SplitLoad.getValue(0)); 5945 
Chains.push_back(SplitLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
    DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  CombineTo(N0.getNode(), Trunc, NewChain);
  ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
                  (ISD::NodeType)N->getOpcode());
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

/// Try to simplify the SIGN_EXTEND node \p N.
///
/// The folds are attempted in the order they appear below; the first one that
/// applies wins. The result is one of:
///  - a new value that replaces N,
///  - SDValue(N, 0) when the DAG has already been rewritten in place through
///    CombineTo (so the caller must not replace N again), or
///  - a null SDValue when no fold applied.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant (or constant build-vector) operands are handled by the shared
  // helper used by all the extend visitors in this file.
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      // Remember the truncate's operand before the truncate is replaced, so it
      // can be revisited afterwards.
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended. If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    // Scalar (element) bit widths of the three types involved:
    // Op --truncate--> Mid --sext--> Dest.
    unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
    unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
    unsigned DestBits = VT.getScalarType().getSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      // Bring Op to the destination width first; the bits above Mid do not
      // matter because SIGN_EXTEND_INREG overwrites them.
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // Only generate vector extloads when 1) they're legal, and 2) they are
  // deemed desirable by the target.
  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !VT.isVector() &&
        !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
    bool DoXform = true;
    // Filled by ExtendUsesToFormExtLoad and consumed by ExtendSetCCUses below.
    SmallVector<SDNode*, 4> SetCCs;
    // The load has other users: let the helper decide whether the transform
    // should still go ahead.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (VT.isVector())
      DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      // N becomes the extending load itself ...
      CombineTo(N, ExtLoad);
      // ... and the original load is replaced by a truncate of the extending
      // load (value) plus the extending load's second result (chain).
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::SIGN_EXTEND);
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
      // Re-issue the load as a sextload straight to VT, keeping the original
      // memory type.
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    // A zero-extending load cannot be turned into a sign-extending one.
    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Sign-extend the constant operand to the new width as well.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDLoc DL(N);
        // Despite its name, 'And' is whichever of and/or/xor N0 is.
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
                        ISD::SIGN_EXTEND);
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // Type of the values being compared (not of the comparison result).
    EVT N0VT = N0.getOperand(0).getValueType();
    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N0VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N0VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVectorType) {
        SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
                                      N0.getOperand(0), N0.getOperand(1),
                                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
    unsigned ElementWidth = VT.getScalarType().getSizeInBits();
    SDLoc DL(N);
    SDValue NegOne =
      DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
    SDValue SCC =
      SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
                       NegOne, DAG.getConstant(0, DL, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;

    // SimplifySelectCC found nothing: for scalars, build the select
    // explicitly on top of a fresh setcc of the target's setcc result type.
    if (!VT.isVector()) {
      EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
      if (!LegalOperations ||
          TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
        SDLoc DL(N);
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
                                     N0.getOperand(0), N0.getOperand(1), CC);
        return DAG.getSelect(DL, VT, SetCC,
                             NegOne, DAG.getConstant(0, DL, VT));
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
6179 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && 6180 DAG.SignBitIsZero(N0)) 6181 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); 6182 6183 return SDValue(); 6184 } 6185 6186 // isTruncateOf - If N is a truncate of some other value, return true, record 6187 // the value being truncated in Op and which of Op's bits are zero in KnownZero. 6188 // This function computes KnownZero to avoid a duplicated call to 6189 // computeKnownBits in the caller. 6190 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, 6191 APInt &KnownZero) { 6192 APInt KnownOne; 6193 if (N->getOpcode() == ISD::TRUNCATE) { 6194 Op = N->getOperand(0); 6195 DAG.computeKnownBits(Op, KnownZero, KnownOne); 6196 return true; 6197 } 6198 6199 if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || 6200 cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) 6201 return false; 6202 6203 SDValue Op0 = N->getOperand(0); 6204 SDValue Op1 = N->getOperand(1); 6205 assert(Op0.getValueType() == Op1.getValueType()); 6206 6207 if (isNullConstant(Op0)) 6208 Op = Op1; 6209 else if (isNullConstant(Op1)) 6210 Op = Op0; 6211 else 6212 return false; 6213 6214 DAG.computeKnownBits(Op, KnownZero, KnownOne); 6215 6216 if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) 6217 return false; 6218 6219 return true; 6220 } 6221 6222 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { 6223 SDValue N0 = N->getOperand(0); 6224 EVT VT = N->getValueType(0); 6225 6226 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, 6227 LegalOperations)) 6228 return SDValue(Res, 0); 6229 6230 // fold (zext (zext x)) -> (zext x) 6231 // fold (zext (aext x)) -> (zext x) 6232 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) 6233 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, 6234 N0.getOperand(0)); 6235 6236 // fold (zext (truncate x)) -> (zext x) or 6237 // (zext (truncate x)) -> (truncate x) 6238 // This is valid 
when the truncated bits of x are already zero. 6239 // FIXME: We should extend this to work for vectors too. 6240 SDValue Op; 6241 APInt KnownZero; 6242 if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { 6243 APInt TruncatedBits = 6244 (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? 6245 APInt(Op.getValueSizeInBits(), 0) : 6246 APInt::getBitsSet(Op.getValueSizeInBits(), 6247 N0.getValueSizeInBits(), 6248 std::min(Op.getValueSizeInBits(), 6249 VT.getSizeInBits())); 6250 if (TruncatedBits == (KnownZero & TruncatedBits)) { 6251 if (VT.bitsGT(Op.getValueType())) 6252 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); 6253 if (VT.bitsLT(Op.getValueType())) 6254 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); 6255 6256 return Op; 6257 } 6258 } 6259 6260 // fold (zext (truncate (load x))) -> (zext (smaller load x)) 6261 // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) 6262 if (N0.getOpcode() == ISD::TRUNCATE) { 6263 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { 6264 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 6265 if (NarrowLoad.getNode() != N0.getNode()) { 6266 CombineTo(N0.getNode(), NarrowLoad); 6267 // CombineTo deleted the truncate, if needed, but not what's under it. 6268 AddToWorklist(oye); 6269 } 6270 return SDValue(N, 0); // Return N so it doesn't get rechecked! 6271 } 6272 } 6273 6274 // fold (zext (truncate x)) -> (and x, mask) 6275 if (N0.getOpcode() == ISD::TRUNCATE) { 6276 // fold (zext (truncate (load x))) -> (zext (smaller load x)) 6277 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) 6278 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { 6279 SDNode *oye = N0.getNode()->getOperand(0).getNode(); 6280 if (NarrowLoad.getNode() != N0.getNode()) { 6281 CombineTo(N0.getNode(), NarrowLoad); 6282 // CombineTo deleted the truncate, if needed, but not what's under it. 
6283 AddToWorklist(oye); 6284 } 6285 return SDValue(N, 0); // Return N so it doesn't get rechecked! 6286 } 6287 6288 EVT SrcVT = N0.getOperand(0).getValueType(); 6289 EVT MinVT = N0.getValueType(); 6290 6291 // Try to mask before the extension to avoid having to generate a larger mask, 6292 // possibly over several sub-vectors. 6293 if (SrcVT.bitsLT(VT)) { 6294 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && 6295 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { 6296 SDValue Op = N0.getOperand(0); 6297 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); 6298 AddToWorklist(Op.getNode()); 6299 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); 6300 } 6301 } 6302 6303 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { 6304 SDValue Op = N0.getOperand(0); 6305 if (SrcVT.bitsLT(VT)) { 6306 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); 6307 AddToWorklist(Op.getNode()); 6308 } else if (SrcVT.bitsGT(VT)) { 6309 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); 6310 AddToWorklist(Op.getNode()); 6311 } 6312 return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); 6313 } 6314 } 6315 6316 // Fold (zext (and (trunc x), cst)) -> (and x, cst), 6317 // if either of the casts is not free. 
6318 if (N0.getOpcode() == ISD::AND && 6319 N0.getOperand(0).getOpcode() == ISD::TRUNCATE && 6320 N0.getOperand(1).getOpcode() == ISD::Constant && 6321 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), 6322 N0.getValueType()) || 6323 !TLI.isZExtFree(N0.getValueType(), VT))) { 6324 SDValue X = N0.getOperand(0).getOperand(0); 6325 if (X.getValueType().bitsLT(VT)) { 6326 X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X); 6327 } else if (X.getValueType().bitsGT(VT)) { 6328 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 6329 } 6330 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 6331 Mask = Mask.zext(VT.getSizeInBits()); 6332 SDLoc DL(N); 6333 return DAG.getNode(ISD::AND, DL, VT, 6334 X, DAG.getConstant(Mask, DL, VT)); 6335 } 6336 6337 // fold (zext (load x)) -> (zext (truncate (zextload x))) 6338 // Only generate vector extloads when 1) they're legal, and 2) they are 6339 // deemed desirable by the target. 6340 if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 6341 ((!LegalOperations && !VT.isVector() && 6342 !cast<LoadSDNode>(N0)->isVolatile()) || 6343 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { 6344 bool DoXform = true; 6345 SmallVector<SDNode*, 4> SetCCs; 6346 if (!N0.hasOneUse()) 6347 DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); 6348 if (VT.isVector()) 6349 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); 6350 if (DoXform) { 6351 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6352 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, 6353 LN0->getChain(), 6354 LN0->getBasePtr(), N0.getValueType(), 6355 LN0->getMemOperand()); 6356 CombineTo(N, ExtLoad); 6357 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), 6358 N0.getValueType(), ExtLoad); 6359 CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); 6360 6361 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), 6362 ISD::ZERO_EXTEND); 6363 return SDValue(N, 0); // Return N so it doesn't get 
rechecked! 6364 } 6365 } 6366 6367 // fold (zext (load x)) to multiple smaller zextloads. 6368 // Only on illegal but splittable vectors. 6369 if (SDValue ExtLoad = CombineExtLoad(N)) 6370 return ExtLoad; 6371 6372 // fold (zext (and/or/xor (load x), cst)) -> 6373 // (and/or/xor (zextload x), (zext cst)) 6374 // Unless (and (load x) cst) will match as a zextload already and has 6375 // additional users. 6376 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || 6377 N0.getOpcode() == ISD::XOR) && 6378 isa<LoadSDNode>(N0.getOperand(0)) && 6379 N0.getOperand(1).getOpcode() == ISD::Constant && 6380 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) && 6381 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { 6382 LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); 6383 if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { 6384 bool DoXform = true; 6385 SmallVector<SDNode*, 4> SetCCs; 6386 if (!N0.hasOneUse()) { 6387 if (N0.getOpcode() == ISD::AND) { 6388 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1)); 6389 auto NarrowLoad = false; 6390 EVT LoadResultTy = AndC->getValueType(0); 6391 EVT ExtVT, LoadedVT; 6392 if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT, 6393 NarrowLoad)) 6394 DoXform = false; 6395 } 6396 if (DoXform) 6397 DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), 6398 ISD::ZERO_EXTEND, SetCCs, TLI); 6399 } 6400 if (DoXform) { 6401 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, 6402 LN0->getChain(), LN0->getBasePtr(), 6403 LN0->getMemoryVT(), 6404 LN0->getMemOperand()); 6405 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 6406 Mask = Mask.zext(VT.getSizeInBits()); 6407 SDLoc DL(N); 6408 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, 6409 ExtLoad, DAG.getConstant(Mask, DL, VT)); 6410 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, 6411 SDLoc(N0.getOperand(0)), 6412 N0.getOperand(0).getValueType(), ExtLoad); 6413 CombineTo(N, And); 6414 
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); 6415 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, 6416 ISD::ZERO_EXTEND); 6417 return SDValue(N, 0); // Return N so it doesn't get rechecked! 6418 } 6419 } 6420 } 6421 6422 // fold (zext (zextload x)) -> (zext (truncate (zextload x))) 6423 // fold (zext ( extload x)) -> (zext (truncate (zextload x))) 6424 if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && 6425 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { 6426 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6427 EVT MemVT = LN0->getMemoryVT(); 6428 if ((!LegalOperations && !LN0->isVolatile()) || 6429 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) { 6430 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, 6431 LN0->getChain(), 6432 LN0->getBasePtr(), MemVT, 6433 LN0->getMemOperand()); 6434 CombineTo(N, ExtLoad); 6435 CombineTo(N0.getNode(), 6436 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), 6437 ExtLoad), 6438 ExtLoad.getValue(1)); 6439 return SDValue(N, 0); // Return N so it doesn't get rechecked! 6440 } 6441 } 6442 6443 if (N0.getOpcode() == ISD::SETCC) { 6444 if (!LegalOperations && VT.isVector() && 6445 N0.getValueType().getVectorElementType() == MVT::i1) { 6446 EVT N0VT = N0.getOperand(0).getValueType(); 6447 if (getSetCCResultType(N0VT) == N0.getValueType()) 6448 return SDValue(); 6449 6450 // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. 6451 // Only do this before legalize for now. 6452 EVT EltVT = VT.getVectorElementType(); 6453 SDLoc DL(N); 6454 SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), 6455 DAG.getConstant(1, DL, EltVT)); 6456 if (VT.getSizeInBits() == N0VT.getSizeInBits()) 6457 // We know that the # elements of the results is the same as the 6458 // # elements of the compare (and the # elements of the compare result 6459 // for that matter). Check to see that they are the same size. 
If so, 6460 // we know that the element size of the sext'd result matches the 6461 // element size of the compare operands. 6462 return DAG.getNode(ISD::AND, DL, VT, 6463 DAG.getSetCC(DL, VT, N0.getOperand(0), 6464 N0.getOperand(1), 6465 cast<CondCodeSDNode>(N0.getOperand(2))->get()), 6466 DAG.getNode(ISD::BUILD_VECTOR, DL, VT, 6467 OneOps)); 6468 6469 // If the desired elements are smaller or larger than the source 6470 // elements we can use a matching integer vector type and then 6471 // truncate/sign extend 6472 EVT MatchingElementType = 6473 EVT::getIntegerVT(*DAG.getContext(), 6474 N0VT.getScalarType().getSizeInBits()); 6475 EVT MatchingVectorType = 6476 EVT::getVectorVT(*DAG.getContext(), MatchingElementType, 6477 N0VT.getVectorNumElements()); 6478 SDValue VsetCC = 6479 DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0), 6480 N0.getOperand(1), 6481 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 6482 return DAG.getNode(ISD::AND, DL, VT, 6483 DAG.getSExtOrTrunc(VsetCC, DL, VT), 6484 DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); 6485 } 6486 6487 // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc 6488 SDLoc DL(N); 6489 SDValue SCC = 6490 SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), 6491 DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), 6492 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 6493 if (SCC.getNode()) return SCC; 6494 } 6495 6496 // (zext (shl (zext x), cst)) -> (shl (zext x), cst) 6497 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && 6498 isa<ConstantSDNode>(N0.getOperand(1)) && 6499 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && 6500 N0.hasOneUse()) { 6501 SDValue ShAmt = N0.getOperand(1); 6502 unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 6503 if (N0.getOpcode() == ISD::SHL) { 6504 SDValue InnerZExt = N0.getOperand(0); 6505 // If the original shl may be shifting out bits, do not perform this 6506 // transformation. 
unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
        InnerZExt.getOperand(0).getValueType().getSizeInBits();
      // KnownZeroBits is the number of high bits the inner zext guarantees to
      // be zero; shifting left by more than that could drop set bits.
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  return SDValue();
}

/// Try to simplify the ANY_EXTEND node \p N.
///
/// The folds are attempted in the order they appear below; the first one that
/// applies wins. The result is a replacement value, SDValue(N, 0) when the DAG
/// has already been rewritten in place through CombineTo, or a null SDValue
/// when no fold applied.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      // Remember the truncate's operand before the truncate is replaced, so it
      // can be revisited afterwards.
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  // The high bits of an any-extend are unspecified, so the pair collapses to
  // x itself, a narrower truncate of x, or a direct any-extend of x.
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue TruncOp = N0.getOperand(0);
    if (TruncOp.getValueType() == VT)
      return TruncOp; // x iff x size == aext size.
    if (TruncOp.getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
  }

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDValue X = N0.getOperand(0).getOperand(0);
    // Bring x to the destination width before applying the widened mask.
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction. We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    // Filled by ExtendUsesToFormExtLoad and consumed by ExtendSetCCUses below.
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      // N becomes the extending load; the original load is replaced by a
      // truncate of it (value) plus the extending load's second result (chain).
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    // Keep whatever extension kind the existing load already uses.
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      // Type of the values being compared (not of the comparison result).
      EVT N0VT = N0.getOperand(0).getValueType();
      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter). Check to see that they are the same size. If so,
      // we know that the element size of the any-extended result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    SDValue SCC =
      SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode())
      return SCC;
  }

  return SDValue();
}

/// See if the specified operand can be simplified with the knowledge that only
/// the bits specified by Mask are used. If so, return the simpler operand,
/// otherwise return a null SDValue.
6682 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { 6683 switch (V.getOpcode()) { 6684 default: break; 6685 case ISD::Constant: { 6686 const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); 6687 assert(CV && "Const value should be ConstSDNode."); 6688 const APInt &CVal = CV->getAPIntValue(); 6689 APInt NewVal = CVal & Mask; 6690 if (NewVal != CVal) 6691 return DAG.getConstant(NewVal, SDLoc(V), V.getValueType()); 6692 break; 6693 } 6694 case ISD::OR: 6695 case ISD::XOR: 6696 // If the LHS or RHS don't contribute bits to the or, drop them. 6697 if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) 6698 return V.getOperand(1); 6699 if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) 6700 return V.getOperand(0); 6701 break; 6702 case ISD::SRL: 6703 // Only look at single-use SRLs. 6704 if (!V.getNode()->hasOneUse()) 6705 break; 6706 if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) { 6707 // See if we can recursively simplify the LHS. 6708 unsigned Amt = RHSC->getZExtValue(); 6709 6710 // Watch out for shift count overflow though. 6711 if (Amt >= Mask.getBitWidth()) break; 6712 APInt NewMask = Mask << Amt; 6713 if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask)) 6714 return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), 6715 SimplifyLHS, V.getOperand(1)); 6716 } 6717 } 6718 return SDValue(); 6719 } 6720 6721 /// If the result of a wider load is shifted to right of N bits and then 6722 /// truncated to a narrower type and where N is a multiple of number of bits of 6723 /// the narrower type, transform it to a narrower load from address + N / num of 6724 /// bits of new type. If the result is to be extended, also fold the extension 6725 /// to form a extending load. 
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  // N is the node that narrows the loaded value. The code below gives special
  // treatment to SIGN_EXTEND_INREG and SRL; any other opcode (the extend
  // visitors in this file pass a TRUNCATE) is treated as a plain narrowing to
  // N's result type.
  // Returns the narrowed value, or a null SDValue when the transform does not
  // apply. Every null return happens before any new node is created.
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  // VT is the type the new load produces; ExtVT is the type actually read
  // from memory.
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    ExtType = ISD::ZEXTLOAD;
    // Treat the SRL itself as the shifted value so that the shift-peeling
    // code below picks up its shift amount.
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    // NOTE(review): the subtraction wraps if the shift amount exceeds VT's
    // width - presumably such shifts never reach this point; confirm.
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  // Right-shift amount (in bits) between the loaded value and the bits that
  // are kept; becomes the byte offset of the narrow load further down.
  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      // (The mask test is a multiple-of check only when EVTBits is a power of
      // two - presumably guaranteed by the isRound() check above; confirm.)
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes. If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it. Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    return SDValue();

  // Don't change the width of a volatile load.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (LN0->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain). Don't transform a pre-increment
  // load, for example, which produces an extra value. Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LN0->getNumValues() > 2)
    return SDValue();

  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return SDValue();

  // Give the target the final say on whether narrowing is worthwhile.
  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
    return SDValue();

  // Operand 1 of the load is used as the pointer type for the offset constant.
  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  // ShAmt is in bits; the pointer offset is in bytes.
  uint64_t PtrOff = ShAmt / 8;
  // The narrow load can only rely on the alignment that survives the offset.
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType));
  AddToWorklist(NewPtr.getNode());

  // Build the narrow load: a plain load of VT, or an extending load that
  // reads ExtVT from memory and extends to VT.
  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff),
                       LN0->isVolatile(), LN0->isNonTemporal(),
                       LN0->isInvariant(), NewAlign, LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff),
                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                          LN0->isInvariant(), NewAlign, LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  // NOTE(review): DeadNodes presumably keeps the worklist consistent if the
  // replacement below deletes nodes - confirm against WorklistRemover.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    // Fall back to VT for the shift-amount constant when the preferred
    // shift-amount type cannot hold ShLeftAmt.
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}

SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  if (isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
6910 if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) 6911 return N0; 6912 6913 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 6914 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && 6915 EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) 6916 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 6917 N0.getOperand(0), N1); 6918 6919 // fold (sext_in_reg (sext x)) -> (sext x) 6920 // fold (sext_in_reg (aext x)) -> (sext x) 6921 // if x is small enough. 6922 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { 6923 SDValue N00 = N0.getOperand(0); 6924 if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && 6925 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) 6926 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); 6927 } 6928 6929 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. 6930 if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) 6931 return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT); 6932 6933 // fold operands of sext_in_reg based on knowledge that the top bits are not 6934 // demanded. 6935 if (SimplifyDemandedBits(SDValue(N, 0))) 6936 return SDValue(N, 0); 6937 6938 // fold (sext_in_reg (load x)) -> (smaller sextload x) 6939 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) 6940 if (SDValue NarrowLoad = ReduceLoadWidth(N)) 6941 return NarrowLoad; 6942 6943 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) 6944 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. 6945 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. 6946 if (N0.getOpcode() == ISD::SRL) { 6947 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 6948 if (ShAmt->getZExtValue()+EVTBits <= VTBits) { 6949 // We can turn this into an SRA iff the input to the SRL is already sign 6950 // extended enough. 
6951 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); 6952 if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) 6953 return DAG.getNode(ISD::SRA, SDLoc(N), VT, 6954 N0.getOperand(0), N0.getOperand(1)); 6955 } 6956 } 6957 6958 // fold (sext_inreg (extload x)) -> (sextload x) 6959 if (ISD::isEXTLoad(N0.getNode()) && 6960 ISD::isUNINDEXEDLoad(N0.getNode()) && 6961 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 6962 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 6963 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { 6964 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6965 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 6966 LN0->getChain(), 6967 LN0->getBasePtr(), EVT, 6968 LN0->getMemOperand()); 6969 CombineTo(N, ExtLoad); 6970 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 6971 AddToWorklist(ExtLoad.getNode()); 6972 return SDValue(N, 0); // Return N so it doesn't get rechecked! 6973 } 6974 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use 6975 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 6976 N0.hasOneUse() && 6977 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 6978 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 6979 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { 6980 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6981 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 6982 LN0->getChain(), 6983 LN0->getBasePtr(), EVT, 6984 LN0->getMemOperand()); 6985 CombineTo(N, ExtLoad); 6986 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 6987 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
6988 } 6989 6990 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) 6991 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { 6992 SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), 6993 N0.getOperand(1), false); 6994 if (BSwap.getNode()) 6995 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 6996 BSwap, N1); 6997 } 6998 6999 return SDValue(); 7000 } 7001 7002 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { 7003 SDValue N0 = N->getOperand(0); 7004 EVT VT = N->getValueType(0); 7005 7006 if (N0.getOpcode() == ISD::UNDEF) 7007 return DAG.getUNDEF(VT); 7008 7009 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, 7010 LegalOperations)) 7011 return SDValue(Res, 0); 7012 7013 return SDValue(); 7014 } 7015 7016 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { 7017 SDValue N0 = N->getOperand(0); 7018 EVT VT = N->getValueType(0); 7019 bool isLE = DAG.getDataLayout().isLittleEndian(); 7020 7021 // noop truncate 7022 if (N0.getValueType() == N->getValueType(0)) 7023 return N0; 7024 // fold (truncate c1) -> c1 7025 if (isConstantIntBuildVectorOrConstantInt(N0)) 7026 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); 7027 // fold (truncate (truncate x)) -> (truncate x) 7028 if (N0.getOpcode() == ISD::TRUNCATE) 7029 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); 7030 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x 7031 if (N0.getOpcode() == ISD::ZERO_EXTEND || 7032 N0.getOpcode() == ISD::SIGN_EXTEND || 7033 N0.getOpcode() == ISD::ANY_EXTEND) { 7034 if (N0.getOperand(0).getValueType().bitsLT(VT)) 7035 // if the source is smaller than the dest, we still need an extend 7036 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 7037 N0.getOperand(0)); 7038 if (N0.getOperand(0).getValueType().bitsGT(VT)) 7039 // if the source is larger than the dest, than we just need the truncate 7040 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); 7041 // if the source and dest are 
the same type, we can drop both the extend 7042 // and the truncate. 7043 return N0.getOperand(0); 7044 } 7045 7046 // Fold extract-and-trunc into a narrow extract. For example: 7047 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) 7048 // i32 y = TRUNCATE(i64 x) 7049 // -- becomes -- 7050 // v16i8 b = BITCAST (v2i64 val) 7051 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) 7052 // 7053 // Note: We only run this optimization after type legalization (which often 7054 // creates this pattern) and before operation legalization after which 7055 // we need to be more careful about the vector instructions that we generate. 7056 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7057 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { 7058 7059 EVT VecTy = N0.getOperand(0).getValueType(); 7060 EVT ExTy = N0.getValueType(); 7061 EVT TrTy = N->getValueType(0); 7062 7063 unsigned NumElem = VecTy.getVectorNumElements(); 7064 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); 7065 7066 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); 7067 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); 7068 7069 SDValue EltNo = N0->getOperand(1); 7070 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { 7071 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 7072 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 7073 int Index = isLE ? 
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); 7074 7075 SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), 7076 NVT, N0.getOperand(0)); 7077 7078 SDLoc DL(N); 7079 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 7080 DL, TrTy, V, 7081 DAG.getConstant(Index, DL, IndexTy)); 7082 } 7083 } 7084 7085 // trunc (select c, a, b) -> select c, (trunc a), (trunc b) 7086 if (N0.getOpcode() == ISD::SELECT) { 7087 EVT SrcVT = N0.getValueType(); 7088 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && 7089 TLI.isTruncateFree(SrcVT, VT)) { 7090 SDLoc SL(N0); 7091 SDValue Cond = N0.getOperand(0); 7092 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); 7093 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); 7094 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); 7095 } 7096 } 7097 7098 // Fold a series of buildvector, bitcast, and truncate if possible. 7099 // For example fold 7100 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to 7101 // (2xi32 (buildvector x, y)). 7102 if (Level == AfterLegalizeVectorOps && VT.isVector() && 7103 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && 7104 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && 7105 N0.getOperand(0).hasOneUse()) { 7106 7107 SDValue BuildVect = N0.getOperand(0); 7108 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); 7109 EVT TruncVecEltTy = VT.getVectorElementType(); 7110 7111 // Check that the element types match. 7112 if (BuildVectEltTy == TruncVecEltTy) { 7113 // Now we only need to compute the offset of the truncated elements. 
7114 unsigned BuildVecNumElts = BuildVect.getNumOperands(); 7115 unsigned TruncVecNumElts = VT.getVectorNumElements(); 7116 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; 7117 7118 assert((BuildVecNumElts % TruncVecNumElts) == 0 && 7119 "Invalid number of elements"); 7120 7121 SmallVector<SDValue, 8> Opnds; 7122 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) 7123 Opnds.push_back(BuildVect.getOperand(i)); 7124 7125 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); 7126 } 7127 } 7128 7129 // See if we can simplify the input to this truncate through knowledge that 7130 // only the low bits are being used. 7131 // For example "trunc (or (shl x, 8), y)" // -> trunc y 7132 // Currently we only perform this optimization on scalars because vectors 7133 // may have different active low bits. 7134 if (!VT.isVector()) { 7135 SDValue Shorter = 7136 GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), 7137 VT.getSizeInBits())); 7138 if (Shorter.getNode()) 7139 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); 7140 } 7141 // fold (truncate (load x)) -> (smaller load x) 7142 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) 7143 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { 7144 if (SDValue Reduced = ReduceLoadWidth(N)) 7145 return Reduced; 7146 7147 // Handle the case where the load remains an extending load even 7148 // after truncation. 7149 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { 7150 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 7151 if (!LN0->isVolatile() && 7152 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { 7153 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), 7154 VT, LN0->getChain(), LN0->getBasePtr(), 7155 LN0->getMemoryVT(), 7156 LN0->getMemOperand()); 7157 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); 7158 return NewLoad; 7159 } 7160 } 7161 } 7162 // fold (trunc (concat ... 
x ...)) -> (concat ..., (trunc x), ...)), 7163 // where ... are all 'undef'. 7164 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { 7165 SmallVector<EVT, 8> VTs; 7166 SDValue V; 7167 unsigned Idx = 0; 7168 unsigned NumDefs = 0; 7169 7170 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { 7171 SDValue X = N0.getOperand(i); 7172 if (X.getOpcode() != ISD::UNDEF) { 7173 V = X; 7174 Idx = i; 7175 NumDefs++; 7176 } 7177 // Stop if more than one members are non-undef. 7178 if (NumDefs > 1) 7179 break; 7180 VTs.push_back(EVT::getVectorVT(*DAG.getContext(), 7181 VT.getVectorElementType(), 7182 X.getValueType().getVectorNumElements())); 7183 } 7184 7185 if (NumDefs == 0) 7186 return DAG.getUNDEF(VT); 7187 7188 if (NumDefs == 1) { 7189 assert(V.getNode() && "The single defined operand is empty!"); 7190 SmallVector<SDValue, 8> Opnds; 7191 for (unsigned i = 0, e = VTs.size(); i != e; ++i) { 7192 if (i != Idx) { 7193 Opnds.push_back(DAG.getUNDEF(VTs[i])); 7194 continue; 7195 } 7196 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); 7197 AddToWorklist(NV.getNode()); 7198 Opnds.push_back(NV); 7199 } 7200 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); 7201 } 7202 } 7203 7204 // Simplify the operands using demanded-bits information. 7205 if (!VT.isVector() && 7206 SimplifyDemandedBits(SDValue(N, 0))) 7207 return SDValue(N, 0); 7208 7209 return SDValue(); 7210 } 7211 7212 static SDNode *getBuildPairElt(SDNode *N, unsigned i) { 7213 SDValue Elt = N->getOperand(i); 7214 if (Elt.getOpcode() != ISD::MERGE_VALUES) 7215 return Elt.getNode(); 7216 return Elt.getOperand(Elt.getResNo()).getNode(); 7217 } 7218 7219 /// build_pair (load, load) -> load 7220 /// if load locations are consecutive. 
7221 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { 7222 assert(N->getOpcode() == ISD::BUILD_PAIR); 7223 7224 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); 7225 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); 7226 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || 7227 LD1->getAddressSpace() != LD2->getAddressSpace()) 7228 return SDValue(); 7229 EVT LD1VT = LD1->getValueType(0); 7230 7231 if (ISD::isNON_EXTLoad(LD2) && 7232 LD2->hasOneUse() && 7233 // If both are volatile this would reduce the number of volatile loads. 7234 // If one is volatile it might be ok, but play conservative and bail out. 7235 !LD1->isVolatile() && 7236 !LD2->isVolatile() && 7237 DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { 7238 unsigned Align = LD1->getAlignment(); 7239 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( 7240 VT.getTypeForEVT(*DAG.getContext())); 7241 7242 if (NewAlign <= Align && 7243 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) 7244 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), 7245 LD1->getBasePtr(), LD1->getPointerInfo(), 7246 false, false, false, Align); 7247 } 7248 7249 return SDValue(); 7250 } 7251 7252 SDValue DAGCombiner::visitBITCAST(SDNode *N) { 7253 SDValue N0 = N->getOperand(0); 7254 EVT VT = N->getValueType(0); 7255 7256 // If the input is a BUILD_VECTOR with all constant elements, fold this now. 7257 // Only do this before legalize, since afterward the target may be depending 7258 // on the bitconvert. 7259 // First check to see if this is all constant. 
7260 if (!LegalTypes && 7261 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && 7262 VT.isVector()) { 7263 bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant(); 7264 7265 EVT DestEltVT = N->getValueType(0).getVectorElementType(); 7266 assert(!DestEltVT.isVector() && 7267 "Element type of vector ValueType must not be vector!"); 7268 if (isSimple) 7269 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); 7270 } 7271 7272 // If the input is a constant, let getNode fold it. 7273 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { 7274 // If we can't allow illegal operations, we need to check that this is just 7275 // a fp -> int or int -> conversion and that the resulting operation will 7276 // be legal. 7277 if (!LegalOperations || 7278 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && 7279 TLI.isOperationLegal(ISD::ConstantFP, VT)) || 7280 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && 7281 TLI.isOperationLegal(ISD::Constant, VT))) 7282 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); 7283 } 7284 7285 // (conv (conv x, t1), t2) -> (conv x, t2) 7286 if (N0.getOpcode() == ISD::BITCAST) 7287 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, 7288 N0.getOperand(0)); 7289 7290 // fold (conv (load x)) -> (load (conv*)x) 7291 // If the resultant load doesn't need a higher alignment than the original! 7292 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 7293 // Do not change the width of a volatile load. 7294 !cast<LoadSDNode>(N0)->isVolatile() && 7295 // Do not remove the cast if the types differ in endian layout. 
7296 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == 7297 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && 7298 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && 7299 TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { 7300 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 7301 unsigned Align = DAG.getDataLayout().getABITypeAlignment( 7302 VT.getTypeForEVT(*DAG.getContext())); 7303 unsigned OrigAlign = LN0->getAlignment(); 7304 7305 if (Align <= OrigAlign) { 7306 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), 7307 LN0->getBasePtr(), LN0->getPointerInfo(), 7308 LN0->isVolatile(), LN0->isNonTemporal(), 7309 LN0->isInvariant(), OrigAlign, 7310 LN0->getAAInfo()); 7311 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); 7312 return Load; 7313 } 7314 } 7315 7316 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 7317 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 7318 // This often reduces constant pool loads. 7319 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || 7320 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && 7321 N0.getNode()->hasOneUse() && VT.isInteger() && 7322 !VT.isVector() && !N0.getValueType().isVector()) { 7323 SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, 7324 N0.getOperand(0)); 7325 AddToWorklist(NewConv.getNode()); 7326 7327 SDLoc DL(N); 7328 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 7329 if (N0.getOpcode() == ISD::FNEG) 7330 return DAG.getNode(ISD::XOR, DL, VT, 7331 NewConv, DAG.getConstant(SignBit, DL, VT)); 7332 assert(N0.getOpcode() == ISD::FABS); 7333 return DAG.getNode(ISD::AND, DL, VT, 7334 NewConv, DAG.getConstant(~SignBit, DL, VT)); 7335 } 7336 7337 // fold (bitconvert (fcopysign cst, x)) -> 7338 // (or (and (bitconvert x), sign), (and cst, (not sign))) 7339 // Note that we don't handle (copysign x, cst) because this can always be 7340 // folded to an fneg or fabs. 
7341 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && 7342 isa<ConstantFPSDNode>(N0.getOperand(0)) && 7343 VT.isInteger() && !VT.isVector()) { 7344 unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); 7345 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); 7346 if (isTypeLegal(IntXVT)) { 7347 SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), 7348 IntXVT, N0.getOperand(1)); 7349 AddToWorklist(X.getNode()); 7350 7351 // If X has a different width than the result/lhs, sext it or truncate it. 7352 unsigned VTWidth = VT.getSizeInBits(); 7353 if (OrigXWidth < VTWidth) { 7354 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); 7355 AddToWorklist(X.getNode()); 7356 } else if (OrigXWidth > VTWidth) { 7357 // To get the sign bit in the right place, we have to shift it right 7358 // before truncating. 7359 SDLoc DL(X); 7360 X = DAG.getNode(ISD::SRL, DL, 7361 X.getValueType(), X, 7362 DAG.getConstant(OrigXWidth-VTWidth, DL, 7363 X.getValueType())); 7364 AddToWorklist(X.getNode()); 7365 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 7366 AddToWorklist(X.getNode()); 7367 } 7368 7369 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 7370 X = DAG.getNode(ISD::AND, SDLoc(X), VT, 7371 X, DAG.getConstant(SignBit, SDLoc(X), VT)); 7372 AddToWorklist(X.getNode()); 7373 7374 SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), 7375 VT, N0.getOperand(0)); 7376 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, 7377 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); 7378 AddToWorklist(Cst.getNode()); 7379 7380 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); 7381 } 7382 } 7383 7384 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 
7385 if (N0.getOpcode() == ISD::BUILD_PAIR) 7386 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT)) 7387 return CombineLD; 7388 7389 // Remove double bitcasts from shuffles - this is often a legacy of 7390 // XformToShuffleWithZero being used to combine bitmaskings (of 7391 // float vectors bitcast to integer vectors) into shuffles. 7392 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) 7393 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && 7394 N0->getOpcode() == ISD::VECTOR_SHUFFLE && 7395 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && 7396 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { 7397 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0); 7398 7399 // If operands are a bitcast, peek through if it casts the original VT. 7400 // If operands are a constant, just bitcast back to original VT. 7401 auto PeekThroughBitcast = [&](SDValue Op) { 7402 if (Op.getOpcode() == ISD::BITCAST && 7403 Op.getOperand(0).getValueType() == VT) 7404 return SDValue(Op.getOperand(0)); 7405 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || 7406 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) 7407 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); 7408 return SDValue(); 7409 }; 7410 7411 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); 7412 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); 7413 if (!(SV0 && SV1)) 7414 return SDValue(); 7415 7416 int MaskScale = 7417 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); 7418 SmallVector<int, 8> NewMask; 7419 for (int M : SVN->getMask()) 7420 for (int i = 0; i != MaskScale; ++i) 7421 NewMask.push_back(M < 0 ? 
-1 : M * MaskScale + i); 7422 7423 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); 7424 if (!LegalMask) { 7425 std::swap(SV0, SV1); 7426 ShuffleVectorSDNode::commuteMask(NewMask); 7427 LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); 7428 } 7429 7430 if (LegalMask) 7431 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); 7432 } 7433 7434 return SDValue(); 7435 } 7436 7437 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { 7438 EVT VT = N->getValueType(0); 7439 return CombineConsecutiveLoads(N, VT); 7440 } 7441 7442 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef 7443 /// operands. DstEltVT indicates the destination element value type. 7444 SDValue DAGCombiner:: 7445 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { 7446 EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); 7447 7448 // If this is already the right type, we're done. 7449 if (SrcEltVT == DstEltVT) return SDValue(BV, 0); 7450 7451 unsigned SrcBitSize = SrcEltVT.getSizeInBits(); 7452 unsigned DstBitSize = DstEltVT.getSizeInBits(); 7453 7454 // If this is a conversion of N elements of one type to N elements of another 7455 // type, convert each element. This handles FP<->INT cases. 7456 if (SrcBitSize == DstBitSize) { 7457 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, 7458 BV->getValueType(0).getVectorNumElements()); 7459 7460 // Due to the FP element handling below calling this routine recursively, 7461 // we can end up with a scalar-to-vector node here. 7462 if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) 7463 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, 7464 DAG.getNode(ISD::BITCAST, SDLoc(BV), 7465 DstEltVT, BV->getOperand(0))); 7466 7467 SmallVector<SDValue, 8> Ops; 7468 for (SDValue Op : BV->op_values()) { 7469 // If the vector element type is not legal, the BUILD_VECTOR operands 7470 // are promoted and implicitly truncated. Make that explicit here. 
7471 if (Op.getValueType() != SrcEltVT) 7472 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); 7473 Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), 7474 DstEltVT, Op)); 7475 AddToWorklist(Ops.back().getNode()); 7476 } 7477 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); 7478 } 7479 7480 // Otherwise, we're growing or shrinking the elements. To avoid having to 7481 // handle annoying details of growing/shrinking FP values, we convert them to 7482 // int first. 7483 if (SrcEltVT.isFloatingPoint()) { 7484 // Convert the input float vector to a int vector where the elements are the 7485 // same sizes. 7486 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); 7487 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); 7488 SrcEltVT = IntVT; 7489 } 7490 7491 // Now we know the input is an integer vector. If the output is a FP type, 7492 // convert to integer first, then to FP of the right size. 7493 if (DstEltVT.isFloatingPoint()) { 7494 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); 7495 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); 7496 7497 // Next, convert to FP elements of the same size. 7498 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); 7499 } 7500 7501 SDLoc DL(BV); 7502 7503 // Okay, we know the src/dst types are both integers of differing types. 7504 // Handling growing first. 7505 assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); 7506 if (SrcBitSize < DstBitSize) { 7507 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; 7508 7509 SmallVector<SDValue, 8> Ops; 7510 for (unsigned i = 0, e = BV->getNumOperands(); i != e; 7511 i += NumInputsPerOutput) { 7512 bool isLE = DAG.getDataLayout().isLittleEndian(); 7513 APInt NewBits = APInt(DstBitSize, 0); 7514 bool EltIsUndef = true; 7515 for (unsigned j = 0; j != NumInputsPerOutput; ++j) { 7516 // Shift the previously computed bits over. 
7517 NewBits <<= SrcBitSize; 7518 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); 7519 if (Op.getOpcode() == ISD::UNDEF) continue; 7520 EltIsUndef = false; 7521 7522 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). 7523 zextOrTrunc(SrcBitSize).zext(DstBitSize); 7524 } 7525 7526 if (EltIsUndef) 7527 Ops.push_back(DAG.getUNDEF(DstEltVT)); 7528 else 7529 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); 7530 } 7531 7532 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); 7533 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); 7534 } 7535 7536 // Finally, this must be the case where we are shrinking elements: each input 7537 // turns into multiple outputs. 7538 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; 7539 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, 7540 NumOutputsPerInput*BV->getNumOperands()); 7541 SmallVector<SDValue, 8> Ops; 7542 7543 for (const SDValue &Op : BV->op_values()) { 7544 if (Op.getOpcode() == ISD::UNDEF) { 7545 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); 7546 continue; 7547 } 7548 7549 APInt OpVal = cast<ConstantSDNode>(Op)-> 7550 getAPIntValue().zextOrTrunc(SrcBitSize); 7551 7552 for (unsigned j = 0; j != NumOutputsPerInput; ++j) { 7553 APInt ThisVal = OpVal.trunc(DstBitSize); 7554 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); 7555 OpVal = OpVal.lshr(DstBitSize); 7556 } 7557 7558 // For big endian targets, swap the order of the pieces of each element. 7559 if (DAG.getDataLayout().isBigEndian()) 7560 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); 7561 } 7562 7563 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); 7564 } 7565 7566 /// Try to perform FMA combining on a given FADD node. 
7567 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { 7568 SDValue N0 = N->getOperand(0); 7569 SDValue N1 = N->getOperand(1); 7570 EVT VT = N->getValueType(0); 7571 SDLoc SL(N); 7572 7573 const TargetOptions &Options = DAG.getTarget().Options; 7574 bool AllowFusion = 7575 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); 7576 7577 // Floating-point multiply-add with intermediate rounding. 7578 bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); 7579 7580 // Floating-point multiply-add without intermediate rounding. 7581 bool HasFMA = 7582 AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && 7583 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); 7584 7585 // No valid opcode, do not combine. 7586 if (!HasFMAD && !HasFMA) 7587 return SDValue(); 7588 7589 // Always prefer FMAD to FMA for precision. 7590 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; 7591 bool Aggressive = TLI.enableAggressiveFMAFusion(VT); 7592 bool LookThroughFPExt = TLI.isFPExtFree(VT); 7593 7594 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), 7595 // prefer to fold the multiply with fewer uses. 7596 if (Aggressive && N0.getOpcode() == ISD::FMUL && 7597 N1.getOpcode() == ISD::FMUL) { 7598 if (N0.getNode()->use_size() > N1.getNode()->use_size()) 7599 std::swap(N0, N1); 7600 } 7601 7602 // fold (fadd (fmul x, y), z) -> (fma x, y, z) 7603 if (N0.getOpcode() == ISD::FMUL && 7604 (Aggressive || N0->hasOneUse())) { 7605 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7606 N0.getOperand(0), N0.getOperand(1), N1); 7607 } 7608 7609 // fold (fadd x, (fmul y, z)) -> (fma y, z, x) 7610 // Note: Commutes FADD operands. 7611 if (N1.getOpcode() == ISD::FMUL && 7612 (Aggressive || N1->hasOneUse())) { 7613 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7614 N1.getOperand(0), N1.getOperand(1), N0); 7615 } 7616 7617 // Look through FP_EXTEND nodes to do more combining. 
7618 if (AllowFusion && LookThroughFPExt) { 7619 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) 7620 if (N0.getOpcode() == ISD::FP_EXTEND) { 7621 SDValue N00 = N0.getOperand(0); 7622 if (N00.getOpcode() == ISD::FMUL) 7623 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7624 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7625 N00.getOperand(0)), 7626 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7627 N00.getOperand(1)), N1); 7628 } 7629 7630 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) 7631 // Note: Commutes FADD operands. 7632 if (N1.getOpcode() == ISD::FP_EXTEND) { 7633 SDValue N10 = N1.getOperand(0); 7634 if (N10.getOpcode() == ISD::FMUL) 7635 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7636 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7637 N10.getOperand(0)), 7638 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7639 N10.getOperand(1)), N0); 7640 } 7641 } 7642 7643 // More folding opportunities when target permits. 7644 if ((AllowFusion || HasFMAD) && Aggressive) { 7645 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) 7646 if (N0.getOpcode() == PreferredFusedOpcode && 7647 N0.getOperand(2).getOpcode() == ISD::FMUL) { 7648 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7649 N0.getOperand(0), N0.getOperand(1), 7650 DAG.getNode(PreferredFusedOpcode, SL, VT, 7651 N0.getOperand(2).getOperand(0), 7652 N0.getOperand(2).getOperand(1), 7653 N1)); 7654 } 7655 7656 // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) 7657 if (N1->getOpcode() == PreferredFusedOpcode && 7658 N1.getOperand(2).getOpcode() == ISD::FMUL) { 7659 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7660 N1.getOperand(0), N1.getOperand(1), 7661 DAG.getNode(PreferredFusedOpcode, SL, VT, 7662 N1.getOperand(2).getOperand(0), 7663 N1.getOperand(2).getOperand(1), 7664 N0)); 7665 } 7666 7667 if (AllowFusion && LookThroughFPExt) { 7668 // fold (fadd (fma x, y, (fpext (fmul u, v))), z) 7669 // -> (fma x, y, (fma (fpext u), (fpext v), z)) 7670 auto 
FoldFAddFMAFPExtFMul = [&] ( 7671 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { 7672 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, 7673 DAG.getNode(PreferredFusedOpcode, SL, VT, 7674 DAG.getNode(ISD::FP_EXTEND, SL, VT, U), 7675 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), 7676 Z)); 7677 }; 7678 if (N0.getOpcode() == PreferredFusedOpcode) { 7679 SDValue N02 = N0.getOperand(2); 7680 if (N02.getOpcode() == ISD::FP_EXTEND) { 7681 SDValue N020 = N02.getOperand(0); 7682 if (N020.getOpcode() == ISD::FMUL) 7683 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), 7684 N020.getOperand(0), N020.getOperand(1), 7685 N1); 7686 } 7687 } 7688 7689 // fold (fadd (fpext (fma x, y, (fmul u, v))), z) 7690 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) 7691 // FIXME: This turns two single-precision and one double-precision 7692 // operation into two double-precision operations, which might not be 7693 // interesting for all targets, especially GPUs. 7694 auto FoldFAddFPExtFMAFMul = [&] ( 7695 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { 7696 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7697 DAG.getNode(ISD::FP_EXTEND, SL, VT, X), 7698 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), 7699 DAG.getNode(PreferredFusedOpcode, SL, VT, 7700 DAG.getNode(ISD::FP_EXTEND, SL, VT, U), 7701 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), 7702 Z)); 7703 }; 7704 if (N0.getOpcode() == ISD::FP_EXTEND) { 7705 SDValue N00 = N0.getOperand(0); 7706 if (N00.getOpcode() == PreferredFusedOpcode) { 7707 SDValue N002 = N00.getOperand(2); 7708 if (N002.getOpcode() == ISD::FMUL) 7709 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), 7710 N002.getOperand(0), N002.getOperand(1), 7711 N1); 7712 } 7713 } 7714 7715 // fold (fadd x, (fma y, z, (fpext (fmul u, v))) 7716 // -> (fma y, z, (fma (fpext u), (fpext v), x)) 7717 if (N1.getOpcode() == PreferredFusedOpcode) { 7718 SDValue N12 = N1.getOperand(2); 7719 if (N12.getOpcode() == ISD::FP_EXTEND) { 7720 SDValue 
N120 = N12.getOperand(0); 7721 if (N120.getOpcode() == ISD::FMUL) 7722 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), 7723 N120.getOperand(0), N120.getOperand(1), 7724 N0); 7725 } 7726 } 7727 7728 // fold (fadd x, (fpext (fma y, z, (fmul u, v))) 7729 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) 7730 // FIXME: This turns two single-precision and one double-precision 7731 // operation into two double-precision operations, which might not be 7732 // interesting for all targets, especially GPUs. 7733 if (N1.getOpcode() == ISD::FP_EXTEND) { 7734 SDValue N10 = N1.getOperand(0); 7735 if (N10.getOpcode() == PreferredFusedOpcode) { 7736 SDValue N102 = N10.getOperand(2); 7737 if (N102.getOpcode() == ISD::FMUL) 7738 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), 7739 N102.getOperand(0), N102.getOperand(1), 7740 N0); 7741 } 7742 } 7743 } 7744 } 7745 7746 return SDValue(); 7747 } 7748 7749 /// Try to perform FMA combining on a given FSUB node. 7750 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { 7751 SDValue N0 = N->getOperand(0); 7752 SDValue N1 = N->getOperand(1); 7753 EVT VT = N->getValueType(0); 7754 SDLoc SL(N); 7755 7756 const TargetOptions &Options = DAG.getTarget().Options; 7757 bool AllowFusion = 7758 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); 7759 7760 // Floating-point multiply-add with intermediate rounding. 7761 bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); 7762 7763 // Floating-point multiply-add without intermediate rounding. 7764 bool HasFMA = 7765 AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && 7766 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); 7767 7768 // No valid opcode, do not combine. 7769 if (!HasFMAD && !HasFMA) 7770 return SDValue(); 7771 7772 // Always prefer FMAD to FMA for precision. 7773 unsigned PreferredFusedOpcode = HasFMAD ? 
ISD::FMAD : ISD::FMA; 7774 bool Aggressive = TLI.enableAggressiveFMAFusion(VT); 7775 bool LookThroughFPExt = TLI.isFPExtFree(VT); 7776 7777 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) 7778 if (N0.getOpcode() == ISD::FMUL && 7779 (Aggressive || N0->hasOneUse())) { 7780 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7781 N0.getOperand(0), N0.getOperand(1), 7782 DAG.getNode(ISD::FNEG, SL, VT, N1)); 7783 } 7784 7785 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) 7786 // Note: Commutes FSUB operands. 7787 if (N1.getOpcode() == ISD::FMUL && 7788 (Aggressive || N1->hasOneUse())) 7789 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7790 DAG.getNode(ISD::FNEG, SL, VT, 7791 N1.getOperand(0)), 7792 N1.getOperand(1), N0); 7793 7794 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) 7795 if (N0.getOpcode() == ISD::FNEG && 7796 N0.getOperand(0).getOpcode() == ISD::FMUL && 7797 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) { 7798 SDValue N00 = N0.getOperand(0).getOperand(0); 7799 SDValue N01 = N0.getOperand(0).getOperand(1); 7800 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7801 DAG.getNode(ISD::FNEG, SL, VT, N00), N01, 7802 DAG.getNode(ISD::FNEG, SL, VT, N1)); 7803 } 7804 7805 // Look through FP_EXTEND nodes to do more combining. 7806 if (AllowFusion && LookThroughFPExt) { 7807 // fold (fsub (fpext (fmul x, y)), z) 7808 // -> (fma (fpext x), (fpext y), (fneg z)) 7809 if (N0.getOpcode() == ISD::FP_EXTEND) { 7810 SDValue N00 = N0.getOperand(0); 7811 if (N00.getOpcode() == ISD::FMUL) 7812 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7813 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7814 N00.getOperand(0)), 7815 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7816 N00.getOperand(1)), 7817 DAG.getNode(ISD::FNEG, SL, VT, N1)); 7818 } 7819 7820 // fold (fsub x, (fpext (fmul y, z))) 7821 // -> (fma (fneg (fpext y)), (fpext z), x) 7822 // Note: Commutes FSUB operands. 
7823 if (N1.getOpcode() == ISD::FP_EXTEND) { 7824 SDValue N10 = N1.getOperand(0); 7825 if (N10.getOpcode() == ISD::FMUL) 7826 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7827 DAG.getNode(ISD::FNEG, SL, VT, 7828 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7829 N10.getOperand(0))), 7830 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7831 N10.getOperand(1)), 7832 N0); 7833 } 7834 7835 // fold (fsub (fpext (fneg (fmul, x, y))), z) 7836 // -> (fneg (fma (fpext x), (fpext y), z)) 7837 // Note: This could be removed with appropriate canonicalization of the 7838 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the 7839 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent 7840 // from implementing the canonicalization in visitFSUB. 7841 if (N0.getOpcode() == ISD::FP_EXTEND) { 7842 SDValue N00 = N0.getOperand(0); 7843 if (N00.getOpcode() == ISD::FNEG) { 7844 SDValue N000 = N00.getOperand(0); 7845 if (N000.getOpcode() == ISD::FMUL) { 7846 return DAG.getNode(ISD::FNEG, SL, VT, 7847 DAG.getNode(PreferredFusedOpcode, SL, VT, 7848 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7849 N000.getOperand(0)), 7850 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7851 N000.getOperand(1)), 7852 N1)); 7853 } 7854 } 7855 } 7856 7857 // fold (fsub (fneg (fpext (fmul, x, y))), z) 7858 // -> (fneg (fma (fpext x)), (fpext y), z) 7859 // Note: This could be removed with appropriate canonicalization of the 7860 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the 7861 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent 7862 // from implementing the canonicalization in visitFSUB. 
7863 if (N0.getOpcode() == ISD::FNEG) { 7864 SDValue N00 = N0.getOperand(0); 7865 if (N00.getOpcode() == ISD::FP_EXTEND) { 7866 SDValue N000 = N00.getOperand(0); 7867 if (N000.getOpcode() == ISD::FMUL) { 7868 return DAG.getNode(ISD::FNEG, SL, VT, 7869 DAG.getNode(PreferredFusedOpcode, SL, VT, 7870 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7871 N000.getOperand(0)), 7872 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7873 N000.getOperand(1)), 7874 N1)); 7875 } 7876 } 7877 } 7878 7879 } 7880 7881 // More folding opportunities when target permits. 7882 if ((AllowFusion || HasFMAD) && Aggressive) { 7883 // fold (fsub (fma x, y, (fmul u, v)), z) 7884 // -> (fma x, y (fma u, v, (fneg z))) 7885 if (N0.getOpcode() == PreferredFusedOpcode && 7886 N0.getOperand(2).getOpcode() == ISD::FMUL) { 7887 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7888 N0.getOperand(0), N0.getOperand(1), 7889 DAG.getNode(PreferredFusedOpcode, SL, VT, 7890 N0.getOperand(2).getOperand(0), 7891 N0.getOperand(2).getOperand(1), 7892 DAG.getNode(ISD::FNEG, SL, VT, 7893 N1))); 7894 } 7895 7896 // fold (fsub x, (fma y, z, (fmul u, v))) 7897 // -> (fma (fneg y), z, (fma (fneg u), v, x)) 7898 if (N1.getOpcode() == PreferredFusedOpcode && 7899 N1.getOperand(2).getOpcode() == ISD::FMUL) { 7900 SDValue N20 = N1.getOperand(2).getOperand(0); 7901 SDValue N21 = N1.getOperand(2).getOperand(1); 7902 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7903 DAG.getNode(ISD::FNEG, SL, VT, 7904 N1.getOperand(0)), 7905 N1.getOperand(1), 7906 DAG.getNode(PreferredFusedOpcode, SL, VT, 7907 DAG.getNode(ISD::FNEG, SL, VT, N20), 7908 7909 N21, N0)); 7910 } 7911 7912 if (AllowFusion && LookThroughFPExt) { 7913 // fold (fsub (fma x, y, (fpext (fmul u, v))), z) 7914 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) 7915 if (N0.getOpcode() == PreferredFusedOpcode) { 7916 SDValue N02 = N0.getOperand(2); 7917 if (N02.getOpcode() == ISD::FP_EXTEND) { 7918 SDValue N020 = N02.getOperand(0); 7919 if (N020.getOpcode() == ISD::FMUL) 7920 return 
DAG.getNode(PreferredFusedOpcode, SL, VT, 7921 N0.getOperand(0), N0.getOperand(1), 7922 DAG.getNode(PreferredFusedOpcode, SL, VT, 7923 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7924 N020.getOperand(0)), 7925 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7926 N020.getOperand(1)), 7927 DAG.getNode(ISD::FNEG, SL, VT, 7928 N1))); 7929 } 7930 } 7931 7932 // fold (fsub (fpext (fma x, y, (fmul u, v))), z) 7933 // -> (fma (fpext x), (fpext y), 7934 // (fma (fpext u), (fpext v), (fneg z))) 7935 // FIXME: This turns two single-precision and one double-precision 7936 // operation into two double-precision operations, which might not be 7937 // interesting for all targets, especially GPUs. 7938 if (N0.getOpcode() == ISD::FP_EXTEND) { 7939 SDValue N00 = N0.getOperand(0); 7940 if (N00.getOpcode() == PreferredFusedOpcode) { 7941 SDValue N002 = N00.getOperand(2); 7942 if (N002.getOpcode() == ISD::FMUL) 7943 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7944 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7945 N00.getOperand(0)), 7946 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7947 N00.getOperand(1)), 7948 DAG.getNode(PreferredFusedOpcode, SL, VT, 7949 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7950 N002.getOperand(0)), 7951 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7952 N002.getOperand(1)), 7953 DAG.getNode(ISD::FNEG, SL, VT, 7954 N1))); 7955 } 7956 } 7957 7958 // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) 7959 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) 7960 if (N1.getOpcode() == PreferredFusedOpcode && 7961 N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { 7962 SDValue N120 = N1.getOperand(2).getOperand(0); 7963 if (N120.getOpcode() == ISD::FMUL) { 7964 SDValue N1200 = N120.getOperand(0); 7965 SDValue N1201 = N120.getOperand(1); 7966 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7967 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), 7968 N1.getOperand(1), 7969 DAG.getNode(PreferredFusedOpcode, SL, VT, 7970 DAG.getNode(ISD::FNEG, SL, VT, 7971 DAG.getNode(ISD::FP_EXTEND, SL, 7972 VT, N1200)), 7973 
DAG.getNode(ISD::FP_EXTEND, SL, VT, 7974 N1201), 7975 N0)); 7976 } 7977 } 7978 7979 // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) 7980 // -> (fma (fneg (fpext y)), (fpext z), 7981 // (fma (fneg (fpext u)), (fpext v), x)) 7982 // FIXME: This turns two single-precision and one double-precision 7983 // operation into two double-precision operations, which might not be 7984 // interesting for all targets, especially GPUs. 7985 if (N1.getOpcode() == ISD::FP_EXTEND && 7986 N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { 7987 SDValue N100 = N1.getOperand(0).getOperand(0); 7988 SDValue N101 = N1.getOperand(0).getOperand(1); 7989 SDValue N102 = N1.getOperand(0).getOperand(2); 7990 if (N102.getOpcode() == ISD::FMUL) { 7991 SDValue N1020 = N102.getOperand(0); 7992 SDValue N1021 = N102.getOperand(1); 7993 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7994 DAG.getNode(ISD::FNEG, SL, VT, 7995 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7996 N100)), 7997 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), 7998 DAG.getNode(PreferredFusedOpcode, SL, VT, 7999 DAG.getNode(ISD::FNEG, SL, VT, 8000 DAG.getNode(ISD::FP_EXTEND, SL, 8001 VT, N1020)), 8002 DAG.getNode(ISD::FP_EXTEND, SL, VT, 8003 N1021), 8004 N0)); 8005 } 8006 } 8007 } 8008 } 8009 8010 return SDValue(); 8011 } 8012 8013 /// Try to perform FMA combining on a given FMUL node. 8014 SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) { 8015 SDValue N0 = N->getOperand(0); 8016 SDValue N1 = N->getOperand(1); 8017 EVT VT = N->getValueType(0); 8018 SDLoc SL(N); 8019 8020 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); 8021 8022 const TargetOptions &Options = DAG.getTarget().Options; 8023 bool AllowFusion = 8024 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); 8025 8026 // Floating-point multiply-add with intermediate rounding. 8027 bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); 8028 8029 // Floating-point multiply-add without intermediate rounding. 
8030 bool HasFMA = 8031 AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && 8032 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); 8033 8034 // No valid opcode, do not combine. 8035 if (!HasFMAD && !HasFMA) 8036 return SDValue(); 8037 8038 // Always prefer FMAD to FMA for precision. 8039 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; 8040 bool Aggressive = TLI.enableAggressiveFMAFusion(VT); 8041 8042 // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y) 8043 // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y)) 8044 auto FuseFADD = [&](SDValue X, SDValue Y) { 8045 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { 8046 auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); 8047 if (XC1 && XC1->isExactlyValue(+1.0)) 8048 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); 8049 if (XC1 && XC1->isExactlyValue(-1.0)) 8050 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, 8051 DAG.getNode(ISD::FNEG, SL, VT, Y)); 8052 } 8053 return SDValue(); 8054 }; 8055 8056 if (SDValue FMA = FuseFADD(N0, N1)) 8057 return FMA; 8058 if (SDValue FMA = FuseFADD(N1, N0)) 8059 return FMA; 8060 8061 // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y) 8062 // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y)) 8063 // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y)) 8064 // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y) 8065 auto FuseFSUB = [&](SDValue X, SDValue Y) { 8066 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) { 8067 auto XC0 = isConstOrConstSplatFP(X.getOperand(0)); 8068 if (XC0 && XC0->isExactlyValue(+1.0)) 8069 return DAG.getNode(PreferredFusedOpcode, SL, VT, 8070 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, 8071 Y); 8072 if (XC0 && XC0->isExactlyValue(-1.0)) 8073 return DAG.getNode(PreferredFusedOpcode, SL, VT, 8074 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, 8075 DAG.getNode(ISD::FNEG, SL, VT, Y)); 8076 8077 auto XC1 = 
isConstOrConstSplatFP(X.getOperand(1)); 8078 if (XC1 && XC1->isExactlyValue(+1.0)) 8079 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, 8080 DAG.getNode(ISD::FNEG, SL, VT, Y)); 8081 if (XC1 && XC1->isExactlyValue(-1.0)) 8082 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); 8083 } 8084 return SDValue(); 8085 }; 8086 8087 if (SDValue FMA = FuseFSUB(N0, N1)) 8088 return FMA; 8089 if (SDValue FMA = FuseFSUB(N1, N0)) 8090 return FMA; 8091 8092 return SDValue(); 8093 } 8094 8095 SDValue DAGCombiner::visitFADD(SDNode *N) { 8096 SDValue N0 = N->getOperand(0); 8097 SDValue N1 = N->getOperand(1); 8098 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); 8099 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); 8100 EVT VT = N->getValueType(0); 8101 SDLoc DL(N); 8102 const TargetOptions &Options = DAG.getTarget().Options; 8103 const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; 8104 8105 // fold vector ops 8106 if (VT.isVector()) 8107 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 8108 return FoldedVOp; 8109 8110 // fold (fadd c1, c2) -> c1 + c2 8111 if (N0CFP && N1CFP) 8112 return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags); 8113 8114 // canonicalize constant to RHS 8115 if (N0CFP && !N1CFP) 8116 return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags); 8117 8118 // fold (fadd A, (fneg B)) -> (fsub A, B) 8119 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && 8120 isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) 8121 return DAG.getNode(ISD::FSUB, DL, VT, N0, 8122 GetNegatedExpression(N1, DAG, LegalOperations), Flags); 8123 8124 // fold (fadd (fneg A), B) -> (fsub B, A) 8125 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && 8126 isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) 8127 return DAG.getNode(ISD::FSUB, DL, VT, N1, 8128 GetNegatedExpression(N0, DAG, LegalOperations), Flags); 8129 8130 // If 'unsafe math' is enabled, fold lots of things. 
8131 if (Options.UnsafeFPMath) { 8132 // No FP constant should be created after legalization as Instruction 8133 // Selection pass has a hard time dealing with FP constants. 8134 bool AllowNewConst = (Level < AfterLegalizeDAG); 8135 8136 // fold (fadd A, 0) -> A 8137 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) 8138 if (N1C->isZero()) 8139 return N0; 8140 8141 // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) 8142 if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && 8143 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) 8144 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), 8145 DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, 8146 Flags), 8147 Flags); 8148 8149 // If allowed, fold (fadd (fneg x), x) -> 0.0 8150 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) 8151 return DAG.getConstantFP(0.0, DL, VT); 8152 8153 // If allowed, fold (fadd x, (fneg x)) -> 0.0 8154 if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) 8155 return DAG.getConstantFP(0.0, DL, VT); 8156 8157 // We can fold chains of FADD's of the same value into multiplications. 8158 // This transform is not safe in general because we are reducing the number 8159 // of rounding steps. 
8160 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { 8161 if (N0.getOpcode() == ISD::FMUL) { 8162 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); 8163 bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); 8164 8165 // (fadd (fmul x, c), x) -> (fmul x, c+1) 8166 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { 8167 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), 8168 DAG.getConstantFP(1.0, DL, VT), Flags); 8169 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); 8170 } 8171 8172 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) 8173 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && 8174 N1.getOperand(0) == N1.getOperand(1) && 8175 N0.getOperand(0) == N1.getOperand(0)) { 8176 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), 8177 DAG.getConstantFP(2.0, DL, VT), Flags); 8178 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); 8179 } 8180 } 8181 8182 if (N1.getOpcode() == ISD::FMUL) { 8183 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); 8184 bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); 8185 8186 // (fadd x, (fmul x, c)) -> (fmul x, c+1) 8187 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { 8188 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), 8189 DAG.getConstantFP(1.0, DL, VT), Flags); 8190 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); 8191 } 8192 8193 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) 8194 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && 8195 N0.getOperand(0) == N0.getOperand(1) && 8196 N1.getOperand(0) == N0.getOperand(0)) { 8197 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), 8198 DAG.getConstantFP(2.0, DL, VT), Flags); 8199 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); 8200 } 8201 } 8202 8203 if (N0.getOpcode() == ISD::FADD && AllowNewConst) { 8204 bool CFP00 = 
isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); 8205 // (fadd (fadd x, x), x) -> (fmul x, 3.0) 8206 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && 8207 (N0.getOperand(0) == N1)) { 8208 return DAG.getNode(ISD::FMUL, DL, VT, 8209 N1, DAG.getConstantFP(3.0, DL, VT), Flags); 8210 } 8211 } 8212 8213 if (N1.getOpcode() == ISD::FADD && AllowNewConst) { 8214 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); 8215 // (fadd x, (fadd x, x)) -> (fmul x, 3.0) 8216 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && 8217 N1.getOperand(0) == N0) { 8218 return DAG.getNode(ISD::FMUL, DL, VT, 8219 N0, DAG.getConstantFP(3.0, DL, VT), Flags); 8220 } 8221 } 8222 8223 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) 8224 if (AllowNewConst && 8225 N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && 8226 N0.getOperand(0) == N0.getOperand(1) && 8227 N1.getOperand(0) == N1.getOperand(1) && 8228 N0.getOperand(0) == N1.getOperand(0)) { 8229 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), 8230 DAG.getConstantFP(4.0, DL, VT), Flags); 8231 } 8232 } 8233 } // enable-unsafe-fp-math 8234 8235 // FADD -> FMA combines: 8236 if (SDValue Fused = visitFADDForFMACombine(N)) { 8237 AddToWorklist(Fused.getNode()); 8238 return Fused; 8239 } 8240 8241 return SDValue(); 8242 } 8243 8244 SDValue DAGCombiner::visitFSUB(SDNode *N) { 8245 SDValue N0 = N->getOperand(0); 8246 SDValue N1 = N->getOperand(1); 8247 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); 8248 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); 8249 EVT VT = N->getValueType(0); 8250 SDLoc dl(N); 8251 const TargetOptions &Options = DAG.getTarget().Options; 8252 const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; 8253 8254 // fold vector ops 8255 if (VT.isVector()) 8256 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 8257 return FoldedVOp; 8258 8259 // fold (fsub c1, c2) -> c1-c2 8260 if (N0CFP && N1CFP) 8261 return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags); 8262 8263 // 
fold (fsub A, (fneg B)) -> (fadd A, B) 8264 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) 8265 return DAG.getNode(ISD::FADD, dl, VT, N0, 8266 GetNegatedExpression(N1, DAG, LegalOperations), Flags); 8267 8268 // If 'unsafe math' is enabled, fold lots of things. 8269 if (Options.UnsafeFPMath) { 8270 // (fsub A, 0) -> A 8271 if (N1CFP && N1CFP->isZero()) 8272 return N0; 8273 8274 // (fsub 0, B) -> -B 8275 if (N0CFP && N0CFP->isZero()) { 8276 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) 8277 return GetNegatedExpression(N1, DAG, LegalOperations); 8278 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 8279 return DAG.getNode(ISD::FNEG, dl, VT, N1); 8280 } 8281 8282 // (fsub x, x) -> 0.0 8283 if (N0 == N1) 8284 return DAG.getConstantFP(0.0f, dl, VT); 8285 8286 // (fsub x, (fadd x, y)) -> (fneg y) 8287 // (fsub x, (fadd y, x)) -> (fneg y) 8288 if (N1.getOpcode() == ISD::FADD) { 8289 SDValue N10 = N1->getOperand(0); 8290 SDValue N11 = N1->getOperand(1); 8291 8292 if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options)) 8293 return GetNegatedExpression(N11, DAG, LegalOperations); 8294 8295 if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options)) 8296 return GetNegatedExpression(N10, DAG, LegalOperations); 8297 } 8298 } 8299 8300 // FSUB -> FMA combines: 8301 if (SDValue Fused = visitFSUBForFMACombine(N)) { 8302 AddToWorklist(Fused.getNode()); 8303 return Fused; 8304 } 8305 8306 return SDValue(); 8307 } 8308 8309 SDValue DAGCombiner::visitFMUL(SDNode *N) { 8310 SDValue N0 = N->getOperand(0); 8311 SDValue N1 = N->getOperand(1); 8312 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); 8313 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); 8314 EVT VT = N->getValueType(0); 8315 SDLoc DL(N); 8316 const TargetOptions &Options = DAG.getTarget().Options; 8317 const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; 8318 8319 // fold vector ops 8320 if (VT.isVector()) { 8321 // This just handles 
C1 * C2 for vectors. Other vector folds are below. 8322 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 8323 return FoldedVOp; 8324 } 8325 8326 // fold (fmul c1, c2) -> c1*c2 8327 if (N0CFP && N1CFP) 8328 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags); 8329 8330 // canonicalize constant to RHS 8331 if (isConstantFPBuildVectorOrConstantFP(N0) && 8332 !isConstantFPBuildVectorOrConstantFP(N1)) 8333 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); 8334 8335 // fold (fmul A, 1.0) -> A 8336 if (N1CFP && N1CFP->isExactlyValue(1.0)) 8337 return N0; 8338 8339 if (Options.UnsafeFPMath) { 8340 // fold (fmul A, 0) -> 0 8341 if (N1CFP && N1CFP->isZero()) 8342 return N1; 8343 8344 // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) 8345 if (N0.getOpcode() == ISD::FMUL) { 8346 // Fold scalars or any vector constants (not just splats). 8347 // This fold is done in general by InstCombine, but extra fmul insts 8348 // may have been generated during lowering. 8349 SDValue N00 = N0.getOperand(0); 8350 SDValue N01 = N0.getOperand(1); 8351 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); 8352 auto *BV00 = dyn_cast<BuildVectorSDNode>(N00); 8353 auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); 8354 8355 // Check 1: Make sure that the first operand of the inner multiply is NOT 8356 // a constant. Otherwise, we may induce infinite looping. 8357 if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { 8358 // Check 2: Make sure that the second operand of the inner multiply and 8359 // the second operand of the outer multiply are constants. 
8360 if ((N1CFP && isConstOrConstSplatFP(N01)) || 8361 (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { 8362 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); 8363 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); 8364 } 8365 } 8366 } 8367 8368 // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) 8369 // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs 8370 // during an early run of DAGCombiner can prevent folding with fmuls 8371 // inserted during lowering. 8372 if (N0.getOpcode() == ISD::FADD && 8373 (N0.getOperand(0) == N0.getOperand(1)) && 8374 N0.hasOneUse()) { 8375 const SDValue Two = DAG.getConstantFP(2.0, DL, VT); 8376 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); 8377 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); 8378 } 8379 } 8380 8381 // fold (fmul X, 2.0) -> (fadd X, X) 8382 if (N1CFP && N1CFP->isExactlyValue(+2.0)) 8383 return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags); 8384 8385 // fold (fmul X, -1.0) -> (fneg X) 8386 if (N1CFP && N1CFP->isExactlyValue(-1.0)) 8387 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 8388 return DAG.getNode(ISD::FNEG, DL, VT, N0); 8389 8390 // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) 8391 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { 8392 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { 8393 // Both can be negated for free, check to see if at least one is cheaper 8394 // negated. 
8395 if (LHSNeg == 2 || RHSNeg == 2) 8396 return DAG.getNode(ISD::FMUL, DL, VT, 8397 GetNegatedExpression(N0, DAG, LegalOperations), 8398 GetNegatedExpression(N1, DAG, LegalOperations), 8399 Flags); 8400 } 8401 } 8402 8403 // FMUL -> FMA combines: 8404 if (SDValue Fused = visitFMULForFMACombine(N)) { 8405 AddToWorklist(Fused.getNode()); 8406 return Fused; 8407 } 8408 8409 return SDValue(); 8410 } 8411 8412 SDValue DAGCombiner::visitFMA(SDNode *N) { 8413 SDValue N0 = N->getOperand(0); 8414 SDValue N1 = N->getOperand(1); 8415 SDValue N2 = N->getOperand(2); 8416 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 8417 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 8418 EVT VT = N->getValueType(0); 8419 SDLoc dl(N); 8420 const TargetOptions &Options = DAG.getTarget().Options; 8421 8422 // Constant fold FMA. 8423 if (isa<ConstantFPSDNode>(N0) && 8424 isa<ConstantFPSDNode>(N1) && 8425 isa<ConstantFPSDNode>(N2)) { 8426 return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2); 8427 } 8428 8429 if (Options.UnsafeFPMath) { 8430 if (N0CFP && N0CFP->isZero()) 8431 return N2; 8432 if (N1CFP && N1CFP->isZero()) 8433 return N2; 8434 } 8435 // TODO: The FMA node should have flags that propagate to these nodes. 8436 if (N0CFP && N0CFP->isExactlyValue(1.0)) 8437 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); 8438 if (N1CFP && N1CFP->isExactlyValue(1.0)) 8439 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); 8440 8441 // Canonicalize (fma c, x, y) -> (fma x, c, y) 8442 if (isConstantFPBuildVectorOrConstantFP(N0) && 8443 !isConstantFPBuildVectorOrConstantFP(N1)) 8444 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); 8445 8446 // TODO: FMA nodes should have flags that propagate to the created nodes. 8447 // For now, create a Flags object for use with all unsafe math transforms. 
8448 SDNodeFlags Flags; 8449 Flags.setUnsafeAlgebra(true); 8450 8451 if (Options.UnsafeFPMath) { 8452 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) 8453 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && 8454 isConstantFPBuildVectorOrConstantFP(N1) && 8455 isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { 8456 return DAG.getNode(ISD::FMUL, dl, VT, N0, 8457 DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1), 8458 &Flags), &Flags); 8459 } 8460 8461 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) 8462 if (N0.getOpcode() == ISD::FMUL && 8463 isConstantFPBuildVectorOrConstantFP(N1) && 8464 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { 8465 return DAG.getNode(ISD::FMA, dl, VT, 8466 N0.getOperand(0), 8467 DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1), 8468 &Flags), 8469 N2); 8470 } 8471 } 8472 8473 // (fma x, 1, y) -> (fadd x, y) 8474 // (fma x, -1, y) -> (fadd (fneg x), y) 8475 if (N1CFP) { 8476 if (N1CFP->isExactlyValue(1.0)) 8477 // TODO: The FMA node should have flags that propagate to this node. 8478 return DAG.getNode(ISD::FADD, dl, VT, N0, N2); 8479 8480 if (N1CFP->isExactlyValue(-1.0) && 8481 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { 8482 SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); 8483 AddToWorklist(RHSNeg.getNode()); 8484 // TODO: The FMA node should have flags that propagate to this node. 
8485 return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); 8486 } 8487 } 8488 8489 if (Options.UnsafeFPMath) { 8490 // (fma x, c, x) -> (fmul x, (c+1)) 8491 if (N1CFP && N0 == N2) { 8492 return DAG.getNode(ISD::FMUL, dl, VT, N0, 8493 DAG.getNode(ISD::FADD, dl, VT, 8494 N1, DAG.getConstantFP(1.0, dl, VT), 8495 &Flags), &Flags); 8496 } 8497 8498 // (fma x, c, (fneg x)) -> (fmul x, (c-1)) 8499 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { 8500 return DAG.getNode(ISD::FMUL, dl, VT, N0, 8501 DAG.getNode(ISD::FADD, dl, VT, 8502 N1, DAG.getConstantFP(-1.0, dl, VT), 8503 &Flags), &Flags); 8504 } 8505 } 8506 8507 return SDValue(); 8508 } 8509 8510 // Combine multiple FDIVs with the same divisor into multiple FMULs by the 8511 // reciprocal. 8512 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) 8513 // Notice that this is not always beneficial. One reason is different target 8514 // may have different costs for FDIV and FMUL, so sometimes the cost of two 8515 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason 8516 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". 8517 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { 8518 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; 8519 const SDNodeFlags *Flags = N->getFlags(); 8520 if (!UnsafeMath && !Flags->hasAllowReciprocal()) 8521 return SDValue(); 8522 8523 // Skip if current node is a reciprocal. 8524 SDValue N0 = N->getOperand(0); 8525 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 8526 if (N0CFP && N0CFP->isExactlyValue(1.0)) 8527 return SDValue(); 8528 8529 // Exit early if the target does not want this transform or if there can't 8530 // possibly be enough uses of the divisor to make the transform worthwhile. 
8531 SDValue N1 = N->getOperand(1); 8532 unsigned MinUses = TLI.combineRepeatedFPDivisors(); 8533 if (!MinUses || N1->use_size() < MinUses) 8534 return SDValue(); 8535 8536 // Find all FDIV users of the same divisor. 8537 // Use a set because duplicates may be present in the user list. 8538 SetVector<SDNode *> Users; 8539 for (auto *U : N1->uses()) { 8540 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) { 8541 // This division is eligible for optimization only if global unsafe math 8542 // is enabled or if this division allows reciprocal formation. 8543 if (UnsafeMath || U->getFlags()->hasAllowReciprocal()) 8544 Users.insert(U); 8545 } 8546 } 8547 8548 // Now that we have the actual number of divisor uses, make sure it meets 8549 // the minimum threshold specified by the target. 8550 if (Users.size() < MinUses) 8551 return SDValue(); 8552 8553 EVT VT = N->getValueType(0); 8554 SDLoc DL(N); 8555 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); 8556 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags); 8557 8558 // Dividend / Divisor -> Dividend * Reciprocal 8559 for (auto *U : Users) { 8560 SDValue Dividend = U->getOperand(0); 8561 if (Dividend != FPOne) { 8562 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, 8563 Reciprocal, Flags); 8564 CombineTo(U, NewNode); 8565 } else if (U != Reciprocal.getNode()) { 8566 // In the absence of fast-math-flags, this user node is always the 8567 // same node as Reciprocal, but with FMF they may be different nodes. 8568 CombineTo(U, Reciprocal); 8569 } 8570 } 8571 return SDValue(N, 0); // N was replaced. 
8572 } 8573 8574 SDValue DAGCombiner::visitFDIV(SDNode *N) { 8575 SDValue N0 = N->getOperand(0); 8576 SDValue N1 = N->getOperand(1); 8577 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 8578 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 8579 EVT VT = N->getValueType(0); 8580 SDLoc DL(N); 8581 const TargetOptions &Options = DAG.getTarget().Options; 8582 SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; 8583 8584 // fold vector ops 8585 if (VT.isVector()) 8586 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 8587 return FoldedVOp; 8588 8589 // fold (fdiv c1, c2) -> c1/c2 8590 if (N0CFP && N1CFP) 8591 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags); 8592 8593 if (Options.UnsafeFPMath) { 8594 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. 8595 if (N1CFP) { 8596 // Compute the reciprocal 1.0 / c2. 8597 APFloat N1APF = N1CFP->getValueAPF(); 8598 APFloat Recip(N1APF.getSemantics(), 1); // 1.0 8599 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); 8600 // Only do the transform if the reciprocal is a legal fp immediate that 8601 // isn't too nasty (eg NaN, denormal, ...). 8602 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty 8603 (!LegalOperations || 8604 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM 8605 // backend)... we should handle this gracefully after Legalize. 8606 // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || 8607 TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || 8608 TLI.isFPImmLegal(Recip, VT))) 8609 return DAG.getNode(ISD::FMUL, DL, VT, N0, 8610 DAG.getConstantFP(Recip, DL, VT), Flags); 8611 } 8612 8613 // If this FDIV is part of a reciprocal square root, it may be folded 8614 // into a target-specific square root estimate instruction. 
8615 if (N1.getOpcode() == ISD::FSQRT) { 8616 if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) { 8617 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); 8618 } 8619 } else if (N1.getOpcode() == ISD::FP_EXTEND && 8620 N1.getOperand(0).getOpcode() == ISD::FSQRT) { 8621 if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), 8622 Flags)) { 8623 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); 8624 AddToWorklist(RV.getNode()); 8625 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); 8626 } 8627 } else if (N1.getOpcode() == ISD::FP_ROUND && 8628 N1.getOperand(0).getOpcode() == ISD::FSQRT) { 8629 if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), 8630 Flags)) { 8631 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); 8632 AddToWorklist(RV.getNode()); 8633 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); 8634 } 8635 } else if (N1.getOpcode() == ISD::FMUL) { 8636 // Look through an FMUL. Even though this won't remove the FDIV directly, 8637 // it's still worthwhile to get rid of the FSQRT if possible. 8638 SDValue SqrtOp; 8639 SDValue OtherOp; 8640 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) { 8641 SqrtOp = N1.getOperand(0); 8642 OtherOp = N1.getOperand(1); 8643 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) { 8644 SqrtOp = N1.getOperand(1); 8645 OtherOp = N1.getOperand(0); 8646 } 8647 if (SqrtOp.getNode()) { 8648 // We found a FSQRT, so try to make this fold: 8649 // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) 8650 if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { 8651 RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); 8652 AddToWorklist(RV.getNode()); 8653 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); 8654 } 8655 } 8656 } 8657 8658 // Fold into a reciprocal estimate and multiply instead of a real divide. 
8659 if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { 8660 AddToWorklist(RV.getNode()); 8661 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); 8662 } 8663 } 8664 8665 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) 8666 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { 8667 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { 8668 // Both can be negated for free, check to see if at least one is cheaper 8669 // negated. 8670 if (LHSNeg == 2 || RHSNeg == 2) 8671 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, 8672 GetNegatedExpression(N0, DAG, LegalOperations), 8673 GetNegatedExpression(N1, DAG, LegalOperations), 8674 Flags); 8675 } 8676 } 8677 8678 if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N)) 8679 return CombineRepeatedDivisors; 8680 8681 return SDValue(); 8682 } 8683 8684 SDValue DAGCombiner::visitFREM(SDNode *N) { 8685 SDValue N0 = N->getOperand(0); 8686 SDValue N1 = N->getOperand(1); 8687 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 8688 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 8689 EVT VT = N->getValueType(0); 8690 8691 // fold (frem c1, c2) -> fmod(c1,c2) 8692 if (N0CFP && N1CFP) 8693 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, 8694 &cast<BinaryWithFlagsSDNode>(N)->Flags); 8695 8696 return SDValue(); 8697 } 8698 8699 SDValue DAGCombiner::visitFSQRT(SDNode *N) { 8700 if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) 8701 return SDValue(); 8702 8703 // TODO: FSQRT nodes should have flags that propagate to the created nodes. 8704 // For now, create a Flags object for use with all unsafe math transforms. 
8705 SDNodeFlags Flags; 8706 Flags.setUnsafeAlgebra(true); 8707 8708 // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) 8709 SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags); 8710 if (!RV) 8711 return SDValue(); 8712 8713 EVT VT = RV.getValueType(); 8714 SDLoc DL(N); 8715 RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags); 8716 AddToWorklist(RV.getNode()); 8717 8718 // Unfortunately, RV is now NaN if the input was exactly 0. 8719 // Select out this case and force the answer to 0. 8720 SDValue Zero = DAG.getConstantFP(0.0, DL, VT); 8721 EVT CCVT = getSetCCResultType(VT); 8722 SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); 8723 AddToWorklist(ZeroCmp.getNode()); 8724 AddToWorklist(RV.getNode()); 8725 8726 return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, 8727 ZeroCmp, Zero, RV); 8728 } 8729 8730 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { 8731 SDValue N0 = N->getOperand(0); 8732 SDValue N1 = N->getOperand(1); 8733 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 8734 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 8735 EVT VT = N->getValueType(0); 8736 8737 if (N0CFP && N1CFP) // Constant fold 8738 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); 8739 8740 if (N1CFP) { 8741 const APFloat& V = N1CFP->getValueAPF(); 8742 // copysign(x, c1) -> fabs(x) iff ispos(c1) 8743 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) 8744 if (!V.isNegative()) { 8745 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) 8746 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 8747 } else { 8748 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 8749 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, 8750 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); 8751 } 8752 } 8753 8754 // copysign(fabs(x), y) -> copysign(x, y) 8755 // copysign(fneg(x), y) -> copysign(x, y) 8756 // copysign(copysign(x,z), y) -> copysign(x, y) 8757 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == 
ISD::FNEG || 8758 N0.getOpcode() == ISD::FCOPYSIGN) 8759 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 8760 N0.getOperand(0), N1); 8761 8762 // copysign(x, abs(y)) -> abs(x) 8763 if (N1.getOpcode() == ISD::FABS) 8764 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 8765 8766 // copysign(x, copysign(y,z)) -> copysign(x, z) 8767 if (N1.getOpcode() == ISD::FCOPYSIGN) 8768 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 8769 N0, N1.getOperand(1)); 8770 8771 // copysign(x, fp_extend(y)) -> copysign(x, y) 8772 // copysign(x, fp_round(y)) -> copysign(x, y) 8773 if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) 8774 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 8775 N0, N1.getOperand(0)); 8776 8777 return SDValue(); 8778 } 8779 8780 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { 8781 SDValue N0 = N->getOperand(0); 8782 EVT VT = N->getValueType(0); 8783 EVT OpVT = N0.getValueType(); 8784 8785 // fold (sint_to_fp c1) -> c1fp 8786 if (isConstantIntBuildVectorOrConstantInt(N0) && 8787 // ...but only if the target supports immediate floating-point values 8788 (!LegalOperations || 8789 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 8790 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); 8791 8792 // If the input is a legal type, and SINT_TO_FP is not legal on this target, 8793 // but UINT_TO_FP is legal on this target, try to convert. 8794 if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && 8795 TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { 8796 // If the sign bit is known to be zero, we can change this to UINT_TO_FP. 8797 if (DAG.SignBitIsZero(N0)) 8798 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); 8799 } 8800 8801 // The next optimizations are desirable only if SELECT_CC can be lowered. 
8802 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { 8803 // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) 8804 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && 8805 !VT.isVector() && 8806 (!LegalOperations || 8807 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 8808 SDLoc DL(N); 8809 SDValue Ops[] = 8810 { N0.getOperand(0), N0.getOperand(1), 8811 DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), 8812 N0.getOperand(2) }; 8813 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); 8814 } 8815 8816 // fold (sint_to_fp (zext (setcc x, y, cc))) -> 8817 // (select_cc x, y, 1.0, 0.0,, cc) 8818 if (N0.getOpcode() == ISD::ZERO_EXTEND && 8819 N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && 8820 (!LegalOperations || 8821 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 8822 SDLoc DL(N); 8823 SDValue Ops[] = 8824 { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), 8825 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), 8826 N0.getOperand(0).getOperand(2) }; 8827 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); 8828 } 8829 } 8830 8831 return SDValue(); 8832 } 8833 8834 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { 8835 SDValue N0 = N->getOperand(0); 8836 EVT VT = N->getValueType(0); 8837 EVT OpVT = N0.getValueType(); 8838 8839 // fold (uint_to_fp c1) -> c1fp 8840 if (isConstantIntBuildVectorOrConstantInt(N0) && 8841 // ...but only if the target supports immediate floating-point values 8842 (!LegalOperations || 8843 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 8844 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); 8845 8846 // If the input is a legal type, and UINT_TO_FP is not legal on this target, 8847 // but SINT_TO_FP is legal on this target, try to convert. 
8848 if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) && 8849 TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { 8850 // If the sign bit is known to be zero, we can change this to SINT_TO_FP. 8851 if (DAG.SignBitIsZero(N0)) 8852 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); 8853 } 8854 8855 // The next optimizations are desirable only if SELECT_CC can be lowered. 8856 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { 8857 // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) 8858 8859 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && 8860 (!LegalOperations || 8861 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 8862 SDLoc DL(N); 8863 SDValue Ops[] = 8864 { N0.getOperand(0), N0.getOperand(1), 8865 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), 8866 N0.getOperand(2) }; 8867 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); 8868 } 8869 } 8870 8871 return SDValue(); 8872 } 8873 8874 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x 8875 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { 8876 SDValue N0 = N->getOperand(0); 8877 EVT VT = N->getValueType(0); 8878 8879 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP) 8880 return SDValue(); 8881 8882 SDValue Src = N0.getOperand(0); 8883 EVT SrcVT = Src.getValueType(); 8884 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP; 8885 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT; 8886 8887 // We can safely assume the conversion won't overflow the output range, 8888 // because (for example) (uint8_t)18293.f is undefined behavior. 8889 8890 // Since we can assume the conversion won't overflow, our decision as to 8891 // whether the input will fit in the float should depend on the minimum 8892 // of the input range and output range. 
8893 8894 // This means this is also safe for a signed input and unsigned output, since 8895 // a negative input would lead to undefined behavior. 8896 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned; 8897 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned; 8898 unsigned ActualSize = std::min(InputSize, OutputSize); 8899 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType()); 8900 8901 // We can only fold away the float conversion if the input range can be 8902 // represented exactly in the float range. 8903 if (APFloat::semanticsPrecision(sem) >= ActualSize) { 8904 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) { 8905 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND 8906 : ISD::ZERO_EXTEND; 8907 return DAG.getNode(ExtOp, SDLoc(N), VT, Src); 8908 } 8909 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits()) 8910 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src); 8911 if (SrcVT == VT) 8912 return Src; 8913 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src); 8914 } 8915 return SDValue(); 8916 } 8917 8918 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { 8919 SDValue N0 = N->getOperand(0); 8920 EVT VT = N->getValueType(0); 8921 8922 // fold (fp_to_sint c1fp) -> c1 8923 if (isConstantFPBuildVectorOrConstantFP(N0)) 8924 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); 8925 8926 return FoldIntToFPToInt(N, DAG); 8927 } 8928 8929 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { 8930 SDValue N0 = N->getOperand(0); 8931 EVT VT = N->getValueType(0); 8932 8933 // fold (fp_to_uint c1fp) -> c1 8934 if (isConstantFPBuildVectorOrConstantFP(N0)) 8935 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); 8936 8937 return FoldIntToFPToInt(N, DAG); 8938 } 8939 8940 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { 8941 SDValue N0 = N->getOperand(0); 8942 SDValue N1 = N->getOperand(1); 8943 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 8944 EVT VT = N->getValueType(0); 
8945 8946 // fold (fp_round c1fp) -> c1fp 8947 if (N0CFP) 8948 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1); 8949 8950 // fold (fp_round (fp_extend x)) -> x 8951 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) 8952 return N0.getOperand(0); 8953 8954 // fold (fp_round (fp_round x)) -> (fp_round x) 8955 if (N0.getOpcode() == ISD::FP_ROUND) { 8956 const bool NIsTrunc = N->getConstantOperandVal(1) == 1; 8957 const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; 8958 // If the first fp_round isn't a value preserving truncation, it might 8959 // introduce a tie in the second fp_round, that wouldn't occur in the 8960 // single-step fp_round we want to fold to. 8961 // In other words, double rounding isn't the same as rounding. 8962 // Also, this is a value preserving truncation iff both fp_round's are. 8963 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { 8964 SDLoc DL(N); 8965 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), 8966 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); 8967 } 8968 } 8969 8970 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) 8971 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { 8972 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, 8973 N0.getOperand(0), N1); 8974 AddToWorklist(Tmp.getNode()); 8975 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 8976 Tmp, N0.getOperand(1)); 8977 } 8978 8979 return SDValue(); 8980 } 8981 8982 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { 8983 SDValue N0 = N->getOperand(0); 8984 EVT VT = N->getValueType(0); 8985 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 8986 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 8987 8988 // fold (fp_round_inreg c1fp) -> c1fp 8989 if (N0CFP && isTypeLegal(EVT)) { 8990 SDLoc DL(N); 8991 SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); 8992 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); 8993 } 8994 8995 return 
SDValue(); 8996 } 8997 8998 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { 8999 SDValue N0 = N->getOperand(0); 9000 EVT VT = N->getValueType(0); 9001 9002 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. 9003 if (N->hasOneUse() && 9004 N->use_begin()->getOpcode() == ISD::FP_ROUND) 9005 return SDValue(); 9006 9007 // fold (fp_extend c1fp) -> c1fp 9008 if (isConstantFPBuildVectorOrConstantFP(N0)) 9009 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); 9010 9011 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op) 9012 if (N0.getOpcode() == ISD::FP16_TO_FP && 9013 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal) 9014 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0)); 9015 9016 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the 9017 // value of X. 9018 if (N0.getOpcode() == ISD::FP_ROUND 9019 && N0.getNode()->getConstantOperandVal(1) == 1) { 9020 SDValue In = N0.getOperand(0); 9021 if (In.getValueType() == VT) return In; 9022 if (VT.bitsLT(In.getValueType())) 9023 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, 9024 In, N0.getOperand(1)); 9025 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In); 9026 } 9027 9028 // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) 9029 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 9030 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { 9031 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 9032 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, 9033 LN0->getChain(), 9034 LN0->getBasePtr(), N0.getValueType(), 9035 LN0->getMemOperand()); 9036 CombineTo(N, ExtLoad); 9037 CombineTo(N0.getNode(), 9038 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), 9039 N0.getValueType(), ExtLoad, 9040 DAG.getIntPtrConstant(1, SDLoc(N0))), 9041 ExtLoad.getValue(1)); 9042 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
9043 } 9044 9045 return SDValue(); 9046 } 9047 9048 SDValue DAGCombiner::visitFCEIL(SDNode *N) { 9049 SDValue N0 = N->getOperand(0); 9050 EVT VT = N->getValueType(0); 9051 9052 // fold (fceil c1) -> fceil(c1) 9053 if (isConstantFPBuildVectorOrConstantFP(N0)) 9054 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); 9055 9056 return SDValue(); 9057 } 9058 9059 SDValue DAGCombiner::visitFTRUNC(SDNode *N) { 9060 SDValue N0 = N->getOperand(0); 9061 EVT VT = N->getValueType(0); 9062 9063 // fold (ftrunc c1) -> ftrunc(c1) 9064 if (isConstantFPBuildVectorOrConstantFP(N0)) 9065 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); 9066 9067 return SDValue(); 9068 } 9069 9070 SDValue DAGCombiner::visitFFLOOR(SDNode *N) { 9071 SDValue N0 = N->getOperand(0); 9072 EVT VT = N->getValueType(0); 9073 9074 // fold (ffloor c1) -> ffloor(c1) 9075 if (isConstantFPBuildVectorOrConstantFP(N0)) 9076 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); 9077 9078 return SDValue(); 9079 } 9080 9081 // FIXME: FNEG and FABS have a lot in common; refactor. 9082 SDValue DAGCombiner::visitFNEG(SDNode *N) { 9083 SDValue N0 = N->getOperand(0); 9084 EVT VT = N->getValueType(0); 9085 9086 // Constant fold FNEG. 9087 if (isConstantFPBuildVectorOrConstantFP(N0)) 9088 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); 9089 9090 if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), 9091 &DAG.getTarget().Options)) 9092 return GetNegatedExpression(N0, DAG, LegalOperations); 9093 9094 // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading 9095 // constant pool values. 9096 if (!TLI.isFNegFree(VT) && 9097 N0.getOpcode() == ISD::BITCAST && 9098 N0.getNode()->hasOneUse()) { 9099 SDValue Int = N0.getOperand(0); 9100 EVT IntVT = Int.getValueType(); 9101 if (IntVT.isInteger() && !IntVT.isVector()) { 9102 APInt SignMask; 9103 if (N0.getValueType().isVector()) { 9104 // For a vector, get a mask such as 0x80... per scalar element 9105 // and splat it. 
9106 SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); 9107 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); 9108 } else { 9109 // For a scalar, just generate 0x80... 9110 SignMask = APInt::getSignBit(IntVT.getSizeInBits()); 9111 } 9112 SDLoc DL0(N0); 9113 Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, 9114 DAG.getConstant(SignMask, DL0, IntVT)); 9115 AddToWorklist(Int.getNode()); 9116 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); 9117 } 9118 } 9119 9120 // (fneg (fmul c, x)) -> (fmul -c, x) 9121 if (N0.getOpcode() == ISD::FMUL && 9122 (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) { 9123 ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); 9124 if (CFP1) { 9125 APFloat CVal = CFP1->getValueAPF(); 9126 CVal.changeSign(); 9127 if (Level >= AfterLegalizeDAG && 9128 (TLI.isFPImmLegal(CVal, N->getValueType(0)) || 9129 TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0)))) 9130 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), 9131 DAG.getNode(ISD::FNEG, SDLoc(N), VT, 9132 N0.getOperand(1)), 9133 &cast<BinaryWithFlagsSDNode>(N0)->Flags); 9134 } 9135 } 9136 9137 return SDValue(); 9138 } 9139 9140 SDValue DAGCombiner::visitFMINNUM(SDNode *N) { 9141 SDValue N0 = N->getOperand(0); 9142 SDValue N1 = N->getOperand(1); 9143 EVT VT = N->getValueType(0); 9144 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); 9145 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); 9146 9147 if (N0CFP && N1CFP) { 9148 const APFloat &C0 = N0CFP->getValueAPF(); 9149 const APFloat &C1 = N1CFP->getValueAPF(); 9150 return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT); 9151 } 9152 9153 // Canonicalize to constant on RHS. 
9154 if (isConstantFPBuildVectorOrConstantFP(N0) && 9155 !isConstantFPBuildVectorOrConstantFP(N1)) 9156 return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0); 9157 9158 return SDValue(); 9159 } 9160 9161 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { 9162 SDValue N0 = N->getOperand(0); 9163 SDValue N1 = N->getOperand(1); 9164 EVT VT = N->getValueType(0); 9165 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); 9166 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); 9167 9168 if (N0CFP && N1CFP) { 9169 const APFloat &C0 = N0CFP->getValueAPF(); 9170 const APFloat &C1 = N1CFP->getValueAPF(); 9171 return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT); 9172 } 9173 9174 // Canonicalize to constant on RHS. 9175 if (isConstantFPBuildVectorOrConstantFP(N0) && 9176 !isConstantFPBuildVectorOrConstantFP(N1)) 9177 return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0); 9178 9179 return SDValue(); 9180 } 9181 9182 SDValue DAGCombiner::visitFABS(SDNode *N) { 9183 SDValue N0 = N->getOperand(0); 9184 EVT VT = N->getValueType(0); 9185 9186 // fold (fabs c1) -> fabs(c1) 9187 if (isConstantFPBuildVectorOrConstantFP(N0)) 9188 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 9189 9190 // fold (fabs (fabs x)) -> (fabs x) 9191 if (N0.getOpcode() == ISD::FABS) 9192 return N->getOperand(0); 9193 9194 // fold (fabs (fneg x)) -> (fabs x) 9195 // fold (fabs (fcopysign x, y)) -> (fabs x) 9196 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) 9197 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); 9198 9199 // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading 9200 // constant pool values. 9201 if (!TLI.isFAbsFree(VT) && 9202 N0.getOpcode() == ISD::BITCAST && 9203 N0.getNode()->hasOneUse()) { 9204 SDValue Int = N0.getOperand(0); 9205 EVT IntVT = Int.getValueType(); 9206 if (IntVT.isInteger() && !IntVT.isVector()) { 9207 APInt SignMask; 9208 if (N0.getValueType().isVector()) { 9209 // For a vector, get a mask such as 0x7f... 
per scalar element 9210 // and splat it. 9211 SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); 9212 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); 9213 } else { 9214 // For a scalar, just generate 0x7f... 9215 SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); 9216 } 9217 SDLoc DL(N0); 9218 Int = DAG.getNode(ISD::AND, DL, IntVT, Int, 9219 DAG.getConstant(SignMask, DL, IntVT)); 9220 AddToWorklist(Int.getNode()); 9221 return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); 9222 } 9223 } 9224 9225 return SDValue(); 9226 } 9227 9228 SDValue DAGCombiner::visitBRCOND(SDNode *N) { 9229 SDValue Chain = N->getOperand(0); 9230 SDValue N1 = N->getOperand(1); 9231 SDValue N2 = N->getOperand(2); 9232 9233 // If N is a constant we could fold this into a fallthrough or unconditional 9234 // branch. However that doesn't happen very often in normal code, because 9235 // Instcombine/SimplifyCFG should have handled the available opportunities. 9236 // If we did this folding here, it would be necessary to update the 9237 // MachineBasicBlock CFG, which is awkward. 9238 9239 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal 9240 // on the target. 9241 if (N1.getOpcode() == ISD::SETCC && 9242 TLI.isOperationLegalOrCustom(ISD::BR_CC, 9243 N1.getOperand(0).getValueType())) { 9244 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, 9245 Chain, N1.getOperand(2), 9246 N1.getOperand(0), N1.getOperand(1), N2); 9247 } 9248 9249 if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || 9250 ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && 9251 (N1.getOperand(0).hasOneUse() && 9252 N1.getOperand(0).getOpcode() == ISD::SRL))) { 9253 SDNode *Trunc = nullptr; 9254 if (N1.getOpcode() == ISD::TRUNCATE) { 9255 // Look pass the truncate. 9256 Trunc = N1.getNode(); 9257 N1 = N1.getOperand(0); 9258 } 9259 9260 // Match this pattern so that we can generate simpler code: 9261 // 9262 // %a = ... 
9263 // %b = and i32 %a, 2 9264 // %c = srl i32 %b, 1 9265 // brcond i32 %c ... 9266 // 9267 // into 9268 // 9269 // %a = ... 9270 // %b = and i32 %a, 2 9271 // %c = setcc eq %b, 0 9272 // brcond %c ... 9273 // 9274 // This applies only when the AND constant value has one bit set and the 9275 // SRL constant is equal to the log2 of the AND constant. The back-end is 9276 // smart enough to convert the result into a TEST/JMP sequence. 9277 SDValue Op0 = N1.getOperand(0); 9278 SDValue Op1 = N1.getOperand(1); 9279 9280 if (Op0.getOpcode() == ISD::AND && 9281 Op1.getOpcode() == ISD::Constant) { 9282 SDValue AndOp1 = Op0.getOperand(1); 9283 9284 if (AndOp1.getOpcode() == ISD::Constant) { 9285 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); 9286 9287 if (AndConst.isPowerOf2() && 9288 cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { 9289 SDLoc DL(N); 9290 SDValue SetCC = 9291 DAG.getSetCC(DL, 9292 getSetCCResultType(Op0.getValueType()), 9293 Op0, DAG.getConstant(0, DL, Op0.getValueType()), 9294 ISD::SETNE); 9295 9296 SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL, 9297 MVT::Other, Chain, SetCC, N2); 9298 // Don't add the new BRCond into the worklist or else SimplifySelectCC 9299 // will convert it back to (X & C1) >> C2. 9300 CombineTo(N, NewBRCond, false); 9301 // Truncate is dead. 9302 if (Trunc) 9303 deleteAndRecombine(Trunc); 9304 // Replace the uses of SRL with SETCC 9305 WorklistRemover DeadNodes(*this); 9306 DAG.ReplaceAllUsesOfValueWith(N1, SetCC); 9307 deleteAndRecombine(N1.getNode()); 9308 return SDValue(N, 0); // Return N so it doesn't get rechecked! 9309 } 9310 } 9311 } 9312 9313 if (Trunc) 9314 // Restore N1 if the above transformation doesn't match. 
9315 N1 = N->getOperand(1); 9316 } 9317 9318 // Transform br(xor(x, y)) -> br(x != y) 9319 // Transform br(xor(xor(x,y), 1)) -> br (x == y) 9320 if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { 9321 SDNode *TheXor = N1.getNode(); 9322 SDValue Op0 = TheXor->getOperand(0); 9323 SDValue Op1 = TheXor->getOperand(1); 9324 if (Op0.getOpcode() == Op1.getOpcode()) { 9325 // Avoid missing important xor optimizations. 9326 if (SDValue Tmp = visitXOR(TheXor)) { 9327 if (Tmp.getNode() != TheXor) { 9328 DEBUG(dbgs() << "\nReplacing.8 "; 9329 TheXor->dump(&DAG); 9330 dbgs() << "\nWith: "; 9331 Tmp.getNode()->dump(&DAG); 9332 dbgs() << '\n'); 9333 WorklistRemover DeadNodes(*this); 9334 DAG.ReplaceAllUsesOfValueWith(N1, Tmp); 9335 deleteAndRecombine(TheXor); 9336 return DAG.getNode(ISD::BRCOND, SDLoc(N), 9337 MVT::Other, Chain, Tmp, N2); 9338 } 9339 9340 // visitXOR has changed XOR's operands or replaced the XOR completely, 9341 // bail out. 9342 return SDValue(N, 0); 9343 } 9344 } 9345 9346 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { 9347 bool Equal = false; 9348 if (isOneConstant(Op0) && Op0.hasOneUse() && 9349 Op0.getOpcode() == ISD::XOR) { 9350 TheXor = Op0.getNode(); 9351 Equal = true; 9352 } 9353 9354 EVT SetCCVT = N1.getValueType(); 9355 if (LegalTypes) 9356 SetCCVT = getSetCCResultType(SetCCVT); 9357 SDValue SetCC = DAG.getSetCC(SDLoc(TheXor), 9358 SetCCVT, 9359 Op0, Op1, 9360 Equal ? ISD::SETEQ : ISD::SETNE); 9361 // Replace the uses of XOR with SETCC 9362 WorklistRemover DeadNodes(*this); 9363 DAG.ReplaceAllUsesOfValueWith(N1, SetCC); 9364 deleteAndRecombine(N1.getNode()); 9365 return DAG.getNode(ISD::BRCOND, SDLoc(N), 9366 MVT::Other, Chain, SetCC, N2); 9367 } 9368 } 9369 9370 return SDValue(); 9371 } 9372 9373 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. 
9374 // 9375 SDValue DAGCombiner::visitBR_CC(SDNode *N) { 9376 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); 9377 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); 9378 9379 // If N is a constant we could fold this into a fallthrough or unconditional 9380 // branch. However that doesn't happen very often in normal code, because 9381 // Instcombine/SimplifyCFG should have handled the available opportunities. 9382 // If we did this folding here, it would be necessary to update the 9383 // MachineBasicBlock CFG, which is awkward. 9384 9385 // Use SimplifySetCC to simplify SETCC's. 9386 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), 9387 CondLHS, CondRHS, CC->get(), SDLoc(N), 9388 false); 9389 if (Simp.getNode()) AddToWorklist(Simp.getNode()); 9390 9391 // fold to a simpler setcc 9392 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) 9393 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, 9394 N->getOperand(0), Simp.getOperand(2), 9395 Simp.getOperand(0), Simp.getOperand(1), 9396 N->getOperand(4)); 9397 9398 return SDValue(); 9399 } 9400 9401 /// Return true if 'Use' is a load or a store that uses N as its base pointer 9402 /// and that N may be folded in the load / store addressing mode. 
9403 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, 9404 SelectionDAG &DAG, 9405 const TargetLowering &TLI) { 9406 EVT VT; 9407 unsigned AS; 9408 9409 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { 9410 if (LD->isIndexed() || LD->getBasePtr().getNode() != N) 9411 return false; 9412 VT = LD->getMemoryVT(); 9413 AS = LD->getAddressSpace(); 9414 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { 9415 if (ST->isIndexed() || ST->getBasePtr().getNode() != N) 9416 return false; 9417 VT = ST->getMemoryVT(); 9418 AS = ST->getAddressSpace(); 9419 } else 9420 return false; 9421 9422 TargetLowering::AddrMode AM; 9423 if (N->getOpcode() == ISD::ADD) { 9424 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 9425 if (Offset) 9426 // [reg +/- imm] 9427 AM.BaseOffs = Offset->getSExtValue(); 9428 else 9429 // [reg +/- reg] 9430 AM.Scale = 1; 9431 } else if (N->getOpcode() == ISD::SUB) { 9432 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 9433 if (Offset) 9434 // [reg +/- imm] 9435 AM.BaseOffs = -Offset->getSExtValue(); 9436 else 9437 // [reg +/- reg] 9438 AM.Scale = 1; 9439 } else 9440 return false; 9441 9442 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, 9443 VT.getTypeForEVT(*DAG.getContext()), AS); 9444 } 9445 9446 /// Try turning a load/store into a pre-indexed load/store when the base 9447 /// pointer is an add or subtract and it has other uses besides the load/store. 9448 /// After the transformation, the new indexed load/store has effectively folded 9449 /// the add/subtract in and all of its other uses are redirected to the 9450 /// new load/store. 
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Returns true when N has been replaced by a pre-indexed load/store (N and
  // its old base pointer node are deleted in that case), false otherwise.

  // Indexed nodes are only formed once the combine level has reached
  // AfterLegalizeDAG.
  if (Level < AfterLegalizeDAG)
    return false;

  // N must be an unindexed load or store for which the target supports at
  // least one of the pre-inc / pre-dec forms for the memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out. There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  // While Swapped is set, BasePtr/Offset are exchanged relative to what the
  // target returned; they are swapped back before the indexed node is built.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1. Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  // Register nodes are rejected by the same test.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  // The collection is all-or-nothing: any unsuitable use clears the list.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // Users that N depends on are left alone.
      if (Use.getUser()->isPredecessorOf(N))
        continue;

      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // The user is a two-operand add/sub: fetch the operand that is not the
      // BasePtr use.
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Restore the order returned by the target before building the new node.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  // Caches for hasPredecessorHelper
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (N->hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // As used here, an indexed load yields (value, updated pointer, chain) and
  // an indexed store yields (updated pointer, chain).
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Swap again so that Offset is the constant that the OtherUses collection
  // above was based on.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    // Locate which operand of the add/sub is the constant offset; the other
    // one must be BasePtr.
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    APInt Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    // A negative (y0 * y1) means t1 is subtracted from the new constant.
    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    // CNV = x0 * offset0 - x1 * y0 * y1 * offset1
    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}

/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all of its uses are
/// redirected to the new load/store.
///
/// \returns true if \p N was replaced by a post-indexed load/store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Indexed nodes are only formed once the combine level has reached
  // AfterLegalizeDAG.
  if (Level < AfterLegalizeDAG)
    return false;

  // N must be an unindexed load or store for which the target supports at
  // least one of the post-inc / post-dec forms for the memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // With N as the only user there is no add/sub of the pointer to fold.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look for an add/sub user of the pointer that the target can fold.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // As used here, an indexed load yields (value, updated pointer, chain)
        // and an indexed store yields (updated pointer, chain).
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Op with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}

/// \brief Return the base-pointer arithmetic from an indexed \p LD.
///
/// The result is an ADD (for the *_INC addressing modes) or a SUB (for the
/// *_DEC ones) of the load's operands 1 and 2.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
  assert((Inc.getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
         "Cannot split out indexing using opaque target constants");
  if (Inc.getOpcode() == ISD::TargetConstant) {
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
                          ConstInc->getValueType(0));
  }

  unsigned Opc =
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}

// Combine a LOAD node. The attempts are, in order: dead-load removal, forwarding
// of a directly preceding store, alignment improvement, chain improvement via
// alias analysis, indexed-load formation and load slicing.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3 = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        // Only delete N if the replacement really left it without uses (see
        // the example above).
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // The loaded value must be unused, and the updated pointer must either
      // be unused as well or be recomputable as separate arithmetic.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      // Same address and same value type: forward the stored value (operand 1
      // of the store) and reuse the store as the chain.
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        SDValue NewLoad =
               DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getPointerInfo(),
                              LD->getMemoryVT(),
                              LD->isVolatile(), LD->isNonTemporal(),
                              LD->isInvariant(), Align, LD->getAAInfo());
        if (NewLoad.getNode() != N)
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  // An explicit -combiner-alias-analysis on the command line overrides the
  // subtarget's preference.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}

namespace {
/// \brief Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// \brief Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;
    /// Number of operations of each kind accounted for by this cost.
    unsigned Loads;
    unsigned Truncates;
    unsigned CrossRegisterBanksCopies;
    unsigned ZExts;
    unsigned Shift;

    /// \brief Build an empty cost (every counter is zero).
    Cost(bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}

    /// \brief Get the cost of one isolated slice.
    /// That is one load, plus one zext when the loaded type differs from the
    /// type of the slice and the target does not report that zext as free.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// \brief Account for slicing gain in the current cost.
    /// Slicing provides a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    /// \brief Accumulate the counters of \p RHS into this cost.
    /// ForCodeSize is left unchanged.
    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    /// \brief Counter-wise equality; ForCodeSize does not take part.
    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    /// \brief Strict "cheaper than" ordering between two costs.
    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      // Otherwise compare the total number of operations.
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    // The remaining comparisons are all derived from operator<.
    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
  SDNode *Inst;
  // The original load instruction.
  LoadSDNode *Origin;
  // The right shift amount in bits from the original load.
  unsigned Shift;
  // The DAG that Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// \brief Get the bits of the original loaded value that this slice uses.
  /// \return A mask as wide as the value loaded by Origin, with used bits
  /// set to 1 and not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// \brief Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// \brief Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// \brief Get the alignment of the load used for this slice.
  /// This is the alignment of Origin, reduced according to the byte offset of
  /// the slice when that offset is not zero.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// \brief Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (Origin->getOffset().getOpcode() != ISD::UNDEF)
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// \brief Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the byte offset is counted from the other end of
    // the loaded value.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// \brief Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst = DAG->getLoad(
        SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
        Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
        Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// \brief Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    // The slice must feed exactly one node, and that node must be a bitcast.
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    // NOTE(review): this repeats the legality test already made above.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};
}

/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
  // If all the bits are one, this is dense!
  if (UsedBits.isAllOnesValue())
    return true;

  // Get rid of the unused bits on the right.
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
  // Get rid of the unused bits on the left.
  if (NarrowedUsedBits.countLeadingZeros())
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
  // Check that the chunk of bits is completely used.
  return NarrowedUsedBits.isAllOnesValue();
}

/// \brief Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
10289 static bool areSlicesNextToEachOther(const LoadedSlice &First, 10290 const LoadedSlice &Second) { 10291 assert(First.Origin == Second.Origin && First.Origin && 10292 "Unable to match different memory origins."); 10293 APInt UsedBits = First.getUsedBits(); 10294 assert((UsedBits & Second.getUsedBits()) == 0 && 10295 "Slices are not supposed to overlap."); 10296 UsedBits |= Second.getUsedBits(); 10297 return areUsedBitsDense(UsedBits); 10298 } 10299 10300 /// \brief Adjust the \p GlobalLSCost according to the target 10301 /// paring capabilities and the layout of the slices. 10302 /// \pre \p GlobalLSCost should account for at least as many loads as 10303 /// there is in the slices in \p LoadedSlices. 10304 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, 10305 LoadedSlice::Cost &GlobalLSCost) { 10306 unsigned NumberOfSlices = LoadedSlices.size(); 10307 // If there is less than 2 elements, no pairing is possible. 10308 if (NumberOfSlices < 2) 10309 return; 10310 10311 // Sort the slices so that elements that are likely to be next to each 10312 // other in memory are next to each other in the list. 10313 std::sort(LoadedSlices.begin(), LoadedSlices.end(), 10314 [](const LoadedSlice &LHS, const LoadedSlice &RHS) { 10315 assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); 10316 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); 10317 }); 10318 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); 10319 // First (resp. Second) is the first (resp. Second) potentially candidate 10320 // to be placed in a paired load. 10321 const LoadedSlice *First = nullptr; 10322 const LoadedSlice *Second = nullptr; 10323 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, 10324 // Set the beginning of the pair. 10325 First = Second) { 10326 10327 Second = &LoadedSlices[CurrSlice]; 10328 10329 // If First is NULL, it means we start a new pair. 10330 // Get to the next slice. 
10331 if (!First) 10332 continue; 10333 10334 EVT LoadedType = First->getLoadedType(); 10335 10336 // If the types of the slices are different, we cannot pair them. 10337 if (LoadedType != Second->getLoadedType()) 10338 continue; 10339 10340 // Check if the target supplies paired loads for this type. 10341 unsigned RequiredAlignment = 0; 10342 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { 10343 // move to the next pair, this type is hopeless. 10344 Second = nullptr; 10345 continue; 10346 } 10347 // Check if we meet the alignment requirement. 10348 if (RequiredAlignment > First->getAlignment()) 10349 continue; 10350 10351 // Check that both loads are next to each other in memory. 10352 if (!areSlicesNextToEachOther(*First, *Second)) 10353 continue; 10354 10355 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); 10356 --GlobalLSCost.Loads; 10357 // Move to the next pair. 10358 Second = nullptr; 10359 } 10360 } 10361 10362 /// \brief Check the profitability of all involved LoadedSlice. 10363 /// Currently, it is considered profitable if there is exactly two 10364 /// involved slices (1) which are (2) next to each other in memory, and 10365 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). 10366 /// 10367 /// Note: The order of the elements in \p LoadedSlices may be modified, but not 10368 /// the elements themselves. 10369 /// 10370 /// FIXME: When the cost model will be mature enough, we can relax 10371 /// constraints (1) and (2). 10372 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, 10373 const APInt &UsedBits, bool ForCodeSize) { 10374 unsigned NumberOfSlices = LoadedSlices.size(); 10375 if (StressLoadSlicing) 10376 return NumberOfSlices > 1; 10377 10378 // Check (1). 10379 if (NumberOfSlices != 2) 10380 return false; 10381 10382 // Check (2). 10383 if (!areUsedBitsDense(UsedBits)) 10384 return false; 10385 10386 // Check (3). 
10387 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); 10388 // The original code has one big load. 10389 OrigCost.Loads = 1; 10390 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { 10391 const LoadedSlice &LS = LoadedSlices[CurrSlice]; 10392 // Accumulate the cost of all the slices. 10393 LoadedSlice::Cost SliceCost(LS, ForCodeSize); 10394 GlobalSlicingCost += SliceCost; 10395 10396 // Account as cost in the original configuration the gain obtained 10397 // with the current slices. 10398 OrigCost.addSliceGain(LS); 10399 } 10400 10401 // If the target supports paired load, adjust the cost accordingly. 10402 adjustCostForPairing(LoadedSlices, GlobalSlicingCost); 10403 return OrigCost > GlobalSlicingCost; 10404 } 10405 10406 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) 10407 /// operations, split it in the various pieces being extracted. 10408 /// 10409 /// This sort of thing is introduced by SROA. 10410 /// This slicing takes care not to insert overlapping loads. 10411 /// \pre LI is a simple load (i.e., not an atomic or volatile load). 10412 bool DAGCombiner::SliceUpLoad(SDNode *N) { 10413 if (Level < AfterLegalizeDAG) 10414 return false; 10415 10416 LoadSDNode *LD = cast<LoadSDNode>(N); 10417 if (LD->isVolatile() || !ISD::isNormalLoad(LD) || 10418 !LD->getValueType(0).isInteger()) 10419 return false; 10420 10421 // Keep track of already used bits to detect overlapping values. 10422 // In that case, we will just abort the transformation. 10423 APInt UsedBits(LD->getValueSizeInBits(0), 0); 10424 10425 SmallVector<LoadedSlice, 4> LoadedSlices; 10426 10427 // Check if this load is used as several smaller chunks of bits. 10428 // Basically, look for uses in trunc or trunc(lshr) and record a new chain 10429 // of computation for each trunc. 10430 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); 10431 UI != UIEnd; ++UI) { 10432 // Skip the uses of the chain. 
10433 if (UI.getUse().getResNo() != 0) 10434 continue; 10435 10436 SDNode *User = *UI; 10437 unsigned Shift = 0; 10438 10439 // Check if this is a trunc(lshr). 10440 if (User->getOpcode() == ISD::SRL && User->hasOneUse() && 10441 isa<ConstantSDNode>(User->getOperand(1))) { 10442 Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); 10443 User = *User->use_begin(); 10444 } 10445 10446 // At this point, User is a Truncate, iff we encountered, trunc or 10447 // trunc(lshr). 10448 if (User->getOpcode() != ISD::TRUNCATE) 10449 return false; 10450 10451 // The width of the type must be a power of 2 and greater than 8-bits. 10452 // Otherwise the load cannot be represented in LLVM IR. 10453 // Moreover, if we shifted with a non-8-bits multiple, the slice 10454 // will be across several bytes. We do not support that. 10455 unsigned Width = User->getValueSizeInBits(0); 10456 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) 10457 return 0; 10458 10459 // Build the slice for this chain of computations. 10460 LoadedSlice LS(User, LD, Shift, &DAG); 10461 APInt CurrentUsedBits = LS.getUsedBits(); 10462 10463 // Check if this slice overlaps with another. 10464 if ((CurrentUsedBits & UsedBits) != 0) 10465 return false; 10466 // Update the bits used globally. 10467 UsedBits |= CurrentUsedBits; 10468 10469 // Check if the new slice would be legal. 10470 if (!LS.isLegal()) 10471 return false; 10472 10473 // Record the slice. 10474 LoadedSlices.push_back(LS); 10475 } 10476 10477 // Abort slicing if it does not seem to be profitable. 10478 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) 10479 return false; 10480 10481 ++SlicedLoads; 10482 10483 // Rewrite each chain to use an independent load. 10484 // By construction, each chain can be represented by a unique load. 10485 10486 // Prepare the argument for the new token factor for all the slices. 
10487 SmallVector<SDValue, 8> ArgChains; 10488 for (SmallVectorImpl<LoadedSlice>::const_iterator 10489 LSIt = LoadedSlices.begin(), 10490 LSItEnd = LoadedSlices.end(); 10491 LSIt != LSItEnd; ++LSIt) { 10492 SDValue SliceInst = LSIt->loadSlice(); 10493 CombineTo(LSIt->Inst, SliceInst, true); 10494 if (SliceInst.getNode()->getOpcode() != ISD::LOAD) 10495 SliceInst = SliceInst.getOperand(0); 10496 assert(SliceInst->getOpcode() == ISD::LOAD && 10497 "It takes more than a zext to get to the loaded slice!!"); 10498 ArgChains.push_back(SliceInst.getValue(1)); 10499 } 10500 10501 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, 10502 ArgChains); 10503 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); 10504 return true; 10505 } 10506 10507 /// Check to see if V is (and load (ptr), imm), where the load is having 10508 /// specific bytes cleared out. If so, return the byte size being masked out 10509 /// and the shift amount. 10510 static std::pair<unsigned, unsigned> 10511 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { 10512 std::pair<unsigned, unsigned> Result(0, 0); 10513 10514 // Check for the structure we're looking for. 10515 if (V->getOpcode() != ISD::AND || 10516 !isa<ConstantSDNode>(V->getOperand(1)) || 10517 !ISD::isNormalLoad(V->getOperand(0).getNode())) 10518 return Result; 10519 10520 // Check the chain and pointer. 10521 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); 10522 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. 10523 10524 // The store should be chained directly to the load or be an operand of a 10525 // tokenfactor. 10526 if (LD == Chain.getNode()) 10527 ; // ok. 10528 else if (Chain->getOpcode() != ISD::TokenFactor) 10529 return Result; // Fail. 
10530 else { 10531 bool isOk = false; 10532 for (const SDValue &ChainOp : Chain->op_values()) 10533 if (ChainOp.getNode() == LD) { 10534 isOk = true; 10535 break; 10536 } 10537 if (!isOk) return Result; 10538 } 10539 10540 // This only handles simple types. 10541 if (V.getValueType() != MVT::i16 && 10542 V.getValueType() != MVT::i32 && 10543 V.getValueType() != MVT::i64) 10544 return Result; 10545 10546 // Check the constant mask. Invert it so that the bits being masked out are 10547 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits 10548 // follow the sign bit for uniformity. 10549 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); 10550 unsigned NotMaskLZ = countLeadingZeros(NotMask); 10551 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. 10552 unsigned NotMaskTZ = countTrailingZeros(NotMask); 10553 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. 10554 if (NotMaskLZ == 64) return Result; // All zero mask. 10555 10556 // See if we have a continuous run of bits. If so, we have 0*1+0* 10557 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64) 10558 return Result; 10559 10560 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. 10561 if (V.getValueType() != MVT::i64 && NotMaskLZ) 10562 NotMaskLZ -= 64-V.getValueSizeInBits(); 10563 10564 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; 10565 switch (MaskedBytes) { 10566 case 1: 10567 case 2: 10568 case 4: break; 10569 default: return Result; // All one mask, or 5-byte mask. 10570 } 10571 10572 // Verify that the first bit starts at a multiple of mask so that the access 10573 // is aligned the same as the access width. 
10574 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; 10575 10576 Result.first = MaskedBytes; 10577 Result.second = NotMaskTZ/8; 10578 return Result; 10579 } 10580 10581 10582 /// Check to see if IVal is something that provides a value as specified by 10583 /// MaskInfo. If so, replace the specified store with a narrower store of 10584 /// truncated IVal. 10585 static SDNode * 10586 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, 10587 SDValue IVal, StoreSDNode *St, 10588 DAGCombiner *DC) { 10589 unsigned NumBytes = MaskInfo.first; 10590 unsigned ByteShift = MaskInfo.second; 10591 SelectionDAG &DAG = DC->getDAG(); 10592 10593 // Check to see if IVal is all zeros in the part being masked in by the 'or' 10594 // that uses this. If not, this is not a replacement. 10595 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), 10596 ByteShift*8, (ByteShift+NumBytes)*8); 10597 if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr; 10598 10599 // Check that it is legal on the target to do this. It is legal if the new 10600 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type 10601 // legalization. 10602 MVT VT = MVT::getIntegerVT(NumBytes*8); 10603 if (!DC->isTypeLegal(VT)) 10604 return nullptr; 10605 10606 // Okay, we can do this! Replace the 'St' store with a store of IVal that is 10607 // shifted by ByteShift and truncated down to NumBytes. 10608 if (ByteShift) { 10609 SDLoc DL(IVal); 10610 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal, 10611 DAG.getConstant(ByteShift*8, DL, 10612 DC->getShiftAmountTy(IVal.getValueType()))); 10613 } 10614 10615 // Figure out the offset for the store and the alignment of the access. 
10616 unsigned StOffset; 10617 unsigned NewAlign = St->getAlignment(); 10618 10619 if (DAG.getDataLayout().isLittleEndian()) 10620 StOffset = ByteShift; 10621 else 10622 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; 10623 10624 SDValue Ptr = St->getBasePtr(); 10625 if (StOffset) { 10626 SDLoc DL(IVal); 10627 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), 10628 Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); 10629 NewAlign = MinAlign(NewAlign, StOffset); 10630 } 10631 10632 // Truncate down to the new size. 10633 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); 10634 10635 ++OpsNarrowed; 10636 return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr, 10637 St->getPointerInfo().getWithOffset(StOffset), 10638 false, false, NewAlign).getNode(); 10639 } 10640 10641 10642 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and 10643 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try 10644 /// narrowing the load and store if it would end up being a win for performance 10645 /// or code size. 10646 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { 10647 StoreSDNode *ST = cast<StoreSDNode>(N); 10648 if (ST->isVolatile()) 10649 return SDValue(); 10650 10651 SDValue Chain = ST->getChain(); 10652 SDValue Value = ST->getValue(); 10653 SDValue Ptr = ST->getBasePtr(); 10654 EVT VT = Value.getValueType(); 10655 10656 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) 10657 return SDValue(); 10658 10659 unsigned Opc = Value.getOpcode(); 10660 10661 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst 10662 // is a byte mask indicating a consecutive number of bytes, check to see if 10663 // Y is known to provide just those bytes. If so, we try to replace the 10664 // load + replace + store sequence with a single (narrower) store, which makes 10665 // the load dead. 
10666 if (Opc == ISD::OR) { 10667 std::pair<unsigned, unsigned> MaskedLoad; 10668 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); 10669 if (MaskedLoad.first) 10670 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, 10671 Value.getOperand(1), ST,this)) 10672 return SDValue(NewST, 0); 10673 10674 // Or is commutative, so try swapping X and Y. 10675 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); 10676 if (MaskedLoad.first) 10677 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, 10678 Value.getOperand(0), ST,this)) 10679 return SDValue(NewST, 0); 10680 } 10681 10682 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || 10683 Value.getOperand(1).getOpcode() != ISD::Constant) 10684 return SDValue(); 10685 10686 SDValue N0 = Value.getOperand(0); 10687 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 10688 Chain == SDValue(N0.getNode(), 1)) { 10689 LoadSDNode *LD = cast<LoadSDNode>(N0); 10690 if (LD->getBasePtr() != Ptr || 10691 LD->getPointerInfo().getAddrSpace() != 10692 ST->getPointerInfo().getAddrSpace()) 10693 return SDValue(); 10694 10695 // Find the type to narrow it the load / op / store to. 10696 SDValue N1 = Value.getOperand(1); 10697 unsigned BitWidth = N1.getValueSizeInBits(); 10698 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); 10699 if (Opc == ISD::AND) 10700 Imm ^= APInt::getAllOnesValue(BitWidth); 10701 if (Imm == 0 || Imm.isAllOnesValue()) 10702 return SDValue(); 10703 unsigned ShAmt = Imm.countTrailingZeros(); 10704 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; 10705 unsigned NewBW = NextPowerOf2(MSB - ShAmt); 10706 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); 10707 // The narrowing should be profitable, the load/store operation should be 10708 // legal (or custom) and the store size should be equal to the NewVT width. 
10709 while (NewBW < BitWidth && 10710 (NewVT.getStoreSizeInBits() != NewBW || 10711 !TLI.isOperationLegalOrCustom(Opc, NewVT) || 10712 !TLI.isNarrowingProfitable(VT, NewVT))) { 10713 NewBW = NextPowerOf2(NewBW); 10714 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); 10715 } 10716 if (NewBW >= BitWidth) 10717 return SDValue(); 10718 10719 // If the lsb changed does not start at the type bitwidth boundary, 10720 // start at the previous one. 10721 if (ShAmt % NewBW) 10722 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; 10723 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, 10724 std::min(BitWidth, ShAmt + NewBW)); 10725 if ((Imm & Mask) == Imm) { 10726 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); 10727 if (Opc == ISD::AND) 10728 NewImm ^= APInt::getAllOnesValue(NewBW); 10729 uint64_t PtrOff = ShAmt / 8; 10730 // For big endian targets, we need to adjust the offset to the pointer to 10731 // load the correct bytes. 10732 if (DAG.getDataLayout().isBigEndian()) 10733 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; 10734 10735 unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); 10736 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); 10737 if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) 10738 return SDValue(); 10739 10740 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), 10741 Ptr.getValueType(), Ptr, 10742 DAG.getConstant(PtrOff, SDLoc(LD), 10743 Ptr.getValueType())); 10744 SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), 10745 LD->getChain(), NewPtr, 10746 LD->getPointerInfo().getWithOffset(PtrOff), 10747 LD->isVolatile(), LD->isNonTemporal(), 10748 LD->isInvariant(), NewAlign, 10749 LD->getAAInfo()); 10750 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, 10751 DAG.getConstant(NewImm, SDLoc(Value), 10752 NewVT)); 10753 SDValue NewST = DAG.getStore(Chain, SDLoc(N), 10754 NewVal, NewPtr, 10755 ST->getPointerInfo().getWithOffset(PtrOff), 10756 false, false, NewAlign); 10757 10758 AddToWorklist(NewPtr.getNode()); 
10759 AddToWorklist(NewLD.getNode()); 10760 AddToWorklist(NewVal.getNode()); 10761 WorklistRemover DeadNodes(*this); 10762 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); 10763 ++OpsNarrowed; 10764 return NewST; 10765 } 10766 } 10767 10768 return SDValue(); 10769 } 10770 10771 /// For a given floating point load / store pair, if the load value isn't used 10772 /// by any other operations, then consider transforming the pair to integer 10773 /// load / store operations if the target deems the transformation profitable. 10774 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { 10775 StoreSDNode *ST = cast<StoreSDNode>(N); 10776 SDValue Chain = ST->getChain(); 10777 SDValue Value = ST->getValue(); 10778 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && 10779 Value.hasOneUse() && 10780 Chain == SDValue(Value.getNode(), 1)) { 10781 LoadSDNode *LD = cast<LoadSDNode>(Value); 10782 EVT VT = LD->getMemoryVT(); 10783 if (!VT.isFloatingPoint() || 10784 VT != ST->getMemoryVT() || 10785 LD->isNonTemporal() || 10786 ST->isNonTemporal() || 10787 LD->getPointerInfo().getAddrSpace() != 0 || 10788 ST->getPointerInfo().getAddrSpace() != 0) 10789 return SDValue(); 10790 10791 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); 10792 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || 10793 !TLI.isOperationLegal(ISD::STORE, IntVT) || 10794 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || 10795 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) 10796 return SDValue(); 10797 10798 unsigned LDAlign = LD->getAlignment(); 10799 unsigned STAlign = ST->getAlignment(); 10800 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); 10801 unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy); 10802 if (LDAlign < ABIAlign || STAlign < ABIAlign) 10803 return SDValue(); 10804 10805 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), 10806 LD->getChain(), LD->getBasePtr(), 10807 LD->getPointerInfo(), 10808 false, false, 
false, LDAlign); 10809 10810 SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), 10811 NewLD, ST->getBasePtr(), 10812 ST->getPointerInfo(), 10813 false, false, STAlign); 10814 10815 AddToWorklist(NewLD.getNode()); 10816 AddToWorklist(NewST.getNode()); 10817 WorklistRemover DeadNodes(*this); 10818 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); 10819 ++LdStFP2Int; 10820 return NewST; 10821 } 10822 10823 return SDValue(); 10824 } 10825 10826 namespace { 10827 /// Helper struct to parse and store a memory address as base + index + offset. 10828 /// We ignore sign extensions when it is safe to do so. 10829 /// The following two expressions are not equivalent. To differentiate we need 10830 /// to store whether there was a sign extension involved in the index 10831 /// computation. 10832 /// (load (i64 add (i64 copyfromreg %c) 10833 /// (i64 signextend (add (i8 load %index) 10834 /// (i8 1)))) 10835 /// vs 10836 /// 10837 /// (load (i64 add (i64 copyfromreg %c) 10838 /// (i64 signextend (i32 add (i32 signextend (i8 load %index)) 10839 /// (i32 1))))) 10840 struct BaseIndexOffset { 10841 SDValue Base; 10842 SDValue Index; 10843 int64_t Offset; 10844 bool IsIndexSignExt; 10845 10846 BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} 10847 10848 BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, 10849 bool IsIndexSignExt) : 10850 Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} 10851 10852 bool equalBaseIndex(const BaseIndexOffset &Other) { 10853 return Other.Base == Base && Other.Index == Index && 10854 Other.IsIndexSignExt == IsIndexSignExt; 10855 } 10856 10857 /// Parses tree in Ptr for base, index, offset addresses. 10858 static BaseIndexOffset match(SDValue Ptr) { 10859 bool IsIndexSignExt = false; 10860 10861 // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD 10862 // instruction, then it could be just the BASE or everything else we don't 10863 // know how to handle. 
Just use Ptr as BASE and give up. 10864 if (Ptr->getOpcode() != ISD::ADD) 10865 return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); 10866 10867 // We know that we have at least an ADD instruction. Try to pattern match 10868 // the simple case of BASE + OFFSET. 10869 if (isa<ConstantSDNode>(Ptr->getOperand(1))) { 10870 int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); 10871 return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, 10872 IsIndexSignExt); 10873 } 10874 10875 // Inside a loop the current BASE pointer is calculated using an ADD and a 10876 // MUL instruction. In this case Ptr is the actual BASE pointer. 10877 // (i64 add (i64 %array_ptr) 10878 // (i64 mul (i64 %induction_var) 10879 // (i64 %element_size))) 10880 if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) 10881 return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); 10882 10883 // Look at Base + Index + Offset cases. 10884 SDValue Base = Ptr->getOperand(0); 10885 SDValue IndexOffset = Ptr->getOperand(1); 10886 10887 // Skip signextends. 10888 if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { 10889 IndexOffset = IndexOffset->getOperand(0); 10890 IsIndexSignExt = true; 10891 } 10892 10893 // Either the case of Base + Index (no offset) or something else. 10894 if (IndexOffset->getOpcode() != ISD::ADD) 10895 return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); 10896 10897 // Now we have the case of Base + Index + offset. 10898 SDValue Index = IndexOffset->getOperand(0); 10899 SDValue Offset = IndexOffset->getOperand(1); 10900 10901 if (!isa<ConstantSDNode>(Offset)) 10902 return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); 10903 10904 // Ignore signextends. 
10905 if (Index->getOpcode() == ISD::SIGN_EXTEND) { 10906 Index = Index->getOperand(0); 10907 IsIndexSignExt = true; 10908 } else IsIndexSignExt = false; 10909 10910 int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); 10911 return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); 10912 } 10913 }; 10914 } // namespace 10915 10916 // This is a helper function for visitMUL to check the profitability 10917 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). 10918 // MulNode is the original multiply, AddNode is (add x, c1), 10919 // and ConstNode is c2. 10920 // 10921 // If the (add x, c1) has multiple uses, we could increase 10922 // the number of adds if we make this transformation. 10923 // It would only be worth doing this if we can remove a 10924 // multiply in the process. Check for that here. 10925 // To illustrate: 10926 // (A + c1) * c3 10927 // (A + c2) * c3 10928 // We're checking for cases where we have common "c3 * A" expressions. 10929 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, 10930 SDValue &AddNode, 10931 SDValue &ConstNode) { 10932 APInt Val; 10933 10934 // If the add only has one use, this would be OK to do. 10935 if (AddNode.getNode()->hasOneUse()) 10936 return true; 10937 10938 // Walk all the users of the constant with which we're multiplying. 10939 for (SDNode *Use : ConstNode->uses()) { 10940 10941 if (Use == MulNode) // This use is the one we're on right now. Skip it. 10942 continue; 10943 10944 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use. 10945 SDNode *OtherOp; 10946 SDNode *MulVar = AddNode.getOperand(0).getNode(); 10947 10948 // OtherOp is what we're multiplying against the constant. 10949 if (Use->getOperand(0) == ConstNode) 10950 OtherOp = Use->getOperand(1).getNode(); 10951 else 10952 OtherOp = Use->getOperand(0).getNode(); 10953 10954 // Check to see if multiply is with the same operand of our "add". 
10955 // 10956 // ConstNode = CONST 10957 // Use = ConstNode * A <-- visiting Use. OtherOp is A. 10958 // ... 10959 // AddNode = (A + c1) <-- MulVar is A. 10960 // = AddNode * ConstNode <-- current visiting instruction. 10961 // 10962 // If we make this transformation, we will have a common 10963 // multiply (ConstNode * A) that we can save. 10964 if (OtherOp == MulVar) 10965 return true; 10966 10967 // Now check to see if a future expansion will give us a common 10968 // multiply. 10969 // 10970 // ConstNode = CONST 10971 // AddNode = (A + c1) 10972 // ... = AddNode * ConstNode <-- current visiting instruction. 10973 // ... 10974 // OtherOp = (A + c2) 10975 // Use = OtherOp * ConstNode <-- visiting Use. 10976 // 10977 // If we make this transformation, we will have a common 10978 // multiply (CONST * A) after we also do the same transformation 10979 // to the "t2" instruction. 10980 if (OtherOp->getOpcode() == ISD::ADD && 10981 isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && 10982 OtherOp->getOperand(0).getNode() == MulVar) 10983 return true; 10984 } 10985 } 10986 10987 // Didn't find a case where this would be profitable. 10988 return false; 10989 } 10990 10991 SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, 10992 SDLoc SL, 10993 ArrayRef<MemOpLink> Stores, 10994 SmallVectorImpl<SDValue> &Chains, 10995 EVT Ty) const { 10996 SmallVector<SDValue, 8> BuildVector; 10997 10998 for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { 10999 StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode); 11000 Chains.push_back(St->getChain()); 11001 BuildVector.push_back(St->getValue()); 11002 } 11003 11004 return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); 11005 } 11006 11007 bool DAGCombiner::MergeStoresOfConstantsOrVecElts( 11008 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, 11009 unsigned NumStores, bool IsConstantSrc, bool UseVector) { 11010 // Make sure we have something to merge. 
if (NumStores < 2)
    return false;

  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  unsigned LatestNodeUsed = 0;

  for (unsigned i=0; i < NumStores; ++i) {
    // Find a chain for the new wide-store operand. Notice that some
    // of the store nodes that we found may not be selected for inclusion
    // in the wide store. The chain we use needs to be the chain of the
    // latest store node which is *used* and replaced by the wide store.
    // Among the first NumStores entries this picks the one with the smallest
    // SequenceNum.
    if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
      LatestNodeUsed = i;
  }

  // Chains of all the stores being merged; they are joined by a TokenFactor
  // below.
  SmallVector<SDValue, 8> Chains;

  // The latest Node in the DAG.
  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
  SDLoc DL(StoreNodes[0].MemNode);

  SDValue StoredVal;
  if (UseVector) {
    bool IsVec = MemVT.isVector();
    unsigned Elts = NumStores;
    if (IsVec) {
      // When merging vector stores, get the total number of elements.
      Elts *= MemVT.getVectorNumElements();
    }
    // Get the type for the merged vector store.
    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
    assert(TLI.isTypeLegal(Ty) && "Illegal vector store");

    if (IsConstantSrc) {
      StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = St->getValue();
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
        // Nothing has been modified in the DAG at this point, so giving up
        // here is safe.
        if (Val.getValueType() != MemVT)
          return false;
        Ops.push_back(Val);
        Chains.push_back(St->getChain());
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
                              DL, Ty, Ops);    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs. Each iteration shifts the accumulated value left by
    // one element and ORs the next element into the low bits, so the stores
    // are visited last-to-first on little endian targets and first-to-last
    // otherwise.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
      Chains.push_back(St->getChain());

      SDValue Val = St->getValue();
      StoreInt <<= ElementSizeBytes * 8;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue().zext(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        // Floating point constants contribute their raw bit pattern.
        StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the wide integer constant that the merged store will write.
    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  assert(!Chains.empty());

  // The wide store is addressed and aligned like the first entry of
  // StoreNodes and is chained on all the stores it replaces.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
                                  FirstInChain->getBasePtr(),
                                  FirstInChain->getPointerInfo(),
                                  false, false,
                                  FirstInChain->getAlignment());

  // Replace the last store with the new store
  CombineTo(LatestOp, NewStore);
  // Erase all other stores.
  for (unsigned i = 0; i < NumStores; ++i) {
    if (StoreNodes[i].MemNode == LatestOp)
      continue;
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    // ReplaceAllUsesWith will replace all uses that existed when it was
    // called, but graph optimizations may cause new ones to appear. For
    // example, the case in pr14333 looks like
    //
    //  St's chain -> St -> another store -> X
    //
    // And the only difference from St to the other store is the chain.
    // When we change it's chain to be St's chain they become identical,
    // get CSEed and the net result is that X is now a use of St.
    // Since we know that St is redundant, just iterate.
    while (!St->use_empty())
      DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
    deleteAndRecombine(St);
  }

  return true;
}

void DAGCombiner::getStoreMergeAndAliasCandidates(
    StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());

  // We must have a base and an offset.
  if (!BasePtr.Base.getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.Base.getOpcode() == ISD::UNDEF)
    return;

  // Walk up the chain and look for nodes with offsets from the same
  // base pointer. Stop when reaching an instruction with a different kind
  // or instruction which has a different base pointer.
  EVT MemVT = St->getMemoryVT();
  unsigned Seq = 0;
  StoreSDNode *Index = St;


  // An explicit -combiner-alias-analysis on the command line wins over the
  // subtarget's preference.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();

  if (UseAA) {
    //
Stores on the same chain do not 11156 // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized 11157 // to be on the same chain, so don't bother looking at adjacent chains. 11158 11159 SDValue Chain = St->getChain(); 11160 for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) { 11161 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { 11162 if (I.getOperandNo() != 0) 11163 continue; 11164 11165 if (OtherST->isVolatile() || OtherST->isIndexed()) 11166 continue; 11167 11168 if (OtherST->getMemoryVT() != MemVT) 11169 continue; 11170 11171 BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr()); 11172 11173 if (Ptr.equalBaseIndex(BasePtr)) 11174 StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); 11175 } 11176 } 11177 11178 return; 11179 } 11180 11181 while (Index) { 11182 // If the chain has more than one use, then we can't reorder the mem ops. 11183 if (Index != St && !SDValue(Index, 0)->hasOneUse()) 11184 break; 11185 11186 // Find the base pointer and offset for this memory node. 11187 BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); 11188 11189 // Check that the base pointer is the same as the original one. 11190 if (!Ptr.equalBaseIndex(BasePtr)) 11191 break; 11192 11193 // The memory operands must not be volatile. 11194 if (Index->isVolatile() || Index->isIndexed()) 11195 break; 11196 11197 // No truncation. 11198 if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) 11199 if (St->isTruncatingStore()) 11200 break; 11201 11202 // The stored memory type must be the same. 11203 if (Index->getMemoryVT() != MemVT) 11204 break; 11205 11206 // We do not allow under-aligned stores in order to prevent 11207 // overriding stores. NOTE: this is a bad hack. Alignment SHOULD 11208 // be irrelevant here; what MATTERS is that we not move memory 11209 // operations that potentially overlap past each-other. 
11210 if (Index->getAlignment() < MemVT.getStoreSize()) 11211 break; 11212 11213 // We found a potential memory operand to merge. 11214 StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); 11215 11216 // Find the next memory operand in the chain. If the next operand in the 11217 // chain is a store then move up and continue the scan with the next 11218 // memory operand. If the next operand is a load save it and use alias 11219 // information to check if it interferes with anything. 11220 SDNode *NextInChain = Index->getChain().getNode(); 11221 while (1) { 11222 if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { 11223 // We found a store node. Use it for the next iteration. 11224 Index = STn; 11225 break; 11226 } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { 11227 if (Ldn->isVolatile()) { 11228 Index = nullptr; 11229 break; 11230 } 11231 11232 // Save the load node for later. Continue the scan. 11233 AliasLoadNodes.push_back(Ldn); 11234 NextInChain = Ldn->getChain().getNode(); 11235 continue; 11236 } else { 11237 Index = nullptr; 11238 break; 11239 } 11240 } 11241 } 11242 } 11243 11244 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { 11245 if (OptLevel == CodeGenOpt::None) 11246 return false; 11247 11248 EVT MemVT = St->getMemoryVT(); 11249 int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; 11250 bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( 11251 Attribute::NoImplicitFloat); 11252 11253 // This function cannot currently deal with non-byte-sized memory sizes. 11254 if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) 11255 return false; 11256 11257 if (!MemVT.isSimple()) 11258 return false; 11259 11260 // Perform an early exit check. Do not bother looking at stored values that 11261 // are not constants, loads, or extracted vector elements. 
11262 SDValue StoredVal = St->getValue(); 11263 bool IsLoadSrc = isa<LoadSDNode>(StoredVal); 11264 bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || 11265 isa<ConstantFPSDNode>(StoredVal); 11266 bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 11267 StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR); 11268 11269 if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc) 11270 return false; 11271 11272 // Don't merge vectors into wider vectors if the source data comes from loads. 11273 // TODO: This restriction can be lifted by using logic similar to the 11274 // ExtractVecSrc case. 11275 if (MemVT.isVector() && IsLoadSrc) 11276 return false; 11277 11278 // Only look at ends of store sequences. 11279 SDValue Chain = SDValue(St, 0); 11280 if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) 11281 return false; 11282 11283 // Save the LoadSDNodes that we find in the chain. 11284 // We need to make sure that these nodes do not interfere with 11285 // any of the store nodes. 11286 SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; 11287 11288 // Save the StoreSDNodes that we find in the chain. 11289 SmallVector<MemOpLink, 8> StoreNodes; 11290 11291 getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); 11292 11293 // Check if there is anything to merge. 11294 if (StoreNodes.size() < 2) 11295 return false; 11296 11297 // Sort the memory operands according to their distance from the 11298 // base pointer. As a secondary criteria: make sure stores coming 11299 // later in the code come first in the list. This is important for 11300 // the non-UseAA case, because we're merging stores into the FINAL 11301 // store along a chain which potentially contains aliasing stores. 11302 // Thus, if there are multiple stores to the same address, the last 11303 // one can be considered for merging but not the others. 
11304 std::sort(StoreNodes.begin(), StoreNodes.end(), 11305 [](MemOpLink LHS, MemOpLink RHS) { 11306 return LHS.OffsetFromBase < RHS.OffsetFromBase || 11307 (LHS.OffsetFromBase == RHS.OffsetFromBase && 11308 LHS.SequenceNum < RHS.SequenceNum); 11309 }); 11310 11311 // Scan the memory operations on the chain and find the first non-consecutive 11312 // store memory address. 11313 unsigned LastConsecutiveStore = 0; 11314 int64_t StartAddress = StoreNodes[0].OffsetFromBase; 11315 for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) { 11316 11317 // Check that the addresses are consecutive starting from the second 11318 // element in the list of stores. 11319 if (i > 0) { 11320 int64_t CurrAddress = StoreNodes[i].OffsetFromBase; 11321 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 11322 break; 11323 } 11324 11325 bool Alias = false; 11326 // Check if this store interferes with any of the loads that we found. 11327 for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld) 11328 if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) { 11329 Alias = true; 11330 break; 11331 } 11332 // We found a load that alias with this store. Stop the sequence. 11333 if (Alias) 11334 break; 11335 11336 // Mark this node as useful. 11337 LastConsecutiveStore = i; 11338 } 11339 11340 // The node with the lowest store address. 11341 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 11342 unsigned FirstStoreAS = FirstInChain->getAddressSpace(); 11343 unsigned FirstStoreAlign = FirstInChain->getAlignment(); 11344 LLVMContext &Context = *DAG.getContext(); 11345 const DataLayout &DL = DAG.getDataLayout(); 11346 11347 // Store the constants into memory as one consecutive store. 
11348 if (IsConstantSrc) { 11349 unsigned LastLegalType = 0; 11350 unsigned LastLegalVectorType = 0; 11351 bool NonZero = false; 11352 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 11353 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 11354 SDValue StoredVal = St->getValue(); 11355 11356 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { 11357 NonZero |= !C->isNullValue(); 11358 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { 11359 NonZero |= !C->getConstantFPValue()->isNullValue(); 11360 } else { 11361 // Non-constant. 11362 break; 11363 } 11364 11365 // Find a legal type for the constant store. 11366 unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; 11367 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); 11368 bool IsFast; 11369 if (TLI.isTypeLegal(StoreTy) && 11370 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, 11371 FirstStoreAlign, &IsFast) && IsFast) { 11372 LastLegalType = i+1; 11373 // Or check whether a truncstore is legal. 11374 } else if (TLI.getTypeAction(Context, StoreTy) == 11375 TargetLowering::TypePromoteInteger) { 11376 EVT LegalizedStoredValueTy = 11377 TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); 11378 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && 11379 TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, 11380 FirstStoreAS, FirstStoreAlign, &IsFast) && 11381 IsFast) { 11382 LastLegalType = i + 1; 11383 } 11384 } 11385 11386 // We only use vectors if the constant is known to be zero or the target 11387 // allows it and the function is not marked with the noimplicitfloat 11388 // attribute. 11389 if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1, 11390 FirstStoreAS)) && 11391 !NoVectors) { 11392 // Find a legal type for the vector store. 
11393 EVT Ty = EVT::getVectorVT(Context, MemVT, i+1); 11394 if (TLI.isTypeLegal(Ty) && 11395 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, 11396 FirstStoreAlign, &IsFast) && IsFast) 11397 LastLegalVectorType = i + 1; 11398 } 11399 } 11400 11401 // Check if we found a legal integer type to store. 11402 if (LastLegalType == 0 && LastLegalVectorType == 0) 11403 return false; 11404 11405 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; 11406 unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; 11407 11408 return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, 11409 true, UseVector); 11410 } 11411 11412 // When extracting multiple vector elements, try to store them 11413 // in one vector store rather than a sequence of scalar stores. 11414 if (IsExtractVecSrc) { 11415 unsigned NumStoresToMerge = 0; 11416 bool IsVec = MemVT.isVector(); 11417 for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { 11418 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 11419 unsigned StoreValOpcode = St->getValue().getOpcode(); 11420 // This restriction could be loosened. 11421 // Bail out if any stored values are not elements extracted from a vector. 11422 // It should be possible to handle mixed sources, but load sources need 11423 // more careful handling (see the block of code below that handles 11424 // consecutive loads). 11425 if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && 11426 StoreValOpcode != ISD::EXTRACT_SUBVECTOR) 11427 return false; 11428 11429 // Find a legal type for the vector store. 11430 unsigned Elts = i + 1; 11431 if (IsVec) { 11432 // When merging vector stores, get the total number of elements. 
11433 Elts *= MemVT.getVectorNumElements(); 11434 } 11435 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); 11436 bool IsFast; 11437 if (TLI.isTypeLegal(Ty) && 11438 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, 11439 FirstStoreAlign, &IsFast) && IsFast) 11440 NumStoresToMerge = i + 1; 11441 } 11442 11443 return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge, 11444 false, true); 11445 } 11446 11447 // Below we handle the case of multiple consecutive stores that 11448 // come from multiple consecutive loads. We merge them into a single 11449 // wide load and a single wide store. 11450 11451 // Look for load nodes which are used by the stored values. 11452 SmallVector<MemOpLink, 8> LoadNodes; 11453 11454 // Find acceptable loads. Loads need to have the same chain (token factor), 11455 // must not be zext, volatile, indexed, and they must be consecutive. 11456 BaseIndexOffset LdBasePtr; 11457 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 11458 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 11459 LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); 11460 if (!Ld) break; 11461 11462 // Loads must only have one use. 11463 if (!Ld->hasNUsesOfValue(1, 0)) 11464 break; 11465 11466 // The memory operands must not be volatile. 11467 if (Ld->isVolatile() || Ld->isIndexed()) 11468 break; 11469 11470 // We do not accept ext loads. 11471 if (Ld->getExtensionType() != ISD::NON_EXTLOAD) 11472 break; 11473 11474 // The stored memory type must be the same. 11475 if (Ld->getMemoryVT() != MemVT) 11476 break; 11477 11478 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); 11479 // If this is not the first ptr that we check. 11480 if (LdBasePtr.Base.getNode()) { 11481 // The base ptr must be the same. 11482 if (!LdPtr.equalBaseIndex(LdBasePtr)) 11483 break; 11484 } else { 11485 // Check that all other base pointers are the same as this one. 
11486 LdBasePtr = LdPtr; 11487 } 11488 11489 // We found a potential memory operand to merge. 11490 LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); 11491 } 11492 11493 if (LoadNodes.size() < 2) 11494 return false; 11495 11496 // If we have load/store pair instructions and we only have two values, 11497 // don't bother. 11498 unsigned RequiredAlignment; 11499 if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && 11500 St->getAlignment() >= RequiredAlignment) 11501 return false; 11502 11503 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); 11504 unsigned FirstLoadAS = FirstLoad->getAddressSpace(); 11505 unsigned FirstLoadAlign = FirstLoad->getAlignment(); 11506 11507 // Scan the memory operations on the chain and find the first non-consecutive 11508 // load memory address. These variables hold the index in the store node 11509 // array. 11510 unsigned LastConsecutiveLoad = 0; 11511 // This variable refers to the size and not index in the array. 11512 unsigned LastLegalVectorType = 0; 11513 unsigned LastLegalIntegerType = 0; 11514 StartAddress = LoadNodes[0].OffsetFromBase; 11515 SDValue FirstChain = FirstLoad->getChain(); 11516 for (unsigned i = 1; i < LoadNodes.size(); ++i) { 11517 // All loads much share the same chain. 11518 if (LoadNodes[i].MemNode->getChain() != FirstChain) 11519 break; 11520 11521 int64_t CurrAddress = LoadNodes[i].OffsetFromBase; 11522 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 11523 break; 11524 LastConsecutiveLoad = i; 11525 // Find a legal type for the vector store. 
11526 EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1); 11527 bool IsFastSt, IsFastLd; 11528 if (TLI.isTypeLegal(StoreTy) && 11529 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, 11530 FirstStoreAlign, &IsFastSt) && IsFastSt && 11531 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, 11532 FirstLoadAlign, &IsFastLd) && IsFastLd) { 11533 LastLegalVectorType = i + 1; 11534 } 11535 11536 // Find a legal type for the integer store. 11537 unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; 11538 StoreTy = EVT::getIntegerVT(Context, SizeInBits); 11539 if (TLI.isTypeLegal(StoreTy) && 11540 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, 11541 FirstStoreAlign, &IsFastSt) && IsFastSt && 11542 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, 11543 FirstLoadAlign, &IsFastLd) && IsFastLd) 11544 LastLegalIntegerType = i + 1; 11545 // Or check whether a truncstore and extload is legal. 11546 else if (TLI.getTypeAction(Context, StoreTy) == 11547 TargetLowering::TypePromoteInteger) { 11548 EVT LegalizedStoredValueTy = 11549 TLI.getTypeToTransformTo(Context, StoreTy); 11550 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && 11551 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && 11552 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && 11553 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && 11554 TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, 11555 FirstStoreAS, FirstStoreAlign, &IsFastSt) && 11556 IsFastSt && 11557 TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, 11558 FirstLoadAS, FirstLoadAlign, &IsFastLd) && 11559 IsFastLd) 11560 LastLegalIntegerType = i+1; 11561 } 11562 } 11563 11564 // Only use vector types if the vector type is larger than the integer type. 11565 // If they are the same, use integers. 
11566 bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; 11567 unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); 11568 11569 // We add +1 here because the LastXXX variables refer to location while 11570 // the NumElem refers to array/index size. 11571 unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; 11572 NumElem = std::min(LastLegalType, NumElem); 11573 11574 if (NumElem < 2) 11575 return false; 11576 11577 // Collect the chains from all merged stores. 11578 SmallVector<SDValue, 8> MergeStoreChains; 11579 MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain()); 11580 11581 // The latest Node in the DAG. 11582 unsigned LatestNodeUsed = 0; 11583 for (unsigned i=1; i<NumElem; ++i) { 11584 // Find a chain for the new wide-store operand. Notice that some 11585 // of the store nodes that we found may not be selected for inclusion 11586 // in the wide store. The chain we use needs to be the chain of the 11587 // latest store node which is *used* and replaced by the wide store. 11588 if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) 11589 LatestNodeUsed = i; 11590 11591 MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain()); 11592 } 11593 11594 LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; 11595 11596 // Find if it is better to use vectors or integers to load and store 11597 // to memory. 11598 EVT JointMemOpVT; 11599 if (UseVectorTy) { 11600 JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); 11601 } else { 11602 unsigned SizeInBits = NumElem * ElementSizeBytes * 8; 11603 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); 11604 } 11605 11606 SDLoc LoadDL(LoadNodes[0].MemNode); 11607 SDLoc StoreDL(StoreNodes[0].MemNode); 11608 11609 // The merged loads are required to have the same chain, so using the first's 11610 // chain is acceptable. 
11611 SDValue NewLoad = DAG.getLoad( 11612 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), 11613 FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); 11614 11615 SDValue NewStoreChain = 11616 DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); 11617 11618 SDValue NewStore = DAG.getStore( 11619 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), 11620 FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); 11621 11622 // Replace one of the loads with the new load. 11623 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); 11624 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), 11625 SDValue(NewLoad.getNode(), 1)); 11626 11627 // Remove the rest of the load chains. 11628 for (unsigned i = 1; i < NumElem ; ++i) { 11629 // Replace all chain users of the old load nodes with the chain of the new 11630 // load node. 11631 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); 11632 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); 11633 } 11634 11635 // Replace the last store with the new store. 11636 CombineTo(LatestOp, NewStore); 11637 // Erase all other stores. 11638 for (unsigned i = 0; i < NumElem ; ++i) { 11639 // Remove all Store nodes. 11640 if (StoreNodes[i].MemNode == LatestOp) 11641 continue; 11642 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 11643 DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); 11644 deleteAndRecombine(St); 11645 } 11646 11647 return true; 11648 } 11649 11650 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { 11651 SDLoc SL(ST); 11652 SDValue ReplStore; 11653 11654 // Replace the chain to avoid dependency. 
11655 if (ST->isTruncatingStore()) { 11656 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(), 11657 ST->getBasePtr(), ST->getMemoryVT(), 11658 ST->getMemOperand()); 11659 } else { 11660 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), 11661 ST->getMemOperand()); 11662 } 11663 11664 // Create token to keep both nodes around. 11665 SDValue Token = DAG.getNode(ISD::TokenFactor, SL, 11666 MVT::Other, ST->getChain(), ReplStore); 11667 11668 // Make sure the new and old chains are cleaned up. 11669 AddToWorklist(Token.getNode()); 11670 11671 // Don't add users to work list. 11672 return CombineTo(ST, Token, false); 11673 } 11674 11675 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { 11676 SDValue Value = ST->getValue(); 11677 if (Value.getOpcode() == ISD::TargetConstantFP) 11678 return SDValue(); 11679 11680 SDLoc DL(ST); 11681 11682 SDValue Chain = ST->getChain(); 11683 SDValue Ptr = ST->getBasePtr(); 11684 11685 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value); 11686 11687 // NOTE: If the original store is volatile, this transform must not increase 11688 // the number of stores. For example, on x86-32 an f64 can be stored in one 11689 // processor operation but an i64 (which is not legal) requires two. So the 11690 // transform should not be done in this case. 11691 11692 SDValue Tmp; 11693 switch (CFP->getSimpleValueType(0).SimpleTy) { 11694 default: 11695 llvm_unreachable("Unknown FP type"); 11696 case MVT::f16: // We don't do this for these yet. 11697 case MVT::f80: 11698 case MVT::f128: 11699 case MVT::ppcf128: 11700 return SDValue(); 11701 case MVT::f32: 11702 if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || 11703 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 11704 ; 11705 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 
11706 bitcastToAPInt().getZExtValue(), SDLoc(CFP), 11707 MVT::i32); 11708 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand()); 11709 } 11710 11711 return SDValue(); 11712 case MVT::f64: 11713 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && 11714 !ST->isVolatile()) || 11715 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { 11716 ; 11717 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 11718 getZExtValue(), SDLoc(CFP), MVT::i64); 11719 return DAG.getStore(Chain, DL, Tmp, 11720 Ptr, ST->getMemOperand()); 11721 } 11722 11723 if (!ST->isVolatile() && 11724 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 11725 // Many FP stores are not made apparent until after legalize, e.g. for 11726 // argument passing. Since this is so common, custom legalize the 11727 // 64-bit integer store into two 32-bit stores. 11728 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); 11729 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); 11730 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); 11731 if (DAG.getDataLayout().isBigEndian()) 11732 std::swap(Lo, Hi); 11733 11734 unsigned Alignment = ST->getAlignment(); 11735 bool isVolatile = ST->isVolatile(); 11736 bool isNonTemporal = ST->isNonTemporal(); 11737 AAMDNodes AAInfo = ST->getAAInfo(); 11738 11739 SDValue St0 = DAG.getStore(Chain, DL, Lo, 11740 Ptr, ST->getPointerInfo(), 11741 isVolatile, isNonTemporal, 11742 ST->getAlignment(), AAInfo); 11743 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 11744 DAG.getConstant(4, DL, Ptr.getValueType())); 11745 Alignment = MinAlign(Alignment, 4U); 11746 SDValue St1 = DAG.getStore(Chain, DL, Hi, 11747 Ptr, ST->getPointerInfo().getWithOffset(4), 11748 isVolatile, isNonTemporal, 11749 Alignment, AAInfo); 11750 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 11751 St0, St1); 11752 } 11753 11754 return SDValue(); 11755 } 11756 } 11757 11758 SDValue DAGCombiner::visitSTORE(SDNode *N) { 11759 StoreSDNode *ST = 
cast<StoreSDNode>(N); 11760 SDValue Chain = ST->getChain(); 11761 SDValue Value = ST->getValue(); 11762 SDValue Ptr = ST->getBasePtr(); 11763 11764 // If this is a store of a bit convert, store the input value if the 11765 // resultant store does not need a higher alignment than the original. 11766 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && 11767 ST->isUnindexed()) { 11768 unsigned OrigAlign = ST->getAlignment(); 11769 EVT SVT = Value.getOperand(0).getValueType(); 11770 unsigned Align = DAG.getDataLayout().getABITypeAlignment( 11771 SVT.getTypeForEVT(*DAG.getContext())); 11772 if (Align <= OrigAlign && 11773 ((!LegalOperations && !ST->isVolatile()) || 11774 TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) 11775 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), 11776 Ptr, ST->getPointerInfo(), ST->isVolatile(), 11777 ST->isNonTemporal(), OrigAlign, 11778 ST->getAAInfo()); 11779 } 11780 11781 // Turn 'store undef, Ptr' -> nothing. 11782 if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) 11783 return Chain; 11784 11785 // Try to infer better alignment information than the store already has. 11786 if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { 11787 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { 11788 if (Align > ST->getAlignment()) { 11789 SDValue NewStore = 11790 DAG.getTruncStore(Chain, SDLoc(N), Value, 11791 Ptr, ST->getPointerInfo(), ST->getMemoryVT(), 11792 ST->isVolatile(), ST->isNonTemporal(), Align, 11793 ST->getAAInfo()); 11794 if (NewStore.getNode() != N) 11795 return CombineTo(ST, NewStore, true); 11796 } 11797 } 11798 } 11799 11800 // Try transforming a pair floating point load / store ops to integer 11801 // load / store ops. 11802 if (SDValue NewST = TransformFPLoadStorePair(N)) 11803 return NewST; 11804 11805 bool UseAA = CombinerAA.getNumOccurrences() > 0 ? 
CombinerAA 11806 : DAG.getSubtarget().useAA(); 11807 #ifndef NDEBUG 11808 if (CombinerAAOnlyFunc.getNumOccurrences() && 11809 CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) 11810 UseAA = false; 11811 #endif 11812 if (UseAA && ST->isUnindexed()) { 11813 // FIXME: We should do this even without AA enabled. AA will just allow 11814 // FindBetterChain to work in more situations. The problem with this is that 11815 // any combine that expects memory operations to be on consecutive chains 11816 // first needs to be updated to look for users of the same chain. 11817 11818 // Walk up chain skipping non-aliasing memory nodes, on this store and any 11819 // adjacent stores. 11820 if (findBetterNeighborChains(ST)) { 11821 // replaceStoreChain uses CombineTo, which handled all of the worklist 11822 // manipulation. Return the original node to not do anything else. 11823 return SDValue(ST, 0); 11824 } 11825 } 11826 11827 // Try transforming N to an indexed store. 11828 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) 11829 return SDValue(N, 0); 11830 11831 // FIXME: is there such a thing as a truncating indexed store? 11832 if (ST->isTruncatingStore() && ST->isUnindexed() && 11833 Value.getValueType().isInteger()) { 11834 // See if we can simplify the input to this truncstore with knowledge that 11835 // only the low bits are being used. For example: 11836 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" 11837 SDValue Shorter = 11838 GetDemandedBits(Value, 11839 APInt::getLowBitsSet( 11840 Value.getValueType().getScalarType().getSizeInBits(), 11841 ST->getMemoryVT().getScalarType().getSizeInBits())); 11842 AddToWorklist(Value.getNode()); 11843 if (Shorter.getNode()) 11844 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, 11845 Ptr, ST->getMemoryVT(), ST->getMemOperand()); 11846 11847 // Otherwise, see if we can simplify the operation with 11848 // SimplifyDemandedBits, which only works if the value has a single use. 
11849 if (SimplifyDemandedBits(Value, 11850 APInt::getLowBitsSet( 11851 Value.getValueType().getScalarType().getSizeInBits(), 11852 ST->getMemoryVT().getScalarType().getSizeInBits()))) 11853 return SDValue(N, 0); 11854 } 11855 11856 // If this is a load followed by a store to the same location, then the store 11857 // is dead/noop. 11858 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { 11859 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && 11860 ST->isUnindexed() && !ST->isVolatile() && 11861 // There can't be any side effects between the load and store, such as 11862 // a call or store. 11863 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { 11864 // The store is dead, remove it. 11865 return Chain; 11866 } 11867 } 11868 11869 // If this is a store followed by a store with the same value to the same 11870 // location, then the store is dead/noop. 11871 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) { 11872 if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() && 11873 ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() && 11874 ST1->isUnindexed() && !ST1->isVolatile()) { 11875 // The store is dead, remove it. 11876 return Chain; 11877 } 11878 } 11879 11880 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a 11881 // truncating store. We can do this even if this is already a truncstore. 11882 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) 11883 && Value.getNode()->hasOneUse() && ST->isUnindexed() && 11884 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), 11885 ST->getMemoryVT())) { 11886 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), 11887 Ptr, ST->getMemoryVT(), ST->getMemOperand()); 11888 } 11889 11890 // Only perform this optimization before the types are legal, because we 11891 // don't want to perform this optimization on every DAGCombine invocation. 
  if (!LegalTypes) {
    bool EverChanged = false;

    do {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      EverChanged |= Changed;
      if (!Changed) break;
      // Stop as soon as ST itself has been turned into a DELETED_NODE.
    } while (ST->getOpcode() != ISD::DELETED_NODE);

    if (EverChanged)
      return SDValue(N, 0);
  }

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  // avoid changing the types of some subset of stores due to visit order,
  // preventing their merging.
  if (isa<ConstantFPSDNode>(Value)) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  // Last resort: try to narrow a load/op/store sequence.
  return ReduceLoadOpStoreWidth(N);
}

/// Combine an INSERT_VECTOR_ELT node (operands: vector, value, index).
///
/// Performs, in order:
///  - inserting an UNDEF value          -> the input vector unchanged;
///  - nested constant-index inserts     -> reordered so that the smaller index
///                                         ends up on the inner node;
///  - constant-index insert into a single-use BUILD_VECTOR or into UNDEF
///                                      -> one new BUILD_VECTOR.
///
/// \returns the replacement value, or an empty SDValue when nothing applies
/// (non-constant index, BUILD_VECTOR not legal after operation legalization,
/// or an input vector that is neither BUILD_VECTOR nor UNDEF).
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc dl(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.getOpcode() == ISD::UNDEF)
    return InVec;

  EVT VT = InVec.getValueType();

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that we know which element is being inserted
  if (!isa<ConstantSDNode>(EltNo))
    return SDValue();
  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt =
      cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
    if (Elt < OtherElt) {
      // Swap nodes.
      // NewOp is the new inner insert: this node's value/index applied
      // directly to the innermost vector.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorklist(NewOp.getNode());
      // The old inner insert's value/index become the new outer insert.
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.getOpcode() == ISD::UNDEF) {
    // Model the UNDEF vector as NElts undef scalars of the inserted value's
    // type.
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element
  // (an index at or beyond Ops.size() leaves Ops untouched).
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    if (InVal.getValueType() != OpVT)
      InVal = OpVT.bitsGT(InVal.getValueType()) ?
                DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
                DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
    Ops[Elt] = InVal;
  }

  // Return the new vector
  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}

/// Replace an EXTRACT_VECTOR_ELT of a vector load with a scalar load of just
/// the selected element.
///
/// \param EVE          the EXTRACT_VECTOR_ELT node being replaced.
/// \param InVecVT      the vector type the element is extracted from; its
///                     element type becomes the memory type of the new load.
/// \param EltNo        the element index; may be a constant or a variable.
/// \param OriginalLoad the vector load feeding \p EVE.
///
/// \returns an empty SDValue, leaving the DAG untouched, when the ABI
/// alignment of the element type exceeds the alignment of \p OriginalLoad or
/// when a LOAD of the element type is neither legal nor custom. Otherwise the
/// uses of \p EVE and of the original load's chain result are rewritten to the
/// new load and SDValue(EVE, 0) is returned.
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  // The narrowed load is emitted with the element type's ABI alignment.
  Align = NewAlign;

  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: the byte offset is known, so the pointer info can be
    // offset as well.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: compute index * store-size at run time and keep the
    // original (un-offset) pointer info.
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(
        ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
        VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    Load = DAG.getLoad(
        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    // Capture the chain before Load is re-pointed at the TRUNCATE/BITCAST.
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  // Replace both the extracted value and the original load's chain result.
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
12067 AddToWorklist(EVE); 12068 ++OpsNarrowed; 12069 return SDValue(EVE, 0); 12070 } 12071 12072 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { 12073 // (vextract (scalar_to_vector val, 0) -> val 12074 SDValue InVec = N->getOperand(0); 12075 EVT VT = InVec.getValueType(); 12076 EVT NVT = N->getValueType(0); 12077 12078 if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { 12079 // Check if the result type doesn't match the inserted element type. A 12080 // SCALAR_TO_VECTOR may truncate the inserted element and the 12081 // EXTRACT_VECTOR_ELT may widen the extracted vector. 12082 SDValue InOp = InVec.getOperand(0); 12083 if (InOp.getValueType() != NVT) { 12084 assert(InOp.getValueType().isInteger() && NVT.isInteger()); 12085 return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT); 12086 } 12087 return InOp; 12088 } 12089 12090 SDValue EltNo = N->getOperand(1); 12091 ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); 12092 12093 // extract_vector_elt (build_vector x, y), 1 -> y 12094 if (ConstEltNo && 12095 InVec.getOpcode() == ISD::BUILD_VECTOR && 12096 TLI.isTypeLegal(VT) && 12097 (InVec.hasOneUse() || 12098 TLI.aggressivelyPreferBuildVectorSources(VT))) { 12099 SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue()); 12100 EVT InEltVT = Elt.getValueType(); 12101 12102 // Sometimes build_vector's scalar input types do not match result type. 12103 if (NVT == InEltVT) 12104 return Elt; 12105 12106 // TODO: It may be useful to truncate if free if the build_vector implicitly 12107 // converts. 12108 } 12109 12110 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. 12111 // We only perform this optimization before the op legalization phase because 12112 // we may introduce new vector instructions which are not backed by TD 12113 // patterns. For example on AVX, extracting elements from a wide vector 12114 // without using extract_subvector. However, if we can find an underlying 12115 // scalar value, then we can always use that. 
12116 if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) { 12117 int NumElem = VT.getVectorNumElements(); 12118 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); 12119 // Find the new index to extract from. 12120 int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue()); 12121 12122 // Extracting an undef index is undef. 12123 if (OrigElt == -1) 12124 return DAG.getUNDEF(NVT); 12125 12126 // Select the right vector half to extract from. 12127 SDValue SVInVec; 12128 if (OrigElt < NumElem) { 12129 SVInVec = InVec->getOperand(0); 12130 } else { 12131 SVInVec = InVec->getOperand(1); 12132 OrigElt -= NumElem; 12133 } 12134 12135 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { 12136 SDValue InOp = SVInVec.getOperand(OrigElt); 12137 if (InOp.getValueType() != NVT) { 12138 assert(InOp.getValueType().isInteger() && NVT.isInteger()); 12139 InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); 12140 } 12141 12142 return InOp; 12143 } 12144 12145 // FIXME: We should handle recursing on other vector shuffles and 12146 // scalar_to_vector here as well. 12147 12148 if (!LegalOperations) { 12149 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 12150 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, 12151 DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); 12152 } 12153 } 12154 12155 bool BCNumEltsChanged = false; 12156 EVT ExtVT = VT.getVectorElementType(); 12157 EVT LVT = ExtVT; 12158 12159 // If the result of load has to be truncated, then it's not necessarily 12160 // profitable. 12161 if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) 12162 return SDValue(); 12163 12164 if (InVec.getOpcode() == ISD::BITCAST) { 12165 // Don't duplicate a load with other uses. 
12166 if (!InVec.hasOneUse()) 12167 return SDValue(); 12168 12169 EVT BCVT = InVec.getOperand(0).getValueType(); 12170 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) 12171 return SDValue(); 12172 if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) 12173 BCNumEltsChanged = true; 12174 InVec = InVec.getOperand(0); 12175 ExtVT = BCVT.getVectorElementType(); 12176 } 12177 12178 // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) 12179 if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && 12180 ISD::isNormalLoad(InVec.getNode()) && 12181 !N->getOperand(1)->hasPredecessor(InVec.getNode())) { 12182 SDValue Index = N->getOperand(1); 12183 if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) 12184 return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, 12185 OrigLoad); 12186 } 12187 12188 // Perform only after legalization to ensure build_vector / vector_shuffle 12189 // optimizations have already been done. 12190 if (!LegalOperations) return SDValue(); 12191 12192 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) 12193 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) 12194 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) 12195 12196 if (ConstEltNo) { 12197 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 12198 12199 LoadSDNode *LN0 = nullptr; 12200 const ShuffleVectorSDNode *SVN = nullptr; 12201 if (ISD::isNormalLoad(InVec.getNode())) { 12202 LN0 = cast<LoadSDNode>(InVec); 12203 } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && 12204 InVec.getOperand(0).getValueType() == ExtVT && 12205 ISD::isNormalLoad(InVec.getOperand(0).getNode())) { 12206 // Don't duplicate a load with other uses. 
12207 if (!InVec.hasOneUse()) 12208 return SDValue(); 12209 12210 LN0 = cast<LoadSDNode>(InVec.getOperand(0)); 12211 } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { 12212 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) 12213 // => 12214 // (load $addr+1*size) 12215 12216 // Don't duplicate a load with other uses. 12217 if (!InVec.hasOneUse()) 12218 return SDValue(); 12219 12220 // If the bit convert changed the number of elements, it is unsafe 12221 // to examine the mask. 12222 if (BCNumEltsChanged) 12223 return SDValue(); 12224 12225 // Select the input vector, guarding against out of range extract vector. 12226 unsigned NumElems = VT.getVectorNumElements(); 12227 int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); 12228 InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); 12229 12230 if (InVec.getOpcode() == ISD::BITCAST) { 12231 // Don't duplicate a load with other uses. 12232 if (!InVec.hasOneUse()) 12233 return SDValue(); 12234 12235 InVec = InVec.getOperand(0); 12236 } 12237 if (ISD::isNormalLoad(InVec.getNode())) { 12238 LN0 = cast<LoadSDNode>(InVec); 12239 Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; 12240 EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType()); 12241 } 12242 } 12243 12244 // Make sure we found a non-volatile load and the extractelement is 12245 // the only use. 12246 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) 12247 return SDValue(); 12248 12249 // If Idx was -1 above, Elt is going to be -1, so just return undef. 
12250 if (Elt == -1) 12251 return DAG.getUNDEF(LVT); 12252 12253 return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0); 12254 } 12255 12256 return SDValue(); 12257 } 12258 12259 // Simplify (build_vec (ext )) to (bitcast (build_vec )) 12260 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { 12261 // We perform this optimization post type-legalization because 12262 // the type-legalizer often scalarizes integer-promoted vectors. 12263 // Performing this optimization before may create bit-casts which 12264 // will be type-legalized to complex code sequences. 12265 // We perform this optimization only before the operation legalizer because we 12266 // may introduce illegal operations. 12267 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes) 12268 return SDValue(); 12269 12270 unsigned NumInScalars = N->getNumOperands(); 12271 SDLoc dl(N); 12272 EVT VT = N->getValueType(0); 12273 12274 // Check to see if this is a BUILD_VECTOR of a bunch of values 12275 // which come from any_extend or zero_extend nodes. If so, we can create 12276 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR 12277 // optimizations. We do not handle sign-extend because we can't fill the sign 12278 // using shuffles. 12279 EVT SourceType = MVT::Other; 12280 bool AllAnyExt = true; 12281 12282 for (unsigned i = 0; i != NumInScalars; ++i) { 12283 SDValue In = N->getOperand(i); 12284 // Ignore undef inputs. 12285 if (In.getOpcode() == ISD::UNDEF) continue; 12286 12287 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; 12288 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; 12289 12290 // Abort if the element is not an extension. 12291 if (!ZeroExt && !AnyExt) { 12292 SourceType = MVT::Other; 12293 break; 12294 } 12295 12296 // The input is a ZeroExt or AnyExt. Check the original type. 12297 EVT InTy = In.getOperand(0).getValueType(); 12298 12299 // Check that all of the widened source types are the same. 
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                 isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = DAG.getDataLayout().isLittleEndian();
  // Number of narrow (SourceType) elements that make up one wide element.
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // The extension bits are undef if every input was an ANY_EXTEND, and zero
  // as soon as at least one ZERO_EXTEND is involved.
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, SDLoc(N), SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
    SDValue In;
    if (Cast.getOpcode() == ISD::UNDEF)
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    // Within each group of ElemRatio narrow slots the source value goes in
    // the first slot on little-endian targets and the last one otherwise.
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT)) return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorklist(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}

/// Fold a BUILD_VECTOR of scalar int-to-FP conversions into one vector
/// conversion of a BUILD_VECTOR of the integer sources, e.g.
///   (build_vector (sint_to_fp a), (sint_to_fp b))
///     -> (sint_to_fp (build_vector a, b))
///
/// Every non-UNDEF operand must use the same opcode (UINT_TO_FP or
/// SINT_TO_FP) and the same source type; UNDEF operands become UNDEF of the
/// source type.
///
/// \returns the new conversion node, or an empty SDValue when the operands do
/// not match, fewer than two operands are defined, the conversion is neither
/// legal nor custom on the integer vector type, or that type is not legal.
SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);

  EVT SrcVT = MVT::Other;
  // ISD::DELETED_NODE is used as the "no opcode seen yet" sentinel.
  unsigned Opcode = ISD::DELETED_NODE;
  unsigned NumDefs = 0;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    unsigned Opc = In.getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    // If all scalar values are floats and converted from integers.
    if (Opcode == ISD::DELETED_NODE &&
        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
      Opcode = Opc;
    }

    // Also rejects a first defined operand that is not an int-to-FP
    // conversion, since Opcode is then still the sentinel.
    if (Opc != Opcode)
      return SDValue();

    EVT InVT = In.getOperand(0).getValueType();

    // If all scalar values are typed differently, bail out. It's chosen to
    // simplify BUILD_VECTOR of integer types.
    if (SrcVT == MVT::Other)
      SrcVT = InVT;
    if (SrcVT != InVT)
      return SDValue();
    NumDefs++;
  }

  // If the vector has just one element defined, it's not worth to fold it into
  // a vectorized one.
  if (NumDefs < 2)
    return SDValue();

  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
         && "Should only handle conversion from integer to float.");
  assert(SrcVT != MVT::Other && "Cannot determine source type!");

  // Integer vector type with the same number of elements as the result.
  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);

  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
    return SDValue();

  // Just because the floating-point vector type is legal does not necessarily
  // mean that the corresponding integer vector type is.
  if (!isTypeLegal(NVT))
    return SDValue();

  SmallVector<SDValue, 8> Opnds;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);

    if (In.getOpcode() == ISD::UNDEF)
      Opnds.push_back(DAG.getUNDEF(SrcVT));
    else
      Opnds.push_back(In.getOperand(0));
  }
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
  AddToWorklist(BV.getNode());

  return DAG.getNode(Opcode, dl, VT, BV);
}

/// Combine a BUILD_VECTOR node.
///
/// An all-UNDEF vector becomes UNDEF. Otherwise the extension and int-to-FP
/// conversion folds are tried first. Failing those, when every defined
/// operand is a zero constant or a constant-index EXTRACT_VECTOR_ELT drawn
/// from at most two source vectors, the node is rebuilt as a VECTOR_SHUFFLE.
///
/// \returns the replacement value, or an empty SDValue when nothing applies.
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
    return V;

  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  // at most two distinct vectors, turn this into a shuffle node.

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  // First pass: find the (at most two) source vectors. A null VecIn1 after
  // the loop means the operands cannot be expressed as a shuffle.
  SDValue VecIn1, VecIn2;
  bool UsesZeroVector = false;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue Op = N->getOperand(i);
    // Ignore undef inputs.
    if (Op.getOpcode() == ISD::UNDEF) continue;

    // See if we can combine this build_vector into a blend with a zero vector.
    // The zero vector takes the place of the second shuffle input, so this is
    // only possible while VecIn2 is still unused.
    if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
      UsesZeroVector = true;
      continue;
    }

    // If this input is something other than a EXTRACT_VECTOR_ELT with a
    // constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1))) {
      VecIn1 = VecIn2 = SDValue(nullptr, 0);
      break;
    }

    // We allow up to two distinct input vectors.
    SDValue ExtractedFromVec = Op.getOperand(0);
    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
      continue;

    if (!VecIn1.getNode()) {
      VecIn1 = ExtractedFromVec;
    } else if (!VecIn2.getNode() && !UsesZeroVector) {
      VecIn2 = ExtractedFromVec;
    } else {
      // Too many inputs.
      VecIn1 = VecIn2 = SDValue(nullptr, 0);
      break;
    }
  }

  // If everything is good, we can make a shuffle operation.
  if (VecIn1.getNode()) {
    // Second pass: build the shuffle mask, one entry per BUILD_VECTOR operand.
    unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
    SmallVector<int, 8> Mask;
    for (unsigned i = 0; i != NumInScalars; ++i) {
      unsigned Opcode = N->getOperand(i).getOpcode();
      if (Opcode == ISD::UNDEF) {
        Mask.push_back(-1);
        continue;
      }

      // Operands can also be zero.
      if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
        assert(UsesZeroVector &&
               (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
               "Unexpected node found!");
        // Lane i of the second shuffle input, which is set to the zero vector
        // further down.
        Mask.push_back(NumInScalars+i);
        continue;
      }

      // If extracting from the first vector, just use the index directly.
      SDValue Extract = N->getOperand(i);
      SDValue ExtVal = Extract.getOperand(1);
      unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
      if (Extract.getOperand(0) == VecIn1) {
        Mask.push_back(ExtIndex);
        continue;
      }

      // Otherwise, use InIdx + InputVecSize
      Mask.push_back(InNumElements + ExtIndex);
    }

    // Avoid introducing illegal shuffles with zero.
    if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
      return SDValue();

    // We can't generate a shuffle node with mismatched input and output types.
    // Attempt to transform a single input vector to the correct type.
    if ((VT != VecIn1.getValueType())) {
      // If the input vector type has a different base type to the output
      // vector type, bail out.
      EVT VTElemType = VT.getVectorElementType();
      if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
          (VecIn2.getNode() &&
           (VecIn2.getValueType().getVectorElementType() != VTElemType)))
        return SDValue();

      // If the input vector is too small, widen it.
      // We only support widening of vectors which are half the size of the
      // output registers. For example XMM->YMM widening on X86 with AVX.
      EVT VecInT = VecIn1.getValueType();
      if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
        // If we only have one small input, widen it by adding undef values.
        if (!VecIn2.getNode())
          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
                               DAG.getUNDEF(VecIn1.getValueType()));
        else if (VecIn1.getValueType() == VecIn2.getValueType()) {
          // If we have two small inputs of the same type, try to concat them.
          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
          VecIn2 = SDValue(nullptr, 0);
        } else
          return SDValue();
      } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
        // If the input vector is too large, try to split it.
        // We don't support having two input vectors that are too large.
        // If the zero vector was used, we can not split the vector,
        // since we'd need 3 inputs.
        if (UsesZeroVector || VecIn2.getNode())
          return SDValue();

        if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
          return SDValue();

        // Try to replace VecIn1 with two extract_subvectors
        // No need to update the masks, they should still be correct.
        // VecIn2 (upper half) is created first because it still needs the
        // original, unsplit VecIn1 as its operand.
        VecIn2 = DAG.getNode(
            ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
            DAG.getConstant(VT.getVectorNumElements(), dl,
                            TLI.getVectorIdxTy(DAG.getDataLayout())));
        VecIn1 = DAG.getNode(
            ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
            DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
      } else
        return SDValue();
    }

    if (UsesZeroVector)
      VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
                                DAG.getConstantFP(0.0, dl, VT);
    else
      // If VecIn2 is unused then change it to undef.
      VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);

    // Check that we were able to transform all incoming values to the same
    // type.
    if (VecIn2.getValueType() != VecIn1.getValueType() ||
        VecIn1.getValueType() != VT)
          return SDValue();

    // Return the new VECTOR_SHUFFLE node.
    SDValue Ops[2];
    Ops[0] = VecIn1;
    Ops[1] = VecIn2;
    return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
  }

  return SDValue();
}

/// Fold a CONCAT_VECTORS whose operands are each either UNDEF or a BITCAST of
/// a non-vector value into (bitcast (build_vector scalars...)).
///
/// Only attempted when the operand vector type is not legal. If any of the
/// scalars is floating point, every scalar is converted to the floating-point
/// type of the operand's bit width; otherwise the integer type of that width
/// is used.
///
/// \returns the new BITCAST node, or an empty SDValue when the operand type
/// is legal, an operand has another form, or a scalar is neither integer nor
/// floating point.
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT OpVT = N->getOperand(0).getValueType();

  // If the operands are legal vectors, leave them alone.
  if (TLI.isTypeLegal(OpVT))
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SmallVector<SDValue, 8> Ops;

  // Start out with an integer scalar as wide as one operand vector.
  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);

  // Keep track of what we encounter.
  bool AnyInteger = false;
  bool AnyFP = false;
  for (const SDValue &Op : N->ops()) {
    if (ISD::BITCAST == Op.getOpcode() &&
        !Op.getOperand(0).getValueType().isVector())
      Ops.push_back(Op.getOperand(0));
    else if (ISD::UNDEF == Op.getOpcode())
      Ops.push_back(ScalarUndef);
    else
      return SDValue();

    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out, it could be something weird like x86mmx.
    EVT LastOpVT = Ops.back().getValueType();
    if (LastOpVT.isFloatingPoint())
      AnyFP = true;
    else if (LastOpVT.isInteger())
      AnyInteger = true;
    else
      return SDValue();
  }

  // If any of the operands is a floating point scalar bitcast to a vector,
  // use floating point types throughout, and bitcast everything.
  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
  if (AnyFP) {
    SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
    ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
    if (AnyInteger) {
      // Mixed case: rewrite every scalar that is not already of type SVT.
      for (SDValue &Op : Ops) {
        if (Op.getValueType() == SVT)
          continue;
        if (Op.getOpcode() == ISD::UNDEF)
          Op = ScalarUndef;
        else
          Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op);
      }
    }
  }

  // One SVT element per original operand vector.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
                               VT.getSizeInBits() / SVT.getSizeInBits());
  return DAG.getNode(ISD::BITCAST, DL, VT,
                     DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
}

// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
//
// Returns the VECTOR_SHUFFLE on success, or an empty SDValue when an operand
// is neither UNDEF nor a constant-index EXTRACT_SUBVECTOR, when a source
// vector differs in size from the result, when more than two sources are
// referenced, or when the target rejects the resulting mask.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 are the two shuffle inputs; UNDEF marks a slot that is still free.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    // Peek through any bitcast.
    while (Op.getOpcode() == ISD::BITCAST)
      Op = Op.getOperand(0);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.getOpcode() == ISD::UNDEF) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();

    // Peek through any bitcast.
    while (ExtVec.getOpcode() == ISD::BITCAST)
      ExtVec = ExtVec.getOperand(0);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.getOpcode() == ISD::UNDEF) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    // ExtIdx is expressed in ExtVT elements; convert it to VT elements.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) {
      SV1 = ExtVec;
      // Lanes of the second shuffle input are numbered from NumElts.
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}

SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
12765 EVT VT = N->getValueType(0); 12766 if (ISD::allOperandsUndef(N)) 12767 return DAG.getUNDEF(VT); 12768 12769 // Optimize concat_vectors where all but the first of the vectors are undef. 12770 if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { 12771 return Op.getOpcode() == ISD::UNDEF; 12772 })) { 12773 SDValue In = N->getOperand(0); 12774 assert(In.getValueType().isVector() && "Must concat vectors"); 12775 12776 // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). 12777 if (In->getOpcode() == ISD::BITCAST && 12778 !In->getOperand(0)->getValueType(0).isVector()) { 12779 SDValue Scalar = In->getOperand(0); 12780 12781 // If the bitcast type isn't legal, it might be a trunc of a legal type; 12782 // look through the trunc so we can still do the transform: 12783 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) 12784 if (Scalar->getOpcode() == ISD::TRUNCATE && 12785 !TLI.isTypeLegal(Scalar.getValueType()) && 12786 TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) 12787 Scalar = Scalar->getOperand(0); 12788 12789 EVT SclTy = Scalar->getValueType(0); 12790 12791 if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) 12792 return SDValue(); 12793 12794 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, 12795 VT.getSizeInBits() / SclTy.getSizeInBits()); 12796 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) 12797 return SDValue(); 12798 12799 SDLoc dl = SDLoc(N); 12800 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); 12801 return DAG.getNode(ISD::BITCAST, dl, VT, Res); 12802 } 12803 } 12804 12805 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. 12806 // We have already tested above for an UNDEF only concatenation. 12807 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) 12808 // -> (BUILD_VECTOR A, B, ..., C, D, ...) 
12809 auto IsBuildVectorOrUndef = [](const SDValue &Op) { 12810 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); 12811 }; 12812 bool AllBuildVectorsOrUndefs = 12813 std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef); 12814 if (AllBuildVectorsOrUndefs) { 12815 SmallVector<SDValue, 8> Opnds; 12816 EVT SVT = VT.getScalarType(); 12817 12818 EVT MinVT = SVT; 12819 if (!SVT.isFloatingPoint()) { 12820 // If BUILD_VECTOR are from built from integer, they may have different 12821 // operand types. Get the smallest type and truncate all operands to it. 12822 bool FoundMinVT = false; 12823 for (const SDValue &Op : N->ops()) 12824 if (ISD::BUILD_VECTOR == Op.getOpcode()) { 12825 EVT OpSVT = Op.getOperand(0)->getValueType(0); 12826 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT; 12827 FoundMinVT = true; 12828 } 12829 assert(FoundMinVT && "Concat vector type mismatch"); 12830 } 12831 12832 for (const SDValue &Op : N->ops()) { 12833 EVT OpVT = Op.getValueType(); 12834 unsigned NumElts = OpVT.getVectorNumElements(); 12835 12836 if (ISD::UNDEF == Op.getOpcode()) 12837 Opnds.append(NumElts, DAG.getUNDEF(MinVT)); 12838 12839 if (ISD::BUILD_VECTOR == Op.getOpcode()) { 12840 if (SVT.isFloatingPoint()) { 12841 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); 12842 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); 12843 } else { 12844 for (unsigned i = 0; i != NumElts; ++i) 12845 Opnds.push_back( 12846 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); 12847 } 12848 } 12849 } 12850 12851 assert(VT.getVectorNumElements() == Opnds.size() && 12852 "Concat vector type mismatch"); 12853 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); 12854 } 12855 12856 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. 12857 if (SDValue V = combineConcatVectorOfScalars(N, DAG)) 12858 return V; 12859 12860 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. 
12861 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) 12862 if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) 12863 return V; 12864 12865 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR 12866 // nodes often generate nop CONCAT_VECTOR nodes. 12867 // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that 12868 // place the incoming vectors at the exact same location. 12869 SDValue SingleSource = SDValue(); 12870 unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); 12871 12872 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 12873 SDValue Op = N->getOperand(i); 12874 12875 if (Op.getOpcode() == ISD::UNDEF) 12876 continue; 12877 12878 // Check if this is the identity extract: 12879 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) 12880 return SDValue(); 12881 12882 // Find the single incoming vector for the extract_subvector. 12883 if (SingleSource.getNode()) { 12884 if (Op.getOperand(0) != SingleSource) 12885 return SDValue(); 12886 } else { 12887 SingleSource = Op.getOperand(0); 12888 12889 // Check the source type is the same as the type of the result. 12890 // If not, this concat may extend the vector, so we can not 12891 // optimize it away. 12892 if (SingleSource.getValueType() != N->getValueType(0)) 12893 return SDValue(); 12894 } 12895 12896 unsigned IdentityIndex = i * PartNumElem; 12897 ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 12898 // The extract index must be constant. 12899 if (!CS) 12900 return SDValue(); 12901 12902 // Check that we are reading from the identity index. 
12903 if (CS->getZExtValue() != IdentityIndex) 12904 return SDValue(); 12905 } 12906 12907 if (SingleSource.getNode()) 12908 return SingleSource; 12909 12910 return SDValue(); 12911 } 12912 12913 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { 12914 EVT NVT = N->getValueType(0); 12915 SDValue V = N->getOperand(0); 12916 12917 if (V->getOpcode() == ISD::CONCAT_VECTORS) { 12918 // Combine: 12919 // (extract_subvec (concat V1, V2, ...), i) 12920 // Into: 12921 // Vi if possible 12922 // Only operand 0 is checked as 'concat' assumes all inputs of the same 12923 // type. 12924 if (V->getOperand(0).getValueType() != NVT) 12925 return SDValue(); 12926 unsigned Idx = N->getConstantOperandVal(1); 12927 unsigned NumElems = NVT.getVectorNumElements(); 12928 assert((Idx % NumElems) == 0 && 12929 "IDX in concat is not a multiple of the result vector length."); 12930 return V->getOperand(Idx / NumElems); 12931 } 12932 12933 // Skip bitcasting 12934 if (V->getOpcode() == ISD::BITCAST) 12935 V = V.getOperand(0); 12936 12937 if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { 12938 SDLoc dl(N); 12939 // Handle only simple case where vector being inserted and vector 12940 // being extracted are of same type, and are half size of larger vectors. 12941 EVT BigVT = V->getOperand(0).getValueType(); 12942 EVT SmallVT = V->getOperand(1).getValueType(); 12943 if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) 12944 return SDValue(); 12945 12946 // Only handle cases where both indexes are constants with the same type. 
12947 ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); 12948 ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); 12949 12950 if (InsIdx && ExtIdx && 12951 InsIdx->getValueType(0).getSizeInBits() <= 64 && 12952 ExtIdx->getValueType(0).getSizeInBits() <= 64) { 12953 // Combine: 12954 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) 12955 // Into: 12956 // indices are equal or bit offsets are equal => V1 12957 // otherwise => (extract_subvec V1, ExtIdx) 12958 if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == 12959 ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) 12960 return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1)); 12961 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, 12962 DAG.getNode(ISD::BITCAST, dl, 12963 N->getOperand(0).getValueType(), 12964 V->getOperand(0)), N->getOperand(1)); 12965 } 12966 } 12967 12968 return SDValue(); 12969 } 12970 12971 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements, 12972 SDValue V, SelectionDAG &DAG) { 12973 SDLoc DL(V); 12974 EVT VT = V.getValueType(); 12975 12976 switch (V.getOpcode()) { 12977 default: 12978 return V; 12979 12980 case ISD::CONCAT_VECTORS: { 12981 EVT OpVT = V->getOperand(0).getValueType(); 12982 int OpSize = OpVT.getVectorNumElements(); 12983 SmallBitVector OpUsedElements(OpSize, false); 12984 bool FoundSimplification = false; 12985 SmallVector<SDValue, 4> NewOps; 12986 NewOps.reserve(V->getNumOperands()); 12987 for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) { 12988 SDValue Op = V->getOperand(i); 12989 bool OpUsed = false; 12990 for (int j = 0; j < OpSize; ++j) 12991 if (UsedElements[i * OpSize + j]) { 12992 OpUsedElements[j] = true; 12993 OpUsed = true; 12994 } 12995 NewOps.push_back( 12996 OpUsed ? 
simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG) 12997 : DAG.getUNDEF(OpVT)); 12998 FoundSimplification |= Op == NewOps.back(); 12999 OpUsedElements.reset(); 13000 } 13001 if (FoundSimplification) 13002 V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps); 13003 return V; 13004 } 13005 13006 case ISD::INSERT_SUBVECTOR: { 13007 SDValue BaseV = V->getOperand(0); 13008 SDValue SubV = V->getOperand(1); 13009 auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2)); 13010 if (!IdxN) 13011 return V; 13012 13013 int SubSize = SubV.getValueType().getVectorNumElements(); 13014 int Idx = IdxN->getZExtValue(); 13015 bool SubVectorUsed = false; 13016 SmallBitVector SubUsedElements(SubSize, false); 13017 for (int i = 0; i < SubSize; ++i) 13018 if (UsedElements[i + Idx]) { 13019 SubVectorUsed = true; 13020 SubUsedElements[i] = true; 13021 UsedElements[i + Idx] = false; 13022 } 13023 13024 // Now recurse on both the base and sub vectors. 13025 SDValue SimplifiedSubV = 13026 SubVectorUsed 13027 ? 
simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG) 13028 : DAG.getUNDEF(SubV.getValueType()); 13029 SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG); 13030 if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV) 13031 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, 13032 SimplifiedBaseV, SimplifiedSubV, V->getOperand(2)); 13033 return V; 13034 } 13035 } 13036 } 13037 13038 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, 13039 SDValue N1, SelectionDAG &DAG) { 13040 EVT VT = SVN->getValueType(0); 13041 int NumElts = VT.getVectorNumElements(); 13042 SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false); 13043 for (int M : SVN->getMask()) 13044 if (M >= 0 && M < NumElts) 13045 N0UsedElements[M] = true; 13046 else if (M >= NumElts) 13047 N1UsedElements[M - NumElts] = true; 13048 13049 SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG); 13050 SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG); 13051 if (S0 == N0 && S1 == N1) 13052 return SDValue(); 13053 13054 return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); 13055 } 13056 13057 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, 13058 // or turn a shuffle of a single concat into simpler shuffle then concat. 
13059 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { 13060 EVT VT = N->getValueType(0); 13061 unsigned NumElts = VT.getVectorNumElements(); 13062 13063 SDValue N0 = N->getOperand(0); 13064 SDValue N1 = N->getOperand(1); 13065 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 13066 13067 SmallVector<SDValue, 4> Ops; 13068 EVT ConcatVT = N0.getOperand(0).getValueType(); 13069 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); 13070 unsigned NumConcats = NumElts / NumElemsPerConcat; 13071 13072 // Special case: shuffle(concat(A,B)) can be more efficiently represented 13073 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high 13074 // half vector elements. 13075 if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF && 13076 std::all_of(SVN->getMask().begin() + NumElemsPerConcat, 13077 SVN->getMask().end(), [](int i) { return i == -1; })) { 13078 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), 13079 makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat)); 13080 N1 = DAG.getUNDEF(ConcatVT); 13081 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); 13082 } 13083 13084 // Look at every vector that's inserted. We're looking for exact 13085 // subvector-sized copies from a concatenated vector 13086 for (unsigned I = 0; I != NumConcats; ++I) { 13087 // Make sure we're dealing with a copy. 
13088 unsigned Begin = I * NumElemsPerConcat; 13089 bool AllUndef = true, NoUndef = true; 13090 for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) { 13091 if (SVN->getMaskElt(J) >= 0) 13092 AllUndef = false; 13093 else 13094 NoUndef = false; 13095 } 13096 13097 if (NoUndef) { 13098 if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) 13099 return SDValue(); 13100 13101 for (unsigned J = 1; J != NumElemsPerConcat; ++J) 13102 if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) 13103 return SDValue(); 13104 13105 unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; 13106 if (FirstElt < N0.getNumOperands()) 13107 Ops.push_back(N0.getOperand(FirstElt)); 13108 else 13109 Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); 13110 13111 } else if (AllUndef) { 13112 Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType())); 13113 } else { // Mixed with general masks and undefs, can't do optimization. 13114 return SDValue(); 13115 } 13116 } 13117 13118 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); 13119 } 13120 13121 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { 13122 EVT VT = N->getValueType(0); 13123 unsigned NumElts = VT.getVectorNumElements(); 13124 13125 SDValue N0 = N->getOperand(0); 13126 SDValue N1 = N->getOperand(1); 13127 13128 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); 13129 13130 // Canonicalize shuffle undef, undef -> undef 13131 if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) 13132 return DAG.getUNDEF(VT); 13133 13134 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 13135 13136 // Canonicalize shuffle v, v -> v, undef 13137 if (N0 == N1) { 13138 SmallVector<int, 8> NewMask; 13139 for (unsigned i = 0; i != NumElts; ++i) { 13140 int Idx = SVN->getMaskElt(i); 13141 if (Idx >= (int)NumElts) Idx -= NumElts; 13142 NewMask.push_back(Idx); 13143 } 13144 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), 13145 &NewMask[0]); 
// Closes the 'shuffle v, v -> v, undef' canonicalization opened just above.
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.getOpcode() == ISD::UNDEF) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= 0) {
        if (Idx >= (int)NumElts)
          Idx -= NumElts;
        else
          Idx = -1; // remove reference to lhs
      }
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT),
                                &NewMask[0]);
  }

  // Remove references to rhs if it is undef
  if (N1.getOpcode() == ISD::UNDEF) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    // Only build a new node when at least one mask entry was rewritten.
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]);
  }

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      // Base is the first non-undef operand, if there is one.
      SDValue Base;
      bool AllSame = true;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
                                  V->getValueType(0), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
      return NewBV;
    }
  }

  // There are various patterns used to build up a vector from smaller vectors,
  // subvectors, or elements. Scan chains of these and replace unused insertions
  // or components with undef.
  if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
    return S;

  // Shuffles of concats (with matching concat operand types, or with an undef
  // second input) may themselves be expressible as a concat.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.getOpcode() == ISD::UNDEF ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    SDValue V = partitionShuffleOfConcats(N, DAG);

    if (V.getNode())
      return V;
  }

  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
    SmallVector<SDValue, 8> Ops;
    for (int M : SVN->getMask()) {
      // Undef mask entries, and lanes other than 0 of a SCALAR_TO_VECTOR,
      // stay undef.
      SDValue Op = DAG.getUNDEF(VT.getScalarType());
      if (M >= 0) {
        int Idx = M % NumElts;
        SDValue &S = (M < (int)NumElts ? N0 : N1);
        if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
          Op = S.getOperand(Idx);
        } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
          if (Idx == 0)
            Op = S.getOperand(0);
        } else {
          // Operand can't be combined - bail out.
          break;
        }
      }
      Ops.push_back(Op);
    }
    // Ops is only complete if the loop above never bailed out.
    if (Ops.size() == VT.getVectorNumElements()) {
      // BUILD_VECTOR requires all inputs to be of the same type, find the
      // maximum type and extend them all.
      EVT SVT = VT.getScalarType();
      if (SVT.isInteger())
        for (SDValue &Op : Ops)
          SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
      if (SVT != VT.getScalarType())
        for (SDValue &Op : Ops)
          Op = TLI.isZExtFree(Op.getValueType(), SVT)
                   ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
                   : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops);
    }
  }

  // If this shuffle only has a single input that is a bitcasted shuffle,
  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
  // back to their original types.
  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
      TLI.isTypeLegal(VT)) {

    // Peek through the bitcast only if there is one user.
    SDValue BC0 = N0;
    while (BC0.getOpcode() == ISD::BITCAST) {
      if (!BC0.hasOneUse())
        break;
      BC0 = BC0.getOperand(0);
    }

    // Expand each mask entry into Scale consecutive entries so that the mask
    // addresses elements Scale times narrower; undef entries stay undef.
    auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
      if (Scale == 1)
        return SmallVector<int, 8>(Mask.begin(), Mask.end());

      SmallVector<int, 8> NewMask;
      for (int M : Mask)
        for (int s = 0; s != Scale; ++s)
          NewMask.push_back(M < 0 ? -1 : Scale * M + s);
      return NewMask;
    };

    if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
      EVT SVT = VT.getScalarType();
      EVT InnerVT = BC0->getValueType(0);
      EVT InnerSVT = InnerVT.getScalarType();

      // Determine which shuffle works with the smaller scalar type.
      EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
      EVT ScaleSVT = ScaleVT.getScalarType();

      if (TLI.isTypeLegal(ScaleVT) &&
          0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
          0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {

        int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
        int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();

        // Scale the shuffle masks to the smaller scalar type.
        ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
        SmallVector<int, 8> InnerMask =
            ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
        SmallVector<int, 8> OuterMask =
            ScaleShuffleMask(SVN->getMask(), OuterScale);

        // Merge the shuffle masks.
        SmallVector<int, 8> NewMask;
        for (int M : OuterMask)
          NewMask.push_back(M < 0 ? -1 : InnerMask[M]);

        // Test for shuffle mask legality over both commutations.
        SDValue SV0 = BC0->getOperand(0);
        SDValue SV1 = BC0->getOperand(1);
        bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        if (!LegalMask) {
          std::swap(SV0, SV1);
          ShuffleVectorSDNode::commuteMask(NewMask);
          LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
        }

        if (LegalMask) {
          SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
          SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
          return DAG.getNode(
              ISD::BITCAST, SDLoc(N), VT,
              DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
        }
      }
    }
  }

  // Canonicalize shuffles according to rules:
  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      TLI.isTypeLegal(VT)) {
    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(N1->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0 = N1->getOperand(0);
    SDValue SV1 = N1->getOperand(1);
    bool HasSameOp0 = N0 == SV0;
    bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
      // Commute the operands of this shuffle so that next rule
      // will trigger.
      return DAG.getCommutedVectorShuffle(*SVN);
  }

  // Try to fold according to rules:
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
  // Don't try to fold shuffles with illegal type.
  // Only fold if this shuffle is the only user of the other shuffle.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
      Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SDValue SV0, SV1;
    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask for a shuffle with SV0 as the first
    // operand, and SV1 as the second operand.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSV->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }

        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
                                           : OtherSV->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.getOpcode() == ISD::UNDEF) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      if (SV1.getNode() && SV1 != CurrentVec)
        return SDValue();

      // Ok. CurrentVec is the right hand side.
      // Update the mask accordingly.
      SV1 = CurrentVec;
      Mask.push_back(Idx + NumElts);
    }

    // Check if all indices in Mask are Undef. In case, propagate Undef.
    bool isUndefMask = true;
    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
      isUndefMask &= Mask[i] < 0;

    if (isUndefMask)
      return DAG.getUNDEF(VT);

    // An input that was never referenced becomes UNDEF.
    if (!SV0.getNode())
      SV0 = DAG.getUNDEF(VT);
    if (!SV1.getNode())
      SV1 = DAG.getUNDEF(VT);

    // Avoid introducing shuffles with illegal mask.
    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
      ShuffleVectorSDNode::commuteMask(Mask);

      if (!TLI.isShuffleMaskLegal(Mask, VT))
        return SDValue();

      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
      std::swap(SV0, SV1);
    }

    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
  }

  return SDValue();
}

/// Combine a SCALAR_TO_VECTOR node; returns the replacement value or an empty
/// SDValue when no fold applies.
SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
  SDValue InVal = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
  // with a VECTOR_SHUFFLE.
  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue InVec = InVal->getOperand(0);
    SDValue EltNo = InVal->getOperand(1);

    // FIXME: We could support implicit truncation if the shuffle can be
    // scaled to a smaller vector scalar type.
13504 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo); 13505 if (C0 && VT == InVec.getValueType() && 13506 VT.getScalarType() == InVal.getValueType()) { 13507 SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1); 13508 int Elt = C0->getZExtValue(); 13509 NewMask[0] = Elt; 13510 13511 if (TLI.isShuffleMaskLegal(NewMask, VT)) 13512 return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT), 13513 NewMask); 13514 } 13515 } 13516 13517 return SDValue(); 13518 } 13519 13520 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { 13521 SDValue N0 = N->getOperand(0); 13522 SDValue N2 = N->getOperand(2); 13523 13524 // If the input vector is a concatenation, and the insert replaces 13525 // one of the halves, we can optimize into a single concat_vectors. 13526 if (N0.getOpcode() == ISD::CONCAT_VECTORS && 13527 N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { 13528 APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); 13529 EVT VT = N->getValueType(0); 13530 13531 // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> 13532 // (concat_vectors Z, Y) 13533 if (InsIdx == 0) 13534 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, 13535 N->getOperand(1), N0.getOperand(1)); 13536 13537 // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) -> 13538 // (concat_vectors X, Z) 13539 if (InsIdx == VT.getVectorNumElements()/2) 13540 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, 13541 N0.getOperand(0), N->getOperand(1)); 13542 } 13543 13544 return SDValue(); 13545 } 13546 13547 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { 13548 SDValue N0 = N->getOperand(0); 13549 13550 // fold (fp_to_fp16 (fp16_to_fp op)) -> op 13551 if (N0->getOpcode() == ISD::FP16_TO_FP) 13552 return N0->getOperand(0); 13553 13554 return SDValue(); 13555 } 13556 13557 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { 13558 SDValue N0 = N->getOperand(0); 13559 13560 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) 13561 if (N0->getOpcode() == 
ISD::AND) { 13562 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); 13563 if (AndConst && AndConst->getAPIntValue() == 0xffff) { 13564 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), 13565 N0.getOperand(0)); 13566 } 13567 } 13568 13569 return SDValue(); 13570 } 13571 13572 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle 13573 /// with the destination vector and a zero vector. 13574 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> 13575 /// vector_shuffle V, Zero, <0, 4, 2, 4> 13576 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { 13577 EVT VT = N->getValueType(0); 13578 SDValue LHS = N->getOperand(0); 13579 SDValue RHS = N->getOperand(1); 13580 SDLoc dl(N); 13581 13582 // Make sure we're not running after operation legalization where it 13583 // may have custom lowered the vector shuffles. 13584 if (LegalOperations) 13585 return SDValue(); 13586 13587 if (N->getOpcode() != ISD::AND) 13588 return SDValue(); 13589 13590 if (RHS.getOpcode() == ISD::BITCAST) 13591 RHS = RHS.getOperand(0); 13592 13593 if (RHS.getOpcode() != ISD::BUILD_VECTOR) 13594 return SDValue(); 13595 13596 EVT RVT = RHS.getValueType(); 13597 unsigned NumElts = RHS.getNumOperands(); 13598 13599 // Attempt to create a valid clear mask, splitting the mask into 13600 // sub elements and checking to see if each is 13601 // all zeros or all ones - suitable for shuffle masking. 
13602 auto BuildClearMask = [&](int Split) { 13603 int NumSubElts = NumElts * Split; 13604 int NumSubBits = RVT.getScalarSizeInBits() / Split; 13605 13606 SmallVector<int, 8> Indices; 13607 for (int i = 0; i != NumSubElts; ++i) { 13608 int EltIdx = i / Split; 13609 int SubIdx = i % Split; 13610 SDValue Elt = RHS.getOperand(EltIdx); 13611 if (Elt.getOpcode() == ISD::UNDEF) { 13612 Indices.push_back(-1); 13613 continue; 13614 } 13615 13616 APInt Bits; 13617 if (isa<ConstantSDNode>(Elt)) 13618 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue(); 13619 else if (isa<ConstantFPSDNode>(Elt)) 13620 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt(); 13621 else 13622 return SDValue(); 13623 13624 // Extract the sub element from the constant bit mask. 13625 if (DAG.getDataLayout().isBigEndian()) { 13626 Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits); 13627 } else { 13628 Bits = Bits.lshr(SubIdx * NumSubBits); 13629 } 13630 13631 if (Split > 1) 13632 Bits = Bits.trunc(NumSubBits); 13633 13634 if (Bits.isAllOnesValue()) 13635 Indices.push_back(i); 13636 else if (Bits == 0) 13637 Indices.push_back(i + NumSubElts); 13638 else 13639 return SDValue(); 13640 } 13641 13642 // Let's see if the target supports this vector_shuffle. 13643 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits); 13644 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts); 13645 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) 13646 return SDValue(); 13647 13648 SDValue Zero = DAG.getConstant(0, dl, ClearVT); 13649 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl, 13650 DAG.getBitcast(ClearVT, LHS), 13651 Zero, &Indices[0])); 13652 }; 13653 13654 // Determine maximum split level (byte level masking). 
13655 int MaxSplit = 1; 13656 if (RVT.getScalarSizeInBits() % 8 == 0) 13657 MaxSplit = RVT.getScalarSizeInBits() / 8; 13658 13659 for (int Split = 1; Split <= MaxSplit; ++Split) 13660 if (RVT.getScalarSizeInBits() % Split == 0) 13661 if (SDValue S = BuildClearMask(Split)) 13662 return S; 13663 13664 return SDValue(); 13665 } 13666 13667 /// Visit a binary vector operation, like ADD. 13668 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { 13669 assert(N->getValueType(0).isVector() && 13670 "SimplifyVBinOp only works on vectors!"); 13671 13672 SDValue LHS = N->getOperand(0); 13673 SDValue RHS = N->getOperand(1); 13674 SDValue Ops[] = {LHS, RHS}; 13675 13676 // See if we can constant fold the vector operation. 13677 if (SDValue Fold = DAG.FoldConstantVectorArithmetic( 13678 N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) 13679 return Fold; 13680 13681 // Try to convert a constant mask AND into a shuffle clear mask. 13682 if (SDValue Shuffle = XformToShuffleWithZero(N)) 13683 return Shuffle; 13684 13685 // Type legalization might introduce new shuffles in the DAG. 13686 // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) 13687 // -> (shuffle (VBinOp (A, B)), Undef, Mask). 
13688 if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) && 13689 isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() && 13690 LHS.getOperand(1).getOpcode() == ISD::UNDEF && 13691 RHS.getOperand(1).getOpcode() == ISD::UNDEF) { 13692 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); 13693 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); 13694 13695 if (SVN0->getMask().equals(SVN1->getMask())) { 13696 EVT VT = N->getValueType(0); 13697 SDValue UndefVector = LHS.getOperand(1); 13698 SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 13699 LHS.getOperand(0), RHS.getOperand(0), 13700 N->getFlags()); 13701 AddUsersToWorklist(N); 13702 return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, 13703 &SVN0->getMask()[0]); 13704 } 13705 } 13706 13707 return SDValue(); 13708 } 13709 13710 SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, 13711 SDValue N1, SDValue N2){ 13712 assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); 13713 13714 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, 13715 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 13716 13717 // If we got a simplified select_cc node back from SimplifySelectCC, then 13718 // break it down into a new SETCC node, and a new SELECT node, and then return 13719 // the SELECT node, since we were called with a SELECT node. 13720 if (SCC.getNode()) { 13721 // Check to see if we got a select_cc back (to turn into setcc/select). 13722 // Otherwise, just return whatever node we got back, like fabs. 
13723 if (SCC.getOpcode() == ISD::SELECT_CC) { 13724 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), 13725 N0.getValueType(), 13726 SCC.getOperand(0), SCC.getOperand(1), 13727 SCC.getOperand(4)); 13728 AddToWorklist(SETCC.getNode()); 13729 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, 13730 SCC.getOperand(2), SCC.getOperand(3)); 13731 } 13732 13733 return SCC; 13734 } 13735 return SDValue(); 13736 } 13737 13738 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values 13739 /// being selected between, see if we can simplify the select. Callers of this 13740 /// should assume that TheSelect is deleted if this returns true. As such, they 13741 /// should return the appropriate thing (e.g. the node) back to the top-level of 13742 /// the DAG combiner loop to avoid it being looked at. 13743 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, 13744 SDValue RHS) { 13745 13746 // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) 13747 // The select + setcc is redundant, because fsqrt returns NaN for X < -0. 
13748 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { 13749 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) { 13750 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) 13751 SDValue Sqrt = RHS; 13752 ISD::CondCode CC; 13753 SDValue CmpLHS; 13754 const ConstantFPSDNode *NegZero = nullptr; 13755 13756 if (TheSelect->getOpcode() == ISD::SELECT_CC) { 13757 CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); 13758 CmpLHS = TheSelect->getOperand(0); 13759 NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1)); 13760 } else { 13761 // SELECT or VSELECT 13762 SDValue Cmp = TheSelect->getOperand(0); 13763 if (Cmp.getOpcode() == ISD::SETCC) { 13764 CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); 13765 CmpLHS = Cmp.getOperand(0); 13766 NegZero = isConstOrConstSplatFP(Cmp.getOperand(1)); 13767 } 13768 } 13769 if (NegZero && NegZero->isNegative() && NegZero->isZero() && 13770 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT || 13771 CC == ISD::SETULT || CC == ISD::SETLT)) { 13772 // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) 13773 CombineTo(TheSelect, Sqrt); 13774 return true; 13775 } 13776 } 13777 } 13778 // Cannot simplify select with vector condition 13779 if (TheSelect->getOperand(0).getValueType().isVector()) return false; 13780 13781 // If this is a select from two identical things, try to pull the operation 13782 // through the select. 13783 if (LHS.getOpcode() != RHS.getOpcode() || 13784 !LHS.hasOneUse() || !RHS.hasOneUse()) 13785 return false; 13786 13787 // If this is a load and the token chain is identical, replace the select 13788 // of two loads with a load through a select of the address to load from. 13789 // This triggers in things like "select bool X, 10.0, 123.0" after the FP 13790 // constants have been dropped into the constant pool. 
13791 if (LHS.getOpcode() == ISD::LOAD) { 13792 LoadSDNode *LLD = cast<LoadSDNode>(LHS); 13793 LoadSDNode *RLD = cast<LoadSDNode>(RHS); 13794 13795 // Token chains must be identical. 13796 if (LHS.getOperand(0) != RHS.getOperand(0) || 13797 // Do not let this transformation reduce the number of volatile loads. 13798 LLD->isVolatile() || RLD->isVolatile() || 13799 // FIXME: If either is a pre/post inc/dec load, 13800 // we'd need to split out the address adjustment. 13801 LLD->isIndexed() || RLD->isIndexed() || 13802 // If this is an EXTLOAD, the VT's must match. 13803 LLD->getMemoryVT() != RLD->getMemoryVT() || 13804 // If this is an EXTLOAD, the kind of extension must match. 13805 (LLD->getExtensionType() != RLD->getExtensionType() && 13806 // The only exception is if one of the extensions is anyext. 13807 LLD->getExtensionType() != ISD::EXTLOAD && 13808 RLD->getExtensionType() != ISD::EXTLOAD) || 13809 // FIXME: this discards src value information. This is 13810 // over-conservative. It would be beneficial to be able to remember 13811 // both potential memory locations. Since we are discarding 13812 // src value info, don't do the transformation if the memory 13813 // locations are not in the default address space. 13814 LLD->getPointerInfo().getAddrSpace() != 0 || 13815 RLD->getPointerInfo().getAddrSpace() != 0 || 13816 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), 13817 LLD->getBasePtr().getValueType())) 13818 return false; 13819 13820 // Check that the select condition doesn't reach either load. If so, 13821 // folding this will induce a cycle into the DAG. If not, this is safe to 13822 // xform, so create a select of the addresses. 
13823 SDValue Addr; 13824 if (TheSelect->getOpcode() == ISD::SELECT) { 13825 SDNode *CondNode = TheSelect->getOperand(0).getNode(); 13826 if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || 13827 (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) 13828 return false; 13829 // The loads must not depend on one another. 13830 if (LLD->isPredecessorOf(RLD) || 13831 RLD->isPredecessorOf(LLD)) 13832 return false; 13833 Addr = DAG.getSelect(SDLoc(TheSelect), 13834 LLD->getBasePtr().getValueType(), 13835 TheSelect->getOperand(0), LLD->getBasePtr(), 13836 RLD->getBasePtr()); 13837 } else { // Otherwise SELECT_CC 13838 SDNode *CondLHS = TheSelect->getOperand(0).getNode(); 13839 SDNode *CondRHS = TheSelect->getOperand(1).getNode(); 13840 13841 if ((LLD->hasAnyUseOfValue(1) && 13842 (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || 13843 (RLD->hasAnyUseOfValue(1) && 13844 (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) 13845 return false; 13846 13847 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), 13848 LLD->getBasePtr().getValueType(), 13849 TheSelect->getOperand(0), 13850 TheSelect->getOperand(1), 13851 LLD->getBasePtr(), RLD->getBasePtr(), 13852 TheSelect->getOperand(4)); 13853 } 13854 13855 SDValue Load; 13856 // It is safe to replace the two loads if they have different alignments, 13857 // but the new load must be the minimum (most restrictive) alignment of the 13858 // inputs. 13859 bool isInvariant = LLD->isInvariant() & RLD->isInvariant(); 13860 unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); 13861 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { 13862 Load = DAG.getLoad(TheSelect->getValueType(0), 13863 SDLoc(TheSelect), 13864 // FIXME: Discards pointer and AA info. 
13865 LLD->getChain(), Addr, MachinePointerInfo(), 13866 LLD->isVolatile(), LLD->isNonTemporal(), 13867 isInvariant, Alignment); 13868 } else { 13869 Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? 13870 RLD->getExtensionType() : LLD->getExtensionType(), 13871 SDLoc(TheSelect), 13872 TheSelect->getValueType(0), 13873 // FIXME: Discards pointer and AA info. 13874 LLD->getChain(), Addr, MachinePointerInfo(), 13875 LLD->getMemoryVT(), LLD->isVolatile(), 13876 LLD->isNonTemporal(), isInvariant, Alignment); 13877 } 13878 13879 // Users of the select now use the result of the load. 13880 CombineTo(TheSelect, Load); 13881 13882 // Users of the old loads now use the new load's chain. We know the 13883 // old-load value is dead now. 13884 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); 13885 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); 13886 return true; 13887 } 13888 13889 return false; 13890 } 13891 13892 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 13893 /// where 'cond' is the comparison specified by CC. 13894 SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, 13895 SDValue N2, SDValue N3, 13896 ISD::CondCode CC, bool NotExtCompare) { 13897 // (x ? y : y) -> y. 13898 if (N2 == N3) return N2; 13899 13900 EVT VT = N2.getValueType(); 13901 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 13902 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); 13903 13904 // Determine if the condition we're dealing with is constant 13905 SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), 13906 N0, N1, CC, DL, false); 13907 if (SCC.getNode()) AddToWorklist(SCC.getNode()); 13908 13909 if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { 13910 // fold select_cc true, x, y -> x 13911 // fold select_cc false, x, y -> y 13912 return !SCCC->isNullValue() ? 
N2 : N3; 13913 } 13914 13915 // Check to see if we can simplify the select into an fabs node 13916 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { 13917 // Allow either -0.0 or 0.0 13918 if (CFP->isZero()) { 13919 // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs 13920 if ((CC == ISD::SETGE || CC == ISD::SETGT) && 13921 N0 == N2 && N3.getOpcode() == ISD::FNEG && 13922 N2 == N3.getOperand(0)) 13923 return DAG.getNode(ISD::FABS, DL, VT, N0); 13924 13925 // select (setl[te] X, +/-0.0), fneg(X), X -> fabs 13926 if ((CC == ISD::SETLT || CC == ISD::SETLE) && 13927 N0 == N3 && N2.getOpcode() == ISD::FNEG && 13928 N2.getOperand(0) == N3) 13929 return DAG.getNode(ISD::FABS, DL, VT, N3); 13930 } 13931 } 13932 13933 // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" 13934 // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 13935 // in it. This is a win when the constant is not otherwise available because 13936 // it replaces two constant pool loads with one. We only do this if the FP 13937 // type is known to be legal, because if it isn't, then we are before legalize 13938 // types an we want the other legalization to happen first (e.g. to avoid 13939 // messing with soft float) and if the ConstantFP is not legal, because if 13940 // it is legal, we may not need to store the FP constant in a constant pool. 13941 if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2)) 13942 if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) { 13943 if (TLI.isTypeLegal(N2.getValueType()) && 13944 (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != 13945 TargetLowering::Legal && 13946 !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) && 13947 !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) && 13948 // If both constants have multiple uses, then we won't need to do an 13949 // extra load, they are likely around in registers for other users. 
13950 (TV->hasOneUse() || FV->hasOneUse())) { 13951 Constant *Elts[] = { 13952 const_cast<ConstantFP*>(FV->getConstantFPValue()), 13953 const_cast<ConstantFP*>(TV->getConstantFPValue()) 13954 }; 13955 Type *FPTy = Elts[0]->getType(); 13956 const DataLayout &TD = DAG.getDataLayout(); 13957 13958 // Create a ConstantArray of the two constants. 13959 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); 13960 SDValue CPIdx = 13961 DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), 13962 TD.getPrefTypeAlignment(FPTy)); 13963 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); 13964 13965 // Get the offsets to the 0 and 1 element of the array so that we can 13966 // select between them. 13967 SDValue Zero = DAG.getIntPtrConstant(0, DL); 13968 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); 13969 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); 13970 13971 SDValue Cond = DAG.getSetCC(DL, 13972 getSetCCResultType(N0.getValueType()), 13973 N0, N1, CC); 13974 AddToWorklist(Cond.getNode()); 13975 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), 13976 Cond, One, Zero); 13977 AddToWorklist(CstOffset.getNode()); 13978 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, 13979 CstOffset); 13980 AddToWorklist(CPIdx.getNode()); 13981 return DAG.getLoad( 13982 TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, 13983 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), 13984 false, false, false, Alignment); 13985 } 13986 } 13987 13988 // Check to see if we can perform the "gzip trick", transforming 13989 // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) 13990 if (isNullConstant(N3) && CC == ISD::SETLT && 13991 (isNullConstant(N1) || // (a < 0) ? b : 0 13992 (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? 
a : 0 13993 EVT XType = N0.getValueType(); 13994 EVT AType = N2.getValueType(); 13995 if (XType.bitsGE(AType)) { 13996 // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a 13997 // single-bit constant. 13998 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { 13999 unsigned ShCtV = N2C->getAPIntValue().logBase2(); 14000 ShCtV = XType.getSizeInBits() - ShCtV - 1; 14001 SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0), 14002 getShiftAmountTy(N0.getValueType())); 14003 SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), 14004 XType, N0, ShCt); 14005 AddToWorklist(Shift.getNode()); 14006 14007 if (XType.bitsGT(AType)) { 14008 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 14009 AddToWorklist(Shift.getNode()); 14010 } 14011 14012 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 14013 } 14014 14015 SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), 14016 XType, N0, 14017 DAG.getConstant(XType.getSizeInBits() - 1, 14018 SDLoc(N0), 14019 getShiftAmountTy(N0.getValueType()))); 14020 AddToWorklist(Shift.getNode()); 14021 14022 if (XType.bitsGT(AType)) { 14023 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 14024 AddToWorklist(Shift.getNode()); 14025 } 14026 14027 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 14028 } 14029 } 14030 14031 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) 14032 // where y is has a single bit set. 14033 // A plaintext description would be, we can turn the SELECT_CC into an AND 14034 // when the condition can be materialized as an all-ones register. Any 14035 // single bit-test can be materialized as an all-ones register with 14036 // shift-left and shift-right-arith. 
14037 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && 14038 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { 14039 SDValue AndLHS = N0->getOperand(0); 14040 ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 14041 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { 14042 // Shift the tested bit over the sign bit. 14043 APInt AndMask = ConstAndRHS->getAPIntValue(); 14044 SDValue ShlAmt = 14045 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), 14046 getShiftAmountTy(AndLHS.getValueType())); 14047 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); 14048 14049 // Now arithmetic right shift it all the way over, so the result is either 14050 // all-ones, or zero. 14051 SDValue ShrAmt = 14052 DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), 14053 getShiftAmountTy(Shl.getValueType())); 14054 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); 14055 14056 return DAG.getNode(ISD::AND, DL, VT, Shr, N3); 14057 } 14058 } 14059 14060 // fold select C, 16, 0 -> shl C, 4 14061 if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() && 14062 TLI.getBooleanContents(N0.getValueType()) == 14063 TargetLowering::ZeroOrOneBooleanContent) { 14064 14065 // If the caller doesn't want us to simplify this into a zext of a compare, 14066 // don't do it. 14067 if (NotExtCompare && N2C->isOne()) 14068 return SDValue(); 14069 14070 // Get a SetCC of the condition 14071 // NOTE: Don't create a SETCC if it's not legal on this target. 
14072 if (!LegalOperations || 14073 TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) { 14074 SDValue Temp, SCC; 14075 // cast from setcc result type to select result type 14076 if (LegalTypes) { 14077 SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), 14078 N0, N1, CC); 14079 if (N2.getValueType().bitsLT(SCC.getValueType())) 14080 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), 14081 N2.getValueType()); 14082 else 14083 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), 14084 N2.getValueType(), SCC); 14085 } else { 14086 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); 14087 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), 14088 N2.getValueType(), SCC); 14089 } 14090 14091 AddToWorklist(SCC.getNode()); 14092 AddToWorklist(Temp.getNode()); 14093 14094 if (N2C->isOne()) 14095 return Temp; 14096 14097 // shl setcc result by log2 n2c 14098 return DAG.getNode( 14099 ISD::SHL, DL, N2.getValueType(), Temp, 14100 DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp), 14101 getShiftAmountTy(Temp.getValueType()))); 14102 } 14103 } 14104 14105 // Check to see if this is an integer abs. 
14106 // select_cc setg[te] X, 0, X, -X -> 14107 // select_cc setgt X, -1, X, -X -> 14108 // select_cc setl[te] X, 0, -X, X -> 14109 // select_cc setlt X, 1, -X, X -> 14110 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 14111 if (N1C) { 14112 ConstantSDNode *SubC = nullptr; 14113 if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || 14114 (N1C->isAllOnesValue() && CC == ISD::SETGT)) && 14115 N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) 14116 SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); 14117 else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || 14118 (N1C->isOne() && CC == ISD::SETLT)) && 14119 N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) 14120 SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); 14121 14122 EVT XType = N0.getValueType(); 14123 if (SubC && SubC->isNullValue() && XType.isInteger()) { 14124 SDLoc DL(N0); 14125 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, 14126 N0, 14127 DAG.getConstant(XType.getSizeInBits() - 1, DL, 14128 getShiftAmountTy(N0.getValueType()))); 14129 SDValue Add = DAG.getNode(ISD::ADD, DL, 14130 XType, N0, Shift); 14131 AddToWorklist(Shift.getNode()); 14132 AddToWorklist(Add.getNode()); 14133 return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); 14134 } 14135 } 14136 14137 return SDValue(); 14138 } 14139 14140 /// This is a stub for TargetLowering::SimplifySetCC. 14141 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, 14142 SDValue N1, ISD::CondCode Cond, 14143 SDLoc DL, bool foldBooleans) { 14144 TargetLowering::DAGCombinerInfo 14145 DagCombineInfo(DAG, Level, false, this); 14146 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); 14147 } 14148 14149 /// Given an ISD::SDIV node expressing a divide by constant, return 14150 /// a DAG expression to select that will generate the same value by multiplying 14151 /// by a magic number. 14152 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 
14153 SDValue DAGCombiner::BuildSDIV(SDNode *N) { 14154 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); 14155 if (!C) 14156 return SDValue(); 14157 14158 // Avoid division by zero. 14159 if (C->isNullValue()) 14160 return SDValue(); 14161 14162 std::vector<SDNode*> Built; 14163 SDValue S = 14164 TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); 14165 14166 for (SDNode *N : Built) 14167 AddToWorklist(N); 14168 return S; 14169 } 14170 14171 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a 14172 /// DAG expression that will generate the same value by right shifting. 14173 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { 14174 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); 14175 if (!C) 14176 return SDValue(); 14177 14178 // Avoid division by zero. 14179 if (C->isNullValue()) 14180 return SDValue(); 14181 14182 std::vector<SDNode *> Built; 14183 SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built); 14184 14185 for (SDNode *N : Built) 14186 AddToWorklist(N); 14187 return S; 14188 } 14189 14190 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG 14191 /// expression that will generate the same value by multiplying by a magic 14192 /// number. 14193 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 14194 SDValue DAGCombiner::BuildUDIV(SDNode *N) { 14195 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); 14196 if (!C) 14197 return SDValue(); 14198 14199 // Avoid division by zero. 
14200 if (C->isNullValue()) 14201 return SDValue(); 14202 14203 std::vector<SDNode*> Built; 14204 SDValue S = 14205 TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); 14206 14207 for (SDNode *N : Built) 14208 AddToWorklist(N); 14209 return S; 14210 } 14211 14212 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { 14213 if (Level >= AfterLegalizeDAG) 14214 return SDValue(); 14215 14216 // Expose the DAG combiner to the target combiner implementations. 14217 TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); 14218 14219 unsigned Iterations = 0; 14220 if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) { 14221 if (Iterations) { 14222 // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 14223 // For the reciprocal, we need to find the zero of the function: 14224 // F(X) = A X - 1 [which has a zero at X = 1/A] 14225 // => 14226 // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form 14227 // does not require additional intermediate precision] 14228 EVT VT = Op.getValueType(); 14229 SDLoc DL(Op); 14230 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); 14231 14232 AddToWorklist(Est.getNode()); 14233 14234 // Newton iterations: Est = Est + Est (1 - Arg * Est) 14235 for (unsigned i = 0; i < Iterations; ++i) { 14236 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); 14237 AddToWorklist(NewEst.getNode()); 14238 14239 NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); 14240 AddToWorklist(NewEst.getNode()); 14241 14242 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); 14243 AddToWorklist(NewEst.getNode()); 14244 14245 Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); 14246 AddToWorklist(Est.getNode()); 14247 } 14248 } 14249 return Est; 14250 } 14251 14252 return SDValue(); 14253 } 14254 14255 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 14256 /// For the reciprocal sqrt, we need to find the zero of the 
function: 14257 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] 14258 /// => 14259 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) 14260 /// As a result, we precompute A/2 prior to the iteration loop. 14261 SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, 14262 unsigned Iterations, 14263 SDNodeFlags *Flags) { 14264 EVT VT = Arg.getValueType(); 14265 SDLoc DL(Arg); 14266 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); 14267 14268 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that 14269 // this entire sequence requires only one FP constant. 14270 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); 14271 AddToWorklist(HalfArg.getNode()); 14272 14273 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); 14274 AddToWorklist(HalfArg.getNode()); 14275 14276 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) 14277 for (unsigned i = 0; i < Iterations; ++i) { 14278 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); 14279 AddToWorklist(NewEst.getNode()); 14280 14281 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); 14282 AddToWorklist(NewEst.getNode()); 14283 14284 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); 14285 AddToWorklist(NewEst.getNode()); 14286 14287 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); 14288 AddToWorklist(Est.getNode()); 14289 } 14290 return Est; 14291 } 14292 14293 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 14294 /// For the reciprocal sqrt, we need to find the zero of the function: 14295 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] 14296 /// => 14297 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) 14298 SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, 14299 unsigned Iterations, 14300 SDNodeFlags *Flags) { 14301 EVT VT = Arg.getValueType(); 14302 SDLoc DL(Arg); 14303 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); 14304 SDValue 
MinusHalf = DAG.getConstantFP(-0.5, DL, VT); 14305 14306 // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) 14307 for (unsigned i = 0; i < Iterations; ++i) { 14308 SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); 14309 AddToWorklist(HalfEst.getNode()); 14310 14311 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); 14312 AddToWorklist(Est.getNode()); 14313 14314 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); 14315 AddToWorklist(Est.getNode()); 14316 14317 Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags); 14318 AddToWorklist(Est.getNode()); 14319 14320 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags); 14321 AddToWorklist(Est.getNode()); 14322 } 14323 return Est; 14324 } 14325 14326 SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { 14327 if (Level >= AfterLegalizeDAG) 14328 return SDValue(); 14329 14330 // Expose the DAG combiner to the target combiner implementations. 14331 TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); 14332 unsigned Iterations = 0; 14333 bool UseOneConstNR = false; 14334 if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { 14335 AddToWorklist(Est.getNode()); 14336 if (Iterations) { 14337 Est = UseOneConstNR ? 14338 BuildRsqrtNROneConst(Op, Est, Iterations, Flags) : 14339 BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags); 14340 } 14341 return Est; 14342 } 14343 14344 return SDValue(); 14345 } 14346 14347 /// Return true if base is a frame index, which is known not to alias with 14348 /// anything but itself. Provides base object and offset as results. 14349 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, 14350 const GlobalValue *&GV, const void *&CV) { 14351 // Assume it is a primitive operation. 14352 Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; 14353 14354 // If it's an adding a simple constant then integrate the offset. 
14355 if (Base.getOpcode() == ISD::ADD) { 14356 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) { 14357 Base = Base.getOperand(0); 14358 Offset += C->getZExtValue(); 14359 } 14360 } 14361 14362 // Return the underlying GlobalValue, and update the Offset. Return false 14363 // for GlobalAddressSDNode since the same GlobalAddress may be represented 14364 // by multiple nodes with different offsets. 14365 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) { 14366 GV = G->getGlobal(); 14367 Offset += G->getOffset(); 14368 return false; 14369 } 14370 14371 // Return the underlying Constant value, and update the Offset. Return false 14372 // for ConstantSDNodes since the same constant pool entry may be represented 14373 // by multiple nodes with different offsets. 14374 if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) { 14375 CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal() 14376 : (const void *)C->getConstVal(); 14377 Offset += C->getOffset(); 14378 return false; 14379 } 14380 // If it's any of the following then it can't alias with anything but itself. 14381 return isa<FrameIndexSDNode>(Base); 14382 } 14383 14384 /// Return true if there is any possibility that the two addresses overlap. 14385 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { 14386 // If they are the same then they must be aliases. 14387 if (Op0->getBasePtr() == Op1->getBasePtr()) return true; 14388 14389 // If they are both volatile then they cannot be reordered. 14390 if (Op0->isVolatile() && Op1->isVolatile()) return true; 14391 14392 // If one operation reads from invariant memory, and the other may store, they 14393 // cannot alias. These should really be checking the equivalent of mayWrite, 14394 // but it only matters for memory nodes other than load /store. 
14395 if (Op0->isInvariant() && Op1->writeMem()) 14396 return false; 14397 14398 if (Op1->isInvariant() && Op0->writeMem()) 14399 return false; 14400 14401 // Gather base node and offset information. 14402 SDValue Base1, Base2; 14403 int64_t Offset1, Offset2; 14404 const GlobalValue *GV1, *GV2; 14405 const void *CV1, *CV2; 14406 bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(), 14407 Base1, Offset1, GV1, CV1); 14408 bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(), 14409 Base2, Offset2, GV2, CV2); 14410 14411 // If they have a same base address then check to see if they overlap. 14412 if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) 14413 return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || 14414 (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); 14415 14416 // It is possible for different frame indices to alias each other, mostly 14417 // when tail call optimization reuses return address slots for arguments. 14418 // To catch this case, look up the actual index of frame indices to compute 14419 // the real alias relationship. 14420 if (isFrameIndex1 && isFrameIndex2) { 14421 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 14422 Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); 14423 Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); 14424 return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || 14425 (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); 14426 } 14427 14428 // Otherwise, if we know what the bases are, and they aren't identical, then 14429 // we know they cannot alias. 14430 if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) 14431 return false; 14432 14433 // If we know required SrcValue1 and SrcValue2 have relatively large alignment 14434 // compared to the size and offset of the access, we may be able to prove they 14435 // do not alias. 
This check is conservative for now to catch cases created by 14436 // splitting vector types. 14437 if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) && 14438 (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && 14439 (Op0->getMemoryVT().getSizeInBits() >> 3 == 14440 Op1->getMemoryVT().getSizeInBits() >> 3) && 14441 (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits()) >> 3) { 14442 int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); 14443 int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); 14444 14445 // There is no overlap between these relatively aligned accesses of similar 14446 // size, return no alias. 14447 if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 || 14448 (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1) 14449 return false; 14450 } 14451 14452 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 14453 ? CombinerGlobalAA 14454 : DAG.getSubtarget().useAA(); 14455 #ifndef NDEBUG 14456 if (CombinerAAOnlyFunc.getNumOccurrences() && 14457 CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) 14458 UseAA = false; 14459 #endif 14460 if (UseAA && 14461 Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { 14462 // Use alias analysis information. 14463 int64_t MinOffset = std::min(Op0->getSrcValueOffset(), 14464 Op1->getSrcValueOffset()); 14465 int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) + 14466 Op0->getSrcValueOffset() - MinOffset; 14467 int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + 14468 Op1->getSrcValueOffset() - MinOffset; 14469 AliasResult AAResult = 14470 AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1, 14471 UseTBAA ? Op0->getAAInfo() : AAMDNodes()), 14472 MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2, 14473 UseTBAA ? 
Op1->getAAInfo() : AAMDNodes())); 14474 if (AAResult == NoAlias) 14475 return false; 14476 } 14477 14478 // Otherwise we have to assume they alias. 14479 return true; 14480 } 14481 14482 /// Walk up chain skipping non-aliasing memory nodes, 14483 /// looking for aliasing nodes and adding them to the Aliases vector. 14484 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, 14485 SmallVectorImpl<SDValue> &Aliases) { 14486 SmallVector<SDValue, 8> Chains; // List of chains to visit. 14487 SmallPtrSet<SDNode *, 16> Visited; // Visited node set. 14488 14489 // Get alias information for node. 14490 bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile(); 14491 14492 // Starting off. 14493 Chains.push_back(OriginalChain); 14494 unsigned Depth = 0; 14495 14496 // Look at each chain and determine if it is an alias. If so, add it to the 14497 // aliases list. If not, then continue up the chain looking for the next 14498 // candidate. 14499 while (!Chains.empty()) { 14500 SDValue Chain = Chains.pop_back_val(); 14501 14502 // For TokenFactor nodes, look at each operand and only continue up the 14503 // chain until we reach the depth limit. 14504 // 14505 // FIXME: The depth check could be made to return the last non-aliasing 14506 // chain we found before we hit a tokenfactor rather than the original 14507 // chain. 14508 if (Depth > TLI.getGatherAllAliasesMaxDepth()) { 14509 Aliases.clear(); 14510 Aliases.push_back(OriginalChain); 14511 return; 14512 } 14513 14514 // Don't bother if we've been before. 14515 if (!Visited.insert(Chain.getNode()).second) 14516 continue; 14517 14518 switch (Chain.getOpcode()) { 14519 case ISD::EntryToken: 14520 // Entry token is ideal chain operand, but handled in FindBetterChain. 14521 break; 14522 14523 case ISD::LOAD: 14524 case ISD::STORE: { 14525 // Get alias information for Chain. 
14526 bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) && 14527 !cast<LSBaseSDNode>(Chain.getNode())->isVolatile(); 14528 14529 // If chain is alias then stop here. 14530 if (!(IsLoad && IsOpLoad) && 14531 isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) { 14532 Aliases.push_back(Chain); 14533 } else { 14534 // Look further up the chain. 14535 Chains.push_back(Chain.getOperand(0)); 14536 ++Depth; 14537 } 14538 break; 14539 } 14540 14541 case ISD::TokenFactor: 14542 // We have to check each of the operands of the token factor for "small" 14543 // token factors, so we queue them up. Adding the operands to the queue 14544 // (stack) in reverse order maintains the original order and increases the 14545 // likelihood that getNode will find a matching token factor (CSE.) 14546 if (Chain.getNumOperands() > 16) { 14547 Aliases.push_back(Chain); 14548 break; 14549 } 14550 for (unsigned n = Chain.getNumOperands(); n;) 14551 Chains.push_back(Chain.getOperand(--n)); 14552 ++Depth; 14553 break; 14554 14555 default: 14556 // For all other instructions we will just have to take what we can get. 14557 Aliases.push_back(Chain); 14558 break; 14559 } 14560 } 14561 14562 // We need to be careful here to also search for aliases through the 14563 // value operand of a store, etc. Consider the following situation: 14564 // Token1 = ... 14565 // L1 = load Token1, %52 14566 // S1 = store Token1, L1, %51 14567 // L2 = load Token1, %52+8 14568 // S2 = store Token1, L2, %51+8 14569 // Token2 = Token(S1, S2) 14570 // L3 = load Token2, %53 14571 // S3 = store Token2, L3, %52 14572 // L4 = load Token2, %53+8 14573 // S4 = store Token2, L4, %52+8 14574 // If we search for aliases of S3 (which loads address %52), and we look 14575 // only through the chain, then we'll miss the trivial dependence on L1 14576 // (which also loads from %52). 
We then might change all loads and 14577 // stores to use Token1 as their chain operand, which could result in 14578 // copying %53 into %52 before copying %52 into %51 (which should 14579 // happen first). 14580 // 14581 // The problem is, however, that searching for such data dependencies 14582 // can become expensive, and the cost is not directly related to the 14583 // chain depth. Instead, we'll rule out such configurations here by 14584 // insisting that we've visited all chain users (except for users 14585 // of the original chain, which is not necessary). When doing this, 14586 // we need to look through nodes we don't care about (otherwise, things 14587 // like register copies will interfere with trivial cases). 14588 14589 SmallVector<const SDNode *, 16> Worklist; 14590 for (const SDNode *N : Visited) 14591 if (N != OriginalChain.getNode()) 14592 Worklist.push_back(N); 14593 14594 while (!Worklist.empty()) { 14595 const SDNode *M = Worklist.pop_back_val(); 14596 14597 // We have already visited M, and want to make sure we've visited any uses 14598 // of M that we care about. For uses that we've not visisted, and don't 14599 // care about, queue them to the worklist. 14600 14601 for (SDNode::use_iterator UI = M->use_begin(), 14602 UIE = M->use_end(); UI != UIE; ++UI) 14603 if (UI.getUse().getValueType() == MVT::Other && 14604 Visited.insert(*UI).second) { 14605 if (isa<MemSDNode>(*UI)) { 14606 // We've not visited this use, and we care about it (it could have an 14607 // ordering dependency with the original node). 14608 Aliases.clear(); 14609 Aliases.push_back(OriginalChain); 14610 return; 14611 } 14612 14613 // We've not visited this use, but we don't care about it. Mark it as 14614 // visited and enqueue it to the worklist. 14615 Worklist.push_back(*UI); 14616 } 14617 } 14618 } 14619 14620 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain 14621 /// (aliasing node.) 
14622 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { 14623 SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor. 14624 14625 // Accumulate all the aliases to this node. 14626 GatherAllAliases(N, OldChain, Aliases); 14627 14628 // If no operands then chain to entry token. 14629 if (Aliases.size() == 0) 14630 return DAG.getEntryNode(); 14631 14632 // If a single operand then chain to it. We don't need to revisit it. 14633 if (Aliases.size() == 1) 14634 return Aliases[0]; 14635 14636 // Construct a custom tailored token factor. 14637 return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); 14638 } 14639 14640 bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { 14641 // This holds the base pointer, index, and the offset in bytes from the base 14642 // pointer. 14643 BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); 14644 14645 // We must have a base and an offset. 14646 if (!BasePtr.Base.getNode()) 14647 return false; 14648 14649 // Do not handle stores to undef base pointers. 14650 if (BasePtr.Base.getOpcode() == ISD::UNDEF) 14651 return false; 14652 14653 SmallVector<StoreSDNode *, 8> ChainedStores; 14654 ChainedStores.push_back(St); 14655 14656 // Walk up the chain and look for nodes with offsets from the same 14657 // base pointer. Stop when reaching an instruction with a different kind 14658 // or instruction which has a different base pointer. 14659 StoreSDNode *Index = St; 14660 while (Index) { 14661 // If the chain has more than one use, then we can't reorder the mem ops. 14662 if (Index != St && !SDValue(Index, 0)->hasOneUse()) 14663 break; 14664 14665 if (Index->isVolatile() || Index->isIndexed()) 14666 break; 14667 14668 // Find the base pointer and offset for this memory node. 14669 BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); 14670 14671 // Check that the base pointer is the same as the original one. 
14672 if (!Ptr.equalBaseIndex(BasePtr)) 14673 break; 14674 14675 // Find the next memory operand in the chain. If the next operand in the 14676 // chain is a store then move up and continue the scan with the next 14677 // memory operand. If the next operand is a load save it and use alias 14678 // information to check if it interferes with anything. 14679 SDNode *NextInChain = Index->getChain().getNode(); 14680 while (true) { 14681 if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { 14682 // We found a store node. Use it for the next iteration. 14683 ChainedStores.push_back(STn); 14684 Index = STn; 14685 break; 14686 } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { 14687 NextInChain = Ldn->getChain().getNode(); 14688 continue; 14689 } else { 14690 Index = nullptr; 14691 break; 14692 } 14693 } 14694 } 14695 14696 bool MadeChange = false; 14697 SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; 14698 14699 for (StoreSDNode *ChainedStore : ChainedStores) { 14700 SDValue Chain = ChainedStore->getChain(); 14701 SDValue BetterChain = FindBetterChain(ChainedStore, Chain); 14702 14703 if (Chain != BetterChain) { 14704 MadeChange = true; 14705 BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); 14706 } 14707 } 14708 14709 // Do all replacements after finding the replacements to make to avoid making 14710 // the chains more complicated by introducing new TokenFactors. 14711 for (auto Replacement : BetterChains) 14712 replaceStoreChain(Replacement.first, Replacement.second); 14713 14714 return MadeChange; 14715 } 14716 14717 /// This is the entry point for the file. 14718 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, 14719 CodeGenOpt::Level OptLevel) { 14720 /// This is the main entry point to this class. 14721 DAGCombiner(*this, AA, OptLevel).Run(Level); 14722 } 14723