//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "dagcombine"

// Counters reported through the LLVM statistics machinery.
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");

namespace {
  // Hidden command-line switches controlling the combiner's alias analysis.
  static cl::opt<bool>
    CombinerAA("combiner-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner alias-analysis heuristics"));

  static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner's use of IR alias analysis"));

  static cl::opt<bool>
    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
               cl::desc("Enable DAG combiner's use of TBAA"));

  // Only available in builds with assertions enabled (NDEBUG not defined).
#ifndef NDEBUG
  static cl::opt<std::string>
    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
               cl::desc("Only use DAG-combiner alias analysis in this"
                        " function"));
#endif

  /// Hidden option to stress test load slicing, i.e., when this option
  /// is enabled, load slicing bypasses most of its profitability guards.
  static cl::opt<bool>
  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                    cl::desc("Bypass the profitability model of load "
                             "slicing"),
                    cl::init(false));

  /// Hidden option (on by default) that allows the combiner to split the
  /// indexing part off of loads.
  static cl::opt<bool>
    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                      cl::desc("DAG combiner may split indexing from loads"));

//------------------------------ DAGCombiner ---------------------------------//

  /// Worklist-driven combiner over a SelectionDAG. Holds the worklist, the
  /// set of already-combined nodes, and the per-opcode visit routines.
  class DAGCombiner {
    /// The DAG being combined.
    SelectionDAG &DAG;
    /// Target lowering info, taken from the DAG at construction.
    const TargetLowering &TLI;
    /// Current combine level; the constructor starts it at
    /// BeforeLegalizeTypes.
    CombineLevel Level;
    /// Optimization level handed to the constructor.
    CodeGenOpt::Level OptLevel;
    /// Initialized to false by the constructor.
    bool LegalOperations;
    /// Initialized to false by the constructor. While false, isTypeLegal()
    /// reports every type as legal and getShiftAmountTy() uses the pointer
    /// type for scalar shift amounts.
    bool LegalTypes;
    /// True when the function carries the OptimizeForSize or MinSize
    /// attribute (computed in the constructor).
    bool ForCodeSize;

    /// \brief Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// \brief Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// \brief Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 64> CombinedNodes;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis &AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// Add N to the back of the worklist (next to be processed) unless it is
    /// already on the worklist; in that case nothing changes and the node
    /// keeps its existing position.
    void AddToWorklist(SDNode *N) {
      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      // The map insertion only succeeds for nodes not yet tracked, which is
      // what keeps the worklist free of duplicates.
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove N from the worklist (if present) and from the set of combined
    /// nodes.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replace N with the NumTo values in To. The overloads below are
    /// convenience wrappers for one and two replacement values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    ///
    /// This overload demands every bit of the (scalar) value type.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue Trunc, SDValue ExtLoad, SDLoc DL,
                         ISD::NodeType ExtType);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitSREM(SDNode *N);
    SDValue visitUREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitSDIVREM(SDNode *N);
    SDValue visitUDIVREM(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);
    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
                             SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          SDLoc DL, bool foldBooleans = true);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildReciprocalEstimate(SDValue Op);
    SDValue BuildRsqrtEstimate(SDValue Op);
    SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
    SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              SDLoc DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);

    SDValue GetDemandedBits(SDValue V, const APInt &Mask);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
      MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;
      // Offset from the base ptr.
      int64_t OffsetFromBase;
      // What is the sequence number of this mem node.
      // Lowest mem operand in the DAG starts at zero.
      unsigned SequenceNum;
    };

    /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
    /// constant build_vector of the stored constant values in Stores.
    SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
                                         SDLoc SL,
                                         ArrayRef<MemOpLink> Stores,
                                         EVT Ty) const;

    /// This is a helper function for MergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store.
    /// \return True if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumElem,
                                         bool IsConstantSrc, bool UseVector);

    /// This is a helper function for MergeConsecutiveStores.
    /// Stores that may be merged are placed in StoreNodes.
    /// Loads that may alias with those stores are placed in AliasLoadNodes.
    void getStoreMergeAndAliasCandidates(
        StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
        SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return True if some memory operations were changed.
    bool MergeConsecutiveStores(StoreSDNode *N);

    /// \brief Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

  public:
    /// Build a combiner for DAG \p D using alias analysis \p A at
    /// optimization level \p OL. The combiner starts at level
    /// BeforeLegalizeTypes with LegalOperations and LegalTypes cleared;
    /// ForCodeSize is derived from the function's OptimizeForSize / MinSize
    /// attributes.
    DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
      auto *F = DAG.getMachineFunction().getFunction();
      ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
                    F->hasFnAttribute(Attribute::MinSize);
    }

    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    ///
    /// Vector types are returned unchanged. For scalars, the target's scalar
    /// shift-amount type is used once LegalTypes is set, and the pointer type
    /// otherwise.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      if (LHSTy.isVector())
        return LHSTy;
      auto &DL = DAG.getDataLayout();
      return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
                        : TLI.getPointerTy(DL);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }
  };
}


namespace {
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;
public:
  /// Registers the listener on the DAG owned by \p dc.
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  /// Drops the deleted node \p N from the combiner's worklist; \p E is not
  /// used here.
  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};
}

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

// Each method below casts the stored DC pointer to DAGCombiner* and forwards
// to the DAGCombiner member of the same purpose.

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->removeFromWorklist(N);
}

// Forwards the array form; passes the address of the first element together
// with the element count.
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}


SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===// 515 // Helper Functions 516 //===----------------------------------------------------------------------===// 517 518 void DAGCombiner::deleteAndRecombine(SDNode *N) { 519 removeFromWorklist(N); 520 521 // If the operands of this node are only used by the node, they will now be 522 // dead. Make sure to re-visit them and recursively delete dead nodes. 523 for (const SDValue &Op : N->ops()) 524 // For an operand generating multiple values, one of the values may 525 // become dead allowing further simplification (e.g. split index 526 // arithmetic from an indexed load). 527 if (Op->hasOneUse() || Op->getNumValues() > 1) 528 AddToWorklist(Op.getNode()); 529 530 DAG.DeleteNode(N); 531 } 532 533 /// Return 1 if we can compute the negated form of the specified expression for 534 /// the same cost as the expression itself, or 2 if we can compute the negated 535 /// form more cheaply than the expression itself. 536 static char isNegatibleForFree(SDValue Op, bool LegalOperations, 537 const TargetLowering &TLI, 538 const TargetOptions *Options, 539 unsigned Depth = 0) { 540 // fneg is removable even if it has multiple uses. 541 if (Op.getOpcode() == ISD::FNEG) return 2; 542 543 // Don't allow anything with multiple uses. 544 if (!Op.hasOneUse()) return 0; 545 546 // Don't recurse exponentially. 547 if (Depth > 6) return 0; 548 549 switch (Op.getOpcode()) { 550 default: return false; 551 case ISD::ConstantFP: 552 // Don't invert constant FP values after legalize. The negated constant 553 // isn't necessarily legal. 554 return LegalOperations ? 0 : 1; 555 case ISD::FADD: 556 // FIXME: determine better conditions for this xform. 557 if (!Options->UnsafeFPMath) return 0; 558 559 // After operation legalization, it might not be legal to create new FSUBs. 
560 if (LegalOperations && 561 !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) 562 return 0; 563 564 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 565 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 566 Options, Depth + 1)) 567 return V; 568 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 569 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 570 Depth + 1); 571 case ISD::FSUB: 572 // We can't turn -(A-B) into B-A when we honor signed zeros. 573 if (!Options->UnsafeFPMath) return 0; 574 575 // fold (fneg (fsub A, B)) -> (fsub B, A) 576 return 1; 577 578 case ISD::FMUL: 579 case ISD::FDIV: 580 if (Options->HonorSignDependentRoundingFPMath()) return 0; 581 582 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 583 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 584 Options, Depth + 1)) 585 return V; 586 587 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 588 Depth + 1); 589 590 case ISD::FP_EXTEND: 591 case ISD::FP_ROUND: 592 case ISD::FSIN: 593 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, 594 Depth + 1); 595 } 596 } 597 598 /// If isNegatibleForFree returns true, return the newly negated expression. 599 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 600 bool LegalOperations, unsigned Depth = 0) { 601 const TargetOptions &Options = DAG.getTarget().Options; 602 // fneg is removable even if it has multiple uses. 603 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 604 605 // Don't allow anything with multiple uses. 
606 assert(Op.hasOneUse() && "Unknown reuse!"); 607 608 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 609 switch (Op.getOpcode()) { 610 default: llvm_unreachable("Unknown code"); 611 case ISD::ConstantFP: { 612 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 613 V.changeSign(); 614 return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); 615 } 616 case ISD::FADD: 617 // FIXME: determine better conditions for this xform. 618 assert(Options.UnsafeFPMath); 619 620 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 621 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 622 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 623 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 624 GetNegatedExpression(Op.getOperand(0), DAG, 625 LegalOperations, Depth+1), 626 Op.getOperand(1)); 627 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 628 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 629 GetNegatedExpression(Op.getOperand(1), DAG, 630 LegalOperations, Depth+1), 631 Op.getOperand(0)); 632 case ISD::FSUB: 633 // We can't turn -(A-B) into B-A when we honor signed zeros. 
634 assert(Options.UnsafeFPMath); 635 636 // fold (fneg (fsub 0, B)) -> B 637 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 638 if (N0CFP->isZero()) 639 return Op.getOperand(1); 640 641 // fold (fneg (fsub A, B)) -> (fsub B, A) 642 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 643 Op.getOperand(1), Op.getOperand(0)); 644 645 case ISD::FMUL: 646 case ISD::FDIV: 647 assert(!Options.HonorSignDependentRoundingFPMath()); 648 649 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 650 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 651 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 652 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 653 GetNegatedExpression(Op.getOperand(0), DAG, 654 LegalOperations, Depth+1), 655 Op.getOperand(1)); 656 657 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 658 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 659 Op.getOperand(0), 660 GetNegatedExpression(Op.getOperand(1), DAG, 661 LegalOperations, Depth+1)); 662 663 case ISD::FP_EXTEND: 664 case ISD::FSIN: 665 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 666 GetNegatedExpression(Op.getOperand(0), DAG, 667 LegalOperations, Depth+1)); 668 case ISD::FP_ROUND: 669 return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), 670 GetNegatedExpression(Op.getOperand(0), DAG, 671 LegalOperations, Depth+1), 672 Op.getOperand(1)); 673 } 674 } 675 676 // Return true if this node is a setcc, or is a select_cc 677 // that selects between the target values used for true and false, making it 678 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to 679 // the appropriate nodes based on the type of node we are checking. This 680 // simplifies life a bit for the callers. 
681 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 682 SDValue &CC) const { 683 if (N.getOpcode() == ISD::SETCC) { 684 LHS = N.getOperand(0); 685 RHS = N.getOperand(1); 686 CC = N.getOperand(2); 687 return true; 688 } 689 690 if (N.getOpcode() != ISD::SELECT_CC || 691 !TLI.isConstTrueVal(N.getOperand(2).getNode()) || 692 !TLI.isConstFalseVal(N.getOperand(3).getNode())) 693 return false; 694 695 if (TLI.getBooleanContents(N.getValueType()) == 696 TargetLowering::UndefinedBooleanContent) 697 return false; 698 699 LHS = N.getOperand(0); 700 RHS = N.getOperand(1); 701 CC = N.getOperand(4); 702 return true; 703 } 704 705 /// Return true if this is a SetCC-equivalent operation with only one use. 706 /// If this is true, it allows the users to invert the operation for free when 707 /// it is profitable to do so. 708 bool DAGCombiner::isOneUseSetCC(SDValue N) const { 709 SDValue N0, N1, N2; 710 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 711 return true; 712 return false; 713 } 714 715 /// Returns true if N is a BUILD_VECTOR node whose 716 /// elements are all the same constant or undefined. 717 static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { 718 BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); 719 if (!C) 720 return false; 721 722 APInt SplatUndef; 723 unsigned SplatBitSize; 724 bool HasAnyUndefs; 725 EVT EltVT = N->getValueType(0).getVectorElementType(); 726 return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 727 HasAnyUndefs) && 728 EltVT.getSizeInBits() >= SplatBitSize); 729 } 730 731 // \brief Returns the SDNode if it is a constant integer BuildVector 732 // or constant integer. 
733 static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { 734 if (isa<ConstantSDNode>(N)) 735 return N.getNode(); 736 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) 737 return N.getNode(); 738 return nullptr; 739 } 740 741 // \brief Returns the SDNode if it is a constant float BuildVector 742 // or constant float. 743 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { 744 if (isa<ConstantFPSDNode>(N)) 745 return N.getNode(); 746 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) 747 return N.getNode(); 748 return nullptr; 749 } 750 751 // \brief Returns the SDNode if it is a constant splat BuildVector or constant 752 // int. 753 static ConstantSDNode *isConstOrConstSplat(SDValue N) { 754 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) 755 return CN; 756 757 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 758 BitVector UndefElements; 759 ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); 760 761 // BuildVectors can truncate their operands. Ignore that case here. 762 // FIXME: We blindly ignore splats which include undef which is overly 763 // pessimistic. 764 if (CN && UndefElements.none() && 765 CN->getValueType(0) == N.getValueType().getScalarType()) 766 return CN; 767 } 768 769 return nullptr; 770 } 771 772 // \brief Returns the SDNode if it is a constant splat BuildVector or constant 773 // float. 
static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
  // A plain floating-point constant is returned directly.
  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
    return CN;

  // Otherwise accept a BuildVector splat, but only when no element is undef.
  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
    BitVector UndefElements;
    ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);

    if (CN && UndefElements.none())
      return CN;
  }

  return nullptr;
}

/// Try to reassociate (Opc N0, N1) so that constants end up together.
///
/// Handles the case where N0 (and then, symmetrically, N1) is itself an Opc
/// node whose second operand is a constant integer or constant-integer
/// BuildVector. Returns the rewritten value, or an empty SDValue when no
/// rewrite applies.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
                                    SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc) {
    if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        // Both sides are constant but folding failed: give up without trying
        // the N1 form below.
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        // Queue the new inner node so that it gets combined as well.
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  // Mirror image of the block above, with the nested op on the right.
  if (N1.getOpcode() == Opc) {
    if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}

/// Replace every result of N with the corresponding entry of To.
///
/// \param N     node being replaced; must have exactly NumTo results.
/// \param To    replacement values, one per result of N. Entries with a null
///              node are skipped by the type check and the worklist update.
/// \param NumTo number of entries in To.
/// \param AddTo when true, the replacement nodes and their users are pushed
///              onto the worklist.
/// \returns SDValue(N, 0).
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.1 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        To[0].getNode()->dump(&DAG);
        dbgs() << " and " << NumTo-1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // While in scope, nodes deleted by the replacement are removed from the
  // worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

/// Apply a target-lowering optimization: replace TLO.Old with TLO.New in the
/// DAG, queue the new node and its users, and delete the old node if it has
/// no uses left.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation.  If so, return true.
///
/// \param Op       value to simplify.
/// \param Demanded mask of the bits of Op that are demanded.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownZero, KnownOne;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 ";
        TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: ";
        TLO.New.getNode()->dump(&DAG);
        dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Replace Load with ExtLoad: result 0 of Load is replaced by a TRUNCATE of
/// ExtLoad's result 0 back to Load's type, result 1 of Load is replaced by
/// ExtLoad's result 1, and Load is then deleted.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc dl(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));

  DEBUG(dbgs() << "\nReplacing.9 ";
        Load->dump(&DAG);
        dbgs() << "\nWith: ";
        Trunc.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

/// Produce a version of Op widened to type PVT.
///
/// \param Op      value to promote.
/// \param PVT     type to promote to.
/// \param Replace set to true when Op is a load and an extending load was
///                built for it; set to false on every other path.
/// \returns the promoted value, or an empty SDValue when the fallback
///          ANY_EXTEND is not legal for PVT.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc dl(Op);
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes a zero-extending load when that is legal
    // and an any-extending load otherwise; an already-extending load keeps
    // its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, dl, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    return DAG.getNode(ISD::AssertSext, dl, PVT,
                       SExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::AssertZext:
    return DAG.getNode(ISD::AssertZext, dl, PVT,
                       ZExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::Constant: {
    // Constants of byte-sized types are sign-extended, all others
    // zero-extended.
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, dl, PVT, Op);
  }
  }

  // Fallback for every other opcode: any-extend, if the target allows it.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
}

/// Promote Op to PVT and wrap the result in a SIGN_EXTEND_INREG from Op's
/// original type. Returns an empty SDValue when SIGN_EXTEND_INREG is not
/// legal for PVT or when PromoteOperand fails.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // PromoteOperand built an extending load; swap it in for the original load.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

/// Promote Op to PVT and zero-extend the result in-register from Op's
/// original type. Returns an empty SDValue when PromoteOperand fails.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  // PromoteOperand built an extending load; swap it in for the original load.
  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
}
995 996 /// Promote the specified integer binary operation if the target indicates it is 997 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to 998 /// i32 since i16 instructions are longer. 999 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { 1000 if (!LegalOperations) 1001 return SDValue(); 1002 1003 EVT VT = Op.getValueType(); 1004 if (VT.isVector() || !VT.isInteger()) 1005 return SDValue(); 1006 1007 // If operation type is 'undesirable', e.g. i16 on x86, consider 1008 // promoting it. 1009 unsigned Opc = Op.getOpcode(); 1010 if (TLI.isTypeDesirableForOp(Opc, VT)) 1011 return SDValue(); 1012 1013 EVT PVT = VT; 1014 // Consult target whether it is a good idea to promote this operation and 1015 // what's the right type to promote it to. 1016 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1017 assert(PVT != VT && "Don't know what type to promote to!"); 1018 1019 bool Replace0 = false; 1020 SDValue N0 = Op.getOperand(0); 1021 SDValue NN0 = PromoteOperand(N0, PVT, Replace0); 1022 if (!NN0.getNode()) 1023 return SDValue(); 1024 1025 bool Replace1 = false; 1026 SDValue N1 = Op.getOperand(1); 1027 SDValue NN1; 1028 if (N0 == N1) 1029 NN1 = NN0; 1030 else { 1031 NN1 = PromoteOperand(N1, PVT, Replace1); 1032 if (!NN1.getNode()) 1033 return SDValue(); 1034 } 1035 1036 AddToWorklist(NN0.getNode()); 1037 if (NN1.getNode()) 1038 AddToWorklist(NN1.getNode()); 1039 1040 if (Replace0) 1041 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); 1042 if (Replace1) 1043 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); 1044 1045 DEBUG(dbgs() << "\nPromoting "; 1046 Op.getNode()->dump(&DAG)); 1047 SDLoc dl(Op); 1048 return DAG.getNode(ISD::TRUNCATE, dl, VT, 1049 DAG.getNode(Opc, dl, PVT, NN0, NN1)); 1050 } 1051 return SDValue(); 1052 } 1053 1054 /// Promote the specified integer shift operation if the target indicates it is 1055 /// beneficial. e.g. 
On x86, it's usually better to promote i16 operations to 1056 /// i32 since i16 instructions are longer. 1057 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { 1058 if (!LegalOperations) 1059 return SDValue(); 1060 1061 EVT VT = Op.getValueType(); 1062 if (VT.isVector() || !VT.isInteger()) 1063 return SDValue(); 1064 1065 // If operation type is 'undesirable', e.g. i16 on x86, consider 1066 // promoting it. 1067 unsigned Opc = Op.getOpcode(); 1068 if (TLI.isTypeDesirableForOp(Opc, VT)) 1069 return SDValue(); 1070 1071 EVT PVT = VT; 1072 // Consult target whether it is a good idea to promote this operation and 1073 // what's the right type to promote it to. 1074 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1075 assert(PVT != VT && "Don't know what type to promote to!"); 1076 1077 bool Replace = false; 1078 SDValue N0 = Op.getOperand(0); 1079 if (Opc == ISD::SRA) 1080 N0 = SExtPromoteOperand(Op.getOperand(0), PVT); 1081 else if (Opc == ISD::SRL) 1082 N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); 1083 else 1084 N0 = PromoteOperand(N0, PVT, Replace); 1085 if (!N0.getNode()) 1086 return SDValue(); 1087 1088 AddToWorklist(N0.getNode()); 1089 if (Replace) 1090 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); 1091 1092 DEBUG(dbgs() << "\nPromoting "; 1093 Op.getNode()->dump(&DAG)); 1094 SDLoc dl(Op); 1095 return DAG.getNode(ISD::TRUNCATE, dl, VT, 1096 DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); 1097 } 1098 return SDValue(); 1099 } 1100 1101 SDValue DAGCombiner::PromoteExtend(SDValue Op) { 1102 if (!LegalOperations) 1103 return SDValue(); 1104 1105 EVT VT = Op.getValueType(); 1106 if (VT.isVector() || !VT.isInteger()) 1107 return SDValue(); 1108 1109 // If operation type is 'undesirable', e.g. i16 on x86, consider 1110 // promoting it. 
1111 unsigned Opc = Op.getOpcode(); 1112 if (TLI.isTypeDesirableForOp(Opc, VT)) 1113 return SDValue(); 1114 1115 EVT PVT = VT; 1116 // Consult target whether it is a good idea to promote this operation and 1117 // what's the right type to promote it to. 1118 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1119 assert(PVT != VT && "Don't know what type to promote to!"); 1120 // fold (aext (aext x)) -> (aext x) 1121 // fold (aext (zext x)) -> (zext x) 1122 // fold (aext (sext x)) -> (sext x) 1123 DEBUG(dbgs() << "\nPromoting "; 1124 Op.getNode()->dump(&DAG)); 1125 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); 1126 } 1127 return SDValue(); 1128 } 1129 1130 bool DAGCombiner::PromoteLoad(SDValue Op) { 1131 if (!LegalOperations) 1132 return false; 1133 1134 EVT VT = Op.getValueType(); 1135 if (VT.isVector() || !VT.isInteger()) 1136 return false; 1137 1138 // If operation type is 'undesirable', e.g. i16 on x86, consider 1139 // promoting it. 1140 unsigned Opc = Op.getOpcode(); 1141 if (TLI.isTypeDesirableForOp(Opc, VT)) 1142 return false; 1143 1144 EVT PVT = VT; 1145 // Consult target whether it is a good idea to promote this operation and 1146 // what's the right type to promote it to. 1147 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1148 assert(PVT != VT && "Don't know what type to promote to!"); 1149 1150 SDLoc dl(Op); 1151 SDNode *N = Op.getNode(); 1152 LoadSDNode *LD = cast<LoadSDNode>(N); 1153 EVT MemVT = LD->getMemoryVT(); 1154 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 1155 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? 
ISD::ZEXTLOAD 1156 : ISD::EXTLOAD) 1157 : LD->getExtensionType(); 1158 SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, 1159 LD->getChain(), LD->getBasePtr(), 1160 MemVT, LD->getMemOperand()); 1161 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); 1162 1163 DEBUG(dbgs() << "\nPromoting "; 1164 N->dump(&DAG); 1165 dbgs() << "\nTo: "; 1166 Result.getNode()->dump(&DAG); 1167 dbgs() << '\n'); 1168 WorklistRemover DeadNodes(*this); 1169 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 1170 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); 1171 deleteAndRecombine(N); 1172 AddToWorklist(Result.getNode()); 1173 return true; 1174 } 1175 return false; 1176 } 1177 1178 /// \brief Recursively delete a node which has no uses and any operands for 1179 /// which it is the only use. 1180 /// 1181 /// Note that this both deletes the nodes and removes them from the worklist. 1182 /// It also adds any nodes who have had a user deleted to the worklist as they 1183 /// may now have only one use and subject to other combines. 1184 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { 1185 if (!N->use_empty()) 1186 return false; 1187 1188 SmallSetVector<SDNode *, 16> Nodes; 1189 Nodes.insert(N); 1190 do { 1191 N = Nodes.pop_back_val(); 1192 if (!N) 1193 continue; 1194 1195 if (N->use_empty()) { 1196 for (const SDValue &ChildN : N->op_values()) 1197 Nodes.insert(ChildN.getNode()); 1198 1199 removeFromWorklist(N); 1200 DAG.DeleteNode(N); 1201 } else { 1202 AddToWorklist(N); 1203 } 1204 } while (!Nodes.empty()); 1205 return true; 1206 } 1207 1208 //===----------------------------------------------------------------------===// 1209 // Main DAG Combiner implementation 1210 //===----------------------------------------------------------------------===// 1211 1212 void DAGCombiner::Run(CombineLevel AtLevel) { 1213 // set the instance variables, so that the various visit routines may use it. 
1214 Level = AtLevel; 1215 LegalOperations = Level >= AfterLegalizeVectorOps; 1216 LegalTypes = Level >= AfterLegalizeTypes; 1217 1218 // Add all the dag nodes to the worklist. 1219 for (SDNode &Node : DAG.allnodes()) 1220 AddToWorklist(&Node); 1221 1222 // Create a dummy node (which is not added to allnodes), that adds a reference 1223 // to the root node, preventing it from being deleted, and tracking any 1224 // changes of the root. 1225 HandleSDNode Dummy(DAG.getRoot()); 1226 1227 // while the worklist isn't empty, find a node and 1228 // try and combine it. 1229 while (!WorklistMap.empty()) { 1230 SDNode *N; 1231 // The Worklist holds the SDNodes in order, but it may contain null entries. 1232 do { 1233 N = Worklist.pop_back_val(); 1234 } while (!N); 1235 1236 bool GoodWorklistEntry = WorklistMap.erase(N); 1237 (void)GoodWorklistEntry; 1238 assert(GoodWorklistEntry && 1239 "Found a worklist entry without a corresponding map entry!"); 1240 1241 // If N has no uses, it is dead. Make sure to revisit all N's operands once 1242 // N is deleted from the DAG, since they too may now be dead or may have a 1243 // reduced number of uses, allowing other xforms. 1244 if (recursivelyDeleteUnusedNodes(N)) 1245 continue; 1246 1247 WorklistRemover DeadNodes(*this); 1248 1249 // If this combine is running after legalizing the DAG, re-legalize any 1250 // nodes pulled off the worklist. 1251 if (Level == AfterLegalizeDAG) { 1252 SmallSetVector<SDNode *, 16> UpdatedNodes; 1253 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); 1254 1255 for (SDNode *LN : UpdatedNodes) { 1256 AddToWorklist(LN); 1257 AddUsersToWorklist(LN); 1258 } 1259 if (!NIsValid) 1260 continue; 1261 } 1262 1263 DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); 1264 1265 // Add any operands of the new node which have not yet been combined to the 1266 // worklist as well. Because the worklist uniques things already, this 1267 // won't repeatedly process the same operand. 
1268 CombinedNodes.insert(N); 1269 for (const SDValue &ChildN : N->op_values()) 1270 if (!CombinedNodes.count(ChildN.getNode())) 1271 AddToWorklist(ChildN.getNode()); 1272 1273 SDValue RV = combine(N); 1274 1275 if (!RV.getNode()) 1276 continue; 1277 1278 ++NodesCombined; 1279 1280 // If we get back the same node we passed in, rather than a new node or 1281 // zero, we know that the node must have defined multiple values and 1282 // CombineTo was used. Since CombineTo takes care of the worklist 1283 // mechanics for us, we have no work to do in this case. 1284 if (RV.getNode() == N) 1285 continue; 1286 1287 assert(N->getOpcode() != ISD::DELETED_NODE && 1288 RV.getNode()->getOpcode() != ISD::DELETED_NODE && 1289 "Node was deleted but visit returned new node!"); 1290 1291 DEBUG(dbgs() << " ... into: "; 1292 RV.getNode()->dump(&DAG)); 1293 1294 // Transfer debug value. 1295 DAG.TransferDbgValues(SDValue(N, 0), RV); 1296 if (N->getNumValues() == RV.getNode()->getNumValues()) 1297 DAG.ReplaceAllUsesWith(N, RV.getNode()); 1298 else { 1299 assert(N->getValueType(0) == RV.getValueType() && 1300 N->getNumValues() == 1 && "Type mismatch"); 1301 SDValue OpV = RV; 1302 DAG.ReplaceAllUsesWith(N, &OpV); 1303 } 1304 1305 // Push the new node and any users onto the worklist 1306 AddToWorklist(RV.getNode()); 1307 AddUsersToWorklist(RV.getNode()); 1308 1309 // Finally, if the node is now dead, remove it from the graph. The node 1310 // may not be dead if the replacement process recursively simplified to 1311 // something else needing this node. This will also take care of adding any 1312 // operands which have lost a user to the worklist. 1313 recursivelyDeleteUnusedNodes(N); 1314 } 1315 1316 // If the root changed (e.g. it was a dead load, update the root). 
1317 DAG.setRoot(Dummy.getValue()); 1318 DAG.RemoveDeadNodes(); 1319 } 1320 1321 SDValue DAGCombiner::visit(SDNode *N) { 1322 switch (N->getOpcode()) { 1323 default: break; 1324 case ISD::TokenFactor: return visitTokenFactor(N); 1325 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); 1326 case ISD::ADD: return visitADD(N); 1327 case ISD::SUB: return visitSUB(N); 1328 case ISD::ADDC: return visitADDC(N); 1329 case ISD::SUBC: return visitSUBC(N); 1330 case ISD::ADDE: return visitADDE(N); 1331 case ISD::SUBE: return visitSUBE(N); 1332 case ISD::MUL: return visitMUL(N); 1333 case ISD::SDIV: return visitSDIV(N); 1334 case ISD::UDIV: return visitUDIV(N); 1335 case ISD::SREM: return visitSREM(N); 1336 case ISD::UREM: return visitUREM(N); 1337 case ISD::MULHU: return visitMULHU(N); 1338 case ISD::MULHS: return visitMULHS(N); 1339 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); 1340 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); 1341 case ISD::SMULO: return visitSMULO(N); 1342 case ISD::UMULO: return visitUMULO(N); 1343 case ISD::SDIVREM: return visitSDIVREM(N); 1344 case ISD::UDIVREM: return visitUDIVREM(N); 1345 case ISD::AND: return visitAND(N); 1346 case ISD::OR: return visitOR(N); 1347 case ISD::XOR: return visitXOR(N); 1348 case ISD::SHL: return visitSHL(N); 1349 case ISD::SRA: return visitSRA(N); 1350 case ISD::SRL: return visitSRL(N); 1351 case ISD::ROTR: 1352 case ISD::ROTL: return visitRotate(N); 1353 case ISD::BSWAP: return visitBSWAP(N); 1354 case ISD::CTLZ: return visitCTLZ(N); 1355 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); 1356 case ISD::CTTZ: return visitCTTZ(N); 1357 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); 1358 case ISD::CTPOP: return visitCTPOP(N); 1359 case ISD::SELECT: return visitSELECT(N); 1360 case ISD::VSELECT: return visitVSELECT(N); 1361 case ISD::SELECT_CC: return visitSELECT_CC(N); 1362 case ISD::SETCC: return visitSETCC(N); 1363 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); 1364 case ISD::ZERO_EXTEND: 
return visitZERO_EXTEND(N); 1365 case ISD::ANY_EXTEND: return visitANY_EXTEND(N); 1366 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); 1367 case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); 1368 case ISD::TRUNCATE: return visitTRUNCATE(N); 1369 case ISD::BITCAST: return visitBITCAST(N); 1370 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); 1371 case ISD::FADD: return visitFADD(N); 1372 case ISD::FSUB: return visitFSUB(N); 1373 case ISD::FMUL: return visitFMUL(N); 1374 case ISD::FMA: return visitFMA(N); 1375 case ISD::FDIV: return visitFDIV(N); 1376 case ISD::FREM: return visitFREM(N); 1377 case ISD::FSQRT: return visitFSQRT(N); 1378 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); 1379 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); 1380 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); 1381 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); 1382 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); 1383 case ISD::FP_ROUND: return visitFP_ROUND(N); 1384 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); 1385 case ISD::FP_EXTEND: return visitFP_EXTEND(N); 1386 case ISD::FNEG: return visitFNEG(N); 1387 case ISD::FABS: return visitFABS(N); 1388 case ISD::FFLOOR: return visitFFLOOR(N); 1389 case ISD::FMINNUM: return visitFMINNUM(N); 1390 case ISD::FMAXNUM: return visitFMAXNUM(N); 1391 case ISD::FCEIL: return visitFCEIL(N); 1392 case ISD::FTRUNC: return visitFTRUNC(N); 1393 case ISD::BRCOND: return visitBRCOND(N); 1394 case ISD::BR_CC: return visitBR_CC(N); 1395 case ISD::LOAD: return visitLOAD(N); 1396 case ISD::STORE: return visitSTORE(N); 1397 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); 1398 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); 1399 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); 1400 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); 1401 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); 1402 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); 1403 case 
ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); 1404 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); 1405 case ISD::MGATHER: return visitMGATHER(N); 1406 case ISD::MLOAD: return visitMLOAD(N); 1407 case ISD::MSCATTER: return visitMSCATTER(N); 1408 case ISD::MSTORE: return visitMSTORE(N); 1409 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); 1410 } 1411 return SDValue(); 1412 } 1413 1414 SDValue DAGCombiner::combine(SDNode *N) { 1415 SDValue RV = visit(N); 1416 1417 // If nothing happened, try a target-specific DAG combine. 1418 if (!RV.getNode()) { 1419 assert(N->getOpcode() != ISD::DELETED_NODE && 1420 "Node was deleted but visit returned NULL!"); 1421 1422 if (N->getOpcode() >= ISD::BUILTIN_OP_END || 1423 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { 1424 1425 // Expose the DAG combiner to the target combiner impls. 1426 TargetLowering::DAGCombinerInfo 1427 DagCombineInfo(DAG, Level, false, this); 1428 1429 RV = TLI.PerformDAGCombine(N, DagCombineInfo); 1430 } 1431 } 1432 1433 // If nothing happened still, try promoting the operation. 1434 if (!RV.getNode()) { 1435 switch (N->getOpcode()) { 1436 default: break; 1437 case ISD::ADD: 1438 case ISD::SUB: 1439 case ISD::MUL: 1440 case ISD::AND: 1441 case ISD::OR: 1442 case ISD::XOR: 1443 RV = PromoteIntBinOp(SDValue(N, 0)); 1444 break; 1445 case ISD::SHL: 1446 case ISD::SRA: 1447 case ISD::SRL: 1448 RV = PromoteIntShiftOp(SDValue(N, 0)); 1449 break; 1450 case ISD::SIGN_EXTEND: 1451 case ISD::ZERO_EXTEND: 1452 case ISD::ANY_EXTEND: 1453 RV = PromoteExtend(SDValue(N, 0)); 1454 break; 1455 case ISD::LOAD: 1456 if (PromoteLoad(SDValue(N, 0))) 1457 RV = SDValue(N, 0); 1458 break; 1459 } 1460 } 1461 1462 // If N is a commutative binary node, try commuting it to enable more 1463 // sdisel CSE. 
1464 if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) && 1465 N->getNumValues() == 1) { 1466 SDValue N0 = N->getOperand(0); 1467 SDValue N1 = N->getOperand(1); 1468 1469 // Constant operands are canonicalized to RHS. 1470 if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { 1471 SDValue Ops[] = {N1, N0}; 1472 SDNode *CSENode; 1473 if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) { 1474 CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, 1475 &BinNode->Flags); 1476 } else { 1477 CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); 1478 } 1479 if (CSENode) 1480 return SDValue(CSENode, 0); 1481 } 1482 } 1483 1484 return RV; 1485 } 1486 1487 /// Given a node, return its input chain if it has one, otherwise return a null 1488 /// sd operand. 1489 static SDValue getInputChainForNode(SDNode *N) { 1490 if (unsigned NumOps = N->getNumOperands()) { 1491 if (N->getOperand(0).getValueType() == MVT::Other) 1492 return N->getOperand(0); 1493 if (N->getOperand(NumOps-1).getValueType() == MVT::Other) 1494 return N->getOperand(NumOps-1); 1495 for (unsigned i = 1; i < NumOps-1; ++i) 1496 if (N->getOperand(i).getValueType() == MVT::Other) 1497 return N->getOperand(i); 1498 } 1499 return SDValue(); 1500 } 1501 1502 SDValue DAGCombiner::visitTokenFactor(SDNode *N) { 1503 // If N has two operands, where one has an input chain equal to the other, 1504 // the 'other' chain is redundant. 1505 if (N->getNumOperands() == 2) { 1506 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) 1507 return N->getOperand(0); 1508 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) 1509 return N->getOperand(1); 1510 } 1511 1512 SmallVector<SDNode *, 8> TFs; // List of token factors to visit. 1513 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. 1514 SmallPtrSet<SDNode*, 16> SeenOps; 1515 bool Changed = false; // If we should replace this token factor. 
1516 1517 // Start out with this token factor. 1518 TFs.push_back(N); 1519 1520 // Iterate through token factors. The TFs grows when new token factors are 1521 // encountered. 1522 for (unsigned i = 0; i < TFs.size(); ++i) { 1523 SDNode *TF = TFs[i]; 1524 1525 // Check each of the operands. 1526 for (const SDValue &Op : TF->op_values()) { 1527 1528 switch (Op.getOpcode()) { 1529 case ISD::EntryToken: 1530 // Entry tokens don't need to be added to the list. They are 1531 // redundant. 1532 Changed = true; 1533 break; 1534 1535 case ISD::TokenFactor: 1536 if (Op.hasOneUse() && 1537 std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { 1538 // Queue up for processing. 1539 TFs.push_back(Op.getNode()); 1540 // Clean up in case the token factor is removed. 1541 AddToWorklist(Op.getNode()); 1542 Changed = true; 1543 break; 1544 } 1545 // Fall thru 1546 1547 default: 1548 // Only add if it isn't already in the list. 1549 if (SeenOps.insert(Op.getNode()).second) 1550 Ops.push_back(Op); 1551 else 1552 Changed = true; 1553 break; 1554 } 1555 } 1556 } 1557 1558 SDValue Result; 1559 1560 // If we've changed things around then replace token factor. 1561 if (Changed) { 1562 if (Ops.empty()) { 1563 // The entry token is the only possible outcome. 1564 Result = DAG.getEntryNode(); 1565 } else { 1566 // New and improved token factor. 1567 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); 1568 } 1569 1570 // Add users to worklist if AA is enabled, since it may introduce 1571 // a lot of new chained token factors while removing memory deps. 1572 bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA 1573 : DAG.getSubtarget().useAA(); 1574 return CombineTo(N, Result, UseAA /*add to worklist*/); 1575 } 1576 1577 return Result; 1578 } 1579 1580 /// MERGE_VALUES can always be eliminated. 
1581 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { 1582 WorklistRemover DeadNodes(*this); 1583 // Replacing results may cause a different MERGE_VALUES to suddenly 1584 // be CSE'd with N, and carry its uses with it. Iterate until no 1585 // uses remain, to ensure that the node can be safely deleted. 1586 // First add the users of this node to the work list so that they 1587 // can be tried again once they have new operands. 1588 AddUsersToWorklist(N); 1589 do { 1590 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1591 DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); 1592 } while (!N->use_empty()); 1593 deleteAndRecombine(N); 1594 return SDValue(N, 0); // Return N so it doesn't get rechecked! 1595 } 1596 1597 static bool isNullConstant(SDValue V) { 1598 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 1599 return Const != nullptr && Const->isNullValue(); 1600 } 1601 1602 static bool isNullFPConstant(SDValue V) { 1603 ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); 1604 return Const != nullptr && Const->isZero() && !Const->isNegative(); 1605 } 1606 1607 static bool isAllOnesConstant(SDValue V) { 1608 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 1609 return Const != nullptr && Const->isAllOnesValue(); 1610 } 1611 1612 static bool isOneConstant(SDValue V) { 1613 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); 1614 return Const != nullptr && Const->isOne(); 1615 } 1616 1617 /// If \p N is a ContantSDNode with isOpaque() == false return it casted to a 1618 /// ContantSDNode pointer else nullptr. 1619 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { 1620 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); 1621 return Const != nullptr && !Const->isOpaque() ? 
Const : nullptr; 1622 } 1623 1624 SDValue DAGCombiner::visitADD(SDNode *N) { 1625 SDValue N0 = N->getOperand(0); 1626 SDValue N1 = N->getOperand(1); 1627 EVT VT = N0.getValueType(); 1628 1629 // fold vector ops 1630 if (VT.isVector()) { 1631 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 1632 return FoldedVOp; 1633 1634 // fold (add x, 0) -> x, vector edition 1635 if (ISD::isBuildVectorAllZeros(N1.getNode())) 1636 return N0; 1637 if (ISD::isBuildVectorAllZeros(N0.getNode())) 1638 return N1; 1639 } 1640 1641 // fold (add x, undef) -> undef 1642 if (N0.getOpcode() == ISD::UNDEF) 1643 return N0; 1644 if (N1.getOpcode() == ISD::UNDEF) 1645 return N1; 1646 // fold (add c1, c2) -> c1+c2 1647 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 1648 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 1649 if (N0C && N1C) 1650 return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); 1651 // canonicalize constant to RHS 1652 if (isConstantIntBuildVectorOrConstantInt(N0) && 1653 !isConstantIntBuildVectorOrConstantInt(N1)) 1654 return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); 1655 // fold (add x, 0) -> x 1656 if (isNullConstant(N1)) 1657 return N0; 1658 // fold (add Sym, c) -> Sym+c 1659 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) 1660 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && 1661 GA->getOpcode() == ISD::GlobalAddress) 1662 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, 1663 GA->getOffset() + 1664 (uint64_t)N1C->getSExtValue()); 1665 // fold ((c1-A)+c2) -> (c1+c2)-A 1666 if (N1C && N0.getOpcode() == ISD::SUB) 1667 if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { 1668 SDLoc DL(N); 1669 return DAG.getNode(ISD::SUB, DL, VT, 1670 DAG.getConstant(N1C->getAPIntValue()+ 1671 N0C->getAPIntValue(), DL, VT), 1672 N0.getOperand(1)); 1673 } 1674 // reassociate add 1675 if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) 1676 return RADD; 1677 // fold ((0-A) + B) -> B-A 1678 if (N0.getOpcode() 
== ISD::SUB && isNullConstant(N0.getOperand(0))) 1679 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); 1680 // fold (A + (0-B)) -> A-B 1681 if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0))) 1682 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); 1683 // fold (A+(B-A)) -> B 1684 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) 1685 return N1.getOperand(0); 1686 // fold ((B-A)+A) -> B 1687 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) 1688 return N0.getOperand(0); 1689 // fold (A+(B-(A+C))) to (B-C) 1690 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1691 N0 == N1.getOperand(1).getOperand(0)) 1692 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), 1693 N1.getOperand(1).getOperand(1)); 1694 // fold (A+(B-(C+A))) to (B-C) 1695 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1696 N0 == N1.getOperand(1).getOperand(1)) 1697 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), 1698 N1.getOperand(1).getOperand(0)); 1699 // fold (A+((B-A)+or-C)) to (B+or-C) 1700 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && 1701 N1.getOperand(0).getOpcode() == ISD::SUB && 1702 N0 == N1.getOperand(0).getOperand(1)) 1703 return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, 1704 N1.getOperand(0).getOperand(0), N1.getOperand(1)); 1705 1706 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant 1707 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { 1708 SDValue N00 = N0.getOperand(0); 1709 SDValue N01 = N0.getOperand(1); 1710 SDValue N10 = N1.getOperand(0); 1711 SDValue N11 = N1.getOperand(1); 1712 1713 if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) 1714 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 1715 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), 1716 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); 1717 } 1718 1719 if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) 1720 return SDValue(N, 0); 
1721 1722 // fold (a+b) -> (a|b) iff a and b share no bits. 1723 if (VT.isInteger() && !VT.isVector()) { 1724 APInt LHSZero, LHSOne; 1725 APInt RHSZero, RHSOne; 1726 DAG.computeKnownBits(N0, LHSZero, LHSOne); 1727 1728 if (LHSZero.getBoolValue()) { 1729 DAG.computeKnownBits(N1, RHSZero, RHSOne); 1730 1731 // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 1732 // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 1733 if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){ 1734 if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) 1735 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); 1736 } 1737 } 1738 } 1739 1740 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) 1741 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && 1742 isNullConstant(N1.getOperand(0).getOperand(0))) 1743 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, 1744 DAG.getNode(ISD::SHL, SDLoc(N), VT, 1745 N1.getOperand(0).getOperand(1), 1746 N1.getOperand(1))); 1747 if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && 1748 isNullConstant(N0.getOperand(0).getOperand(0))) 1749 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, 1750 DAG.getNode(ISD::SHL, SDLoc(N), VT, 1751 N0.getOperand(0).getOperand(1), 1752 N0.getOperand(1))); 1753 1754 if (N1.getOpcode() == ISD::AND) { 1755 SDValue AndOp0 = N1.getOperand(0); 1756 unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); 1757 unsigned DestBits = VT.getScalarType().getSizeInBits(); 1758 1759 // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) 1760 // and similar xforms where the inner op is either ~0 or 0. 
if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDLoc DL(N);
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Try to simplify an ADDC node. Operands 0 and 1 are the addends; result
/// value 1 is the flag (carry) output, which the folds below replace with a
/// CARRY_FALSE node of type MVT::Glue whenever no carry can be produced.
/// Returns the replacement value, or a null SDValue if no fold applied.
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 SDLoc(N), MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        SDLoc(N), MVT::Glue));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  DAG.computeKnownBits(N0, LHSZero, LHSOne);

  // Only analyse N1 when at least one bit of N0 is known to be zero;
  // otherwise the disjointness test below cannot succeed.
  if (LHSZero.getBoolValue()) {
    DAG.computeKnownBits(N1, RHSZero, RHSOne);

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
    if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
      return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
                       DAG.getNode(ISD::CARRY_FALSE,
                                   SDLoc(N), MVT::Glue));
  }

  return SDValue();
}

/// Try to simplify an ADDE node. Operands 0 and 1 are the addends and
/// operand 2 is the incoming carry. Returns the replacement node, or a null
/// SDValue if no fold applied.
SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG,
                             bool LegalOperations, bool LegalTypes) {
  // NOTE(review): LegalTypes is accepted but never read in this body.
  // Scalar types are always folded to a zero constant.
  if (!VT.isVector())
    return DAG.getConstant(0, DL, VT);
  // Vector types are folded only while operations need not be legal yet, or
  // when BUILD_VECTOR is legal for VT.
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, DL, VT);
  // Otherwise report "no fold" with a null SDValue.
  return SDValue();
}

/// Try to simplify a SUB node (operand 0 minus operand 1). The folds are
/// tried in order and the first match wins. Returns the replacement value,
/// or a null SDValue if nothing applied.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
  // fold (sub c1, c2) -> c1-c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C);
  // fold (sub x, c) -> (add x, -c)
  // Every non-null N1C returns here, so N1C is null for the rest of the
  // function.
  if (N1C) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstant(N0))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold C2-(A+C1) -> (C2-C1)-A
  // N1C1 is the constant RHS of N1 when N1 is an ADD, otherwise null.
  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
    dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDLoc DL(N);
    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
                                   DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, NewC,
                       N1.getOperand(0));
  }
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      // NOTE(review): N1C is always null at this point (the (sub x, c) fold
      // above returns for every non-null N1C), so this branch looks
      // unreachable as written.
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                      (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 SDLoc(N), VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Try to simplify a SUBC node. Result value 1 is the flag (borrow) output;
/// every fold here replaces it with a CARRY_FALSE node of type MVT::Glue.
/// Returns the replacement value, or a null SDValue if no fold applied.
SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1) {
    SDLoc DL(N);
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getNode(ISD::CARRY_FALSE, DL,
                                 MVT::Glue));
  }

  // fold (subc x, 0) -> x + no borrow
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                        MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (isAllOnesConstant(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  return SDValue();
}

/// Try to simplify a SUBE node. Operand 2 is the incoming carry. Returns the
/// replacement node, or a null SDValue if no fold applied.
SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

/// Try to simplify a MUL node. Handles scalar constants and, for vector
/// types, constant splats as reported by isConstantSplatVector. Returns the
/// replacement value, or a null SDValue if no fold applied.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);

  // Constant-ness of each operand. The opaque flags are only ever set on the
  // scalar path below; they stay false for vector splats.
  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
     !isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1 == 0)
    return N1;
  // We require a splat of the entire scalar bit width for non-contiguous
  // bit patterns.
  bool IsFullSplat =
    ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
    return N0;
  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
      IsFullSplat) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SHL, DL, VT, N0,
                       DAG.getConstant(ConstValue1.logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
      IsFullSplat) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // Scratch output for the isConstantSplatVector calls below; only the
  // boolean result of those calls is used.
  APInt Val;
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1IsConst && N0.getOpcode() == ISD::SHL &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
                     isa<ConstantSDNode>(N0.getOperand(1)))) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
                             N1, N0.getOperand(1));
    AddToWorklist(C3.getNode());
    return DAG.getNode(ISD::MUL, SDLoc(N), VT,
                       N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr,0), Y(nullptr,0);
    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    // Note that only the N0 form also accepts a constant-splat shift amount.
    if (N0.getOpcode() == ISD::SHL &&
        (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
                       isa<ConstantSDNode>(N0.getOperand(1))) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT,
                         Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
                     isa<ConstantSDNode>(N0.getOperand(1))))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

  return SDValue();
}

/// Try to simplify an SDIV node (signed division of operand 0 by operand 1).
/// Returns the replacement value, or a null SDValue if no fold applied.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
                         N0, N1);
  }

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
      !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
      (N1C->getAPIntValue().isPowerOf2() ||
       (-N1C->getAPIntValue()).isPowerOf2())) {
    // If dividing by powers of two is cheap, then don't perform the following
    // fold.
    if (TLI.isPow2SDivCheap())
      return SDValue();

    // Target-specific implementation of sdiv x, pow2.
    SDValue Res = BuildSDIVPow2(N);
    if (Res.getNode())
      return Res;

    // Divisors 1 and -1 were handled above, so the divisor magnitude here is
    // at least 2 and lg2 is at least 1.
    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
    SDLoc DL(N);

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, DL, VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue SRL =
        DAG.getNode(ISD::SRL, DL, VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
                  DAG.getConstant(lg2, DL,
                                  getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.
  if (N1C && !TLI.isIntDivCheap()) {
    SDValue Op = BuildSDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Try to simplify a UDIV node (unsigned division of operand 0 by operand 1).
/// Returns the replacement value, or a null SDValue if no fold applied.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (udiv c1, c2) -> c1/c2
  // A null result from FoldConstantArithmetic falls through to the folds
  // below instead of being returned.
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT,
                                                    N0C, N1C))
      return Folded;
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SRL, DL, VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        // The new shift amount is computed in the type of the original
        // shift amount operand.
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDLoc DL(N);
        SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                                  .logBase2(),
                                                  DL, ADDVT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
      }
    }
  }
  // fold (udiv x, c) -> alternate
  if (N1C && !TLI.isIntDivCheap()) {
    SDValue Op = BuildUDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Try to simplify an SREM node (signed remainder of operand 0 by operand 1).
/// Returns the replacement value, or a null SDValue if no fold applied.
SDValue DAGCombiner::visitSREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold (srem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT,
                                                    N0C, N1C))
      return Folded;
  // If we know the sign bits of both operands are zero, strength reduce to a
  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  if (N1C && !N1C->isNullValue()) {
    // Build a trial SDIV and run the combiner on it; the rewrite is used
    // only if combine() produced a node different from the trial division.
    SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Try to simplify a UREM node (unsigned remainder of operand 0 by
/// operand 1). Returns the replacement value, or a null SDValue if no fold
/// applied.
SDValue DAGCombiner::visitUREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold (urem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT,
                                                    N0C, N1C))
      return Folded;
  // fold (urem x, pow2) -> (and x, pow2-1)
  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
      N1C->getAPIntValue().isPowerOf2()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT, N0,
                       DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
  }
  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        SDLoc DL(N);
        // Adding an all-ones constant subtracts one, turning the shifted
        // power of two into a low-bit mask.
        SDValue Add =
          DAG.getNode(ISD::ADD, DL, VT, N1,
                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
                                 VT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::AND, DL, VT, N0, Add);
      }
    }
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  if (N1C && !N1C->isNullValue()) {
    // Build a trial UDIV and run the combiner on it; the rewrite is used
    // only if combine() produced a node different from the trial division.
    SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Try to simplify a MULHS node. Returns the replacement value, or a null
/// SDValue if no fold applied.
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  if (isOneConstant(N1)) {
    // NOTE(review): this DL shadows the function-level DL declared above;
    // both are constructed from N.
    SDLoc DL(N);
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
                                       DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
  // fold (mulhs x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Sign-extend both operands, multiply in the wide type, shift the
      // product right by the narrow width and truncate back to VT.
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// Try to simplify a MULHU node. Returns the replacement value, or a null
/// SDValue if no fold applied.
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhu x, 1) -> 0
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Zero-extend both operands, multiply in the wide type, shift the
      // product right by the narrow width and truncate back to VT.
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// Perform optimizations common to nodes that compute two values.
LoOp and HiOp 2483 /// give the opcodes for the two computations that are being performed. Return 2484 /// true if a simplification was made. 2485 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 2486 unsigned HiOp) { 2487 // If the high half is not needed, just compute the low half. 2488 bool HiExists = N->hasAnyUseOfValue(1); 2489 if (!HiExists && 2490 (!LegalOperations || 2491 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { 2492 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2493 return CombineTo(N, Res, Res); 2494 } 2495 2496 // If the low half is not needed, just compute the high half. 2497 bool LoExists = N->hasAnyUseOfValue(0); 2498 if (!LoExists && 2499 (!LegalOperations || 2500 TLI.isOperationLegal(HiOp, N->getValueType(1)))) { 2501 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2502 return CombineTo(N, Res, Res); 2503 } 2504 2505 // If both halves are used, return as it is. 2506 if (LoExists && HiExists) 2507 return SDValue(); 2508 2509 // If the two computed results can be simplified separately, separate them. 
2510 if (LoExists) { 2511 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2512 AddToWorklist(Lo.getNode()); 2513 SDValue LoOpt = combine(Lo.getNode()); 2514 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && 2515 (!LegalOperations || 2516 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) 2517 return CombineTo(N, LoOpt, LoOpt); 2518 } 2519 2520 if (HiExists) { 2521 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2522 AddToWorklist(Hi.getNode()); 2523 SDValue HiOpt = combine(Hi.getNode()); 2524 if (HiOpt.getNode() && HiOpt != Hi && 2525 (!LegalOperations || 2526 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) 2527 return CombineTo(N, HiOpt, HiOpt); 2528 } 2529 2530 return SDValue(); 2531 } 2532 2533 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { 2534 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); 2535 if (Res.getNode()) return Res; 2536 2537 EVT VT = N->getValueType(0); 2538 SDLoc DL(N); 2539 2540 // If the type is twice as wide is legal, transform the mulhu to a wider 2541 // multiply plus a shift. 2542 if (VT.isSimple() && !VT.isVector()) { 2543 MVT Simple = VT.getSimpleVT(); 2544 unsigned SimpleSize = Simple.getSizeInBits(); 2545 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2546 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2547 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); 2548 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); 2549 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2550 // Compute the high part as N1. 2551 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2552 DAG.getConstant(SimpleSize, DL, 2553 getShiftAmountTy(Lo.getValueType()))); 2554 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2555 // Compute the low part as N0. 
2556 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2557 return CombineTo(N, Lo, Hi); 2558 } 2559 } 2560 2561 return SDValue(); 2562 } 2563 2564 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { 2565 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); 2566 if (Res.getNode()) return Res; 2567 2568 EVT VT = N->getValueType(0); 2569 SDLoc DL(N); 2570 2571 // If the type is twice as wide is legal, transform the mulhu to a wider 2572 // multiply plus a shift. 2573 if (VT.isSimple() && !VT.isVector()) { 2574 MVT Simple = VT.getSimpleVT(); 2575 unsigned SimpleSize = Simple.getSizeInBits(); 2576 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2577 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2578 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); 2579 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); 2580 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2581 // Compute the high part as N1. 2582 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2583 DAG.getConstant(SimpleSize, DL, 2584 getShiftAmountTy(Lo.getValueType()))); 2585 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2586 // Compute the low part as N0. 
2587 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2588 return CombineTo(N, Lo, Hi); 2589 } 2590 } 2591 2592 return SDValue(); 2593 } 2594 2595 SDValue DAGCombiner::visitSMULO(SDNode *N) { 2596 // (smulo x, 2) -> (saddo x, x) 2597 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2598 if (C2->getAPIntValue() == 2) 2599 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), 2600 N->getOperand(0), N->getOperand(0)); 2601 2602 return SDValue(); 2603 } 2604 2605 SDValue DAGCombiner::visitUMULO(SDNode *N) { 2606 // (umulo x, 2) -> (uaddo x, x) 2607 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2608 if (C2->getAPIntValue() == 2) 2609 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), 2610 N->getOperand(0), N->getOperand(0)); 2611 2612 return SDValue(); 2613 } 2614 2615 SDValue DAGCombiner::visitSDIVREM(SDNode *N) { 2616 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); 2617 if (Res.getNode()) return Res; 2618 2619 return SDValue(); 2620 } 2621 2622 SDValue DAGCombiner::visitUDIVREM(SDNode *N) { 2623 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); 2624 if (Res.getNode()) return Res; 2625 2626 return SDValue(); 2627 } 2628 2629 /// If this is a binary operator with two operands of the same opcode, try to 2630 /// simplify it. 2631 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { 2632 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2633 EVT VT = N0.getValueType(); 2634 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); 2635 2636 // Bail early if none of these transforms apply. 
2637 if (N0.getNode()->getNumOperands() == 0) return SDValue(); 2638 2639 // For each of OP in AND/OR/XOR: 2640 // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) 2641 // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) 2642 // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) 2643 // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) 2644 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) 2645 // 2646 // do not sink logical op inside of a vector extend, since it may combine 2647 // into a vsetcc. 2648 EVT Op0VT = N0.getOperand(0).getValueType(); 2649 if ((N0.getOpcode() == ISD::ZERO_EXTEND || 2650 N0.getOpcode() == ISD::SIGN_EXTEND || 2651 N0.getOpcode() == ISD::BSWAP || 2652 // Avoid infinite looping with PromoteIntBinOp. 2653 (N0.getOpcode() == ISD::ANY_EXTEND && 2654 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || 2655 (N0.getOpcode() == ISD::TRUNCATE && 2656 (!TLI.isZExtFree(VT, Op0VT) || 2657 !TLI.isTruncateFree(Op0VT, VT)) && 2658 TLI.isTypeLegal(Op0VT))) && 2659 !VT.isVector() && 2660 Op0VT == N1.getOperand(0).getValueType() && 2661 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { 2662 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2663 N0.getOperand(0).getValueType(), 2664 N0.getOperand(0), N1.getOperand(0)); 2665 AddToWorklist(ORNode.getNode()); 2666 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); 2667 } 2668 2669 // For each of OP in SHL/SRL/SRA/AND... 
2670 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) 2671 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) 2672 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) 2673 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || 2674 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && 2675 N0.getOperand(1) == N1.getOperand(1)) { 2676 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2677 N0.getOperand(0).getValueType(), 2678 N0.getOperand(0), N1.getOperand(0)); 2679 AddToWorklist(ORNode.getNode()); 2680 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 2681 ORNode, N0.getOperand(1)); 2682 } 2683 2684 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) 2685 // Only perform this optimization after type legalization and before 2686 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by 2687 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and 2688 // we don't want to undo this promotion. 2689 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper 2690 // on scalars. 2691 if ((N0.getOpcode() == ISD::BITCAST || 2692 N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && 2693 Level == AfterLegalizeTypes) { 2694 SDValue In0 = N0.getOperand(0); 2695 SDValue In1 = N1.getOperand(0); 2696 EVT In0Ty = In0.getValueType(); 2697 EVT In1Ty = In1.getValueType(); 2698 SDLoc DL(N); 2699 // If both incoming values are integers, and the original types are the 2700 // same. 2701 if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { 2702 SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); 2703 SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); 2704 AddToWorklist(Op.getNode()); 2705 return BC; 2706 } 2707 } 2708 2709 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). 
2710 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) 2711 // If both shuffles use the same mask, and both shuffle within a single 2712 // vector, then it is worthwhile to move the swizzle after the operation. 2713 // The type-legalizer generates this pattern when loading illegal 2714 // vector types from memory. In many cases this allows additional shuffle 2715 // optimizations. 2716 // There are other cases where moving the shuffle after the xor/and/or 2717 // is profitable even if shuffles don't perform a swizzle. 2718 // If both shuffles use the same mask, and both shuffles have the same first 2719 // or second operand, then it might still be profitable to move the shuffle 2720 // after the xor/and/or operation. 2721 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { 2722 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); 2723 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); 2724 2725 assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && 2726 "Inputs to shuffles are not the same type"); 2727 2728 // Check that both shuffles use the same mask. The masks are known to be of 2729 // the same length because the result vector type is the same. 2730 // Check also that shuffles have only one use to avoid introducing extra 2731 // instructions. 2732 if (SVN0->hasOneUse() && SVN1->hasOneUse() && 2733 SVN0->getMask().equals(SVN1->getMask())) { 2734 SDValue ShOp = N0->getOperand(1); 2735 2736 // Don't try to fold this node if it requires introducing a 2737 // build vector of all zeros that might be illegal at this stage. 
2738 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2739 if (!LegalTypes) 2740 ShOp = DAG.getConstant(0, SDLoc(N), VT); 2741 else 2742 ShOp = SDValue(); 2743 } 2744 2745 // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) 2746 // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) 2747 // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) 2748 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { 2749 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2750 N0->getOperand(0), N1->getOperand(0)); 2751 AddToWorklist(NewNode.getNode()); 2752 return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, 2753 &SVN0->getMask()[0]); 2754 } 2755 2756 // Don't try to fold this node if it requires introducing a 2757 // build vector of all zeros that might be illegal at this stage. 2758 ShOp = N0->getOperand(0); 2759 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2760 if (!LegalTypes) 2761 ShOp = DAG.getConstant(0, SDLoc(N), VT); 2762 else 2763 ShOp = SDValue(); 2764 } 2765 2766 // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) 2767 // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) 2768 // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) 2769 if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { 2770 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2771 N0->getOperand(1), N1->getOperand(1)); 2772 AddToWorklist(NewNode.getNode()); 2773 return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, 2774 &SVN0->getMask()[0]); 2775 } 2776 } 2777 } 2778 2779 return SDValue(); 2780 } 2781 2782 /// This contains all DAGCombine rules which reduce two values combined by 2783 /// an And operation to a single value. This makes them reusable in the context 2784 /// of visitSELECT(). Rules involving constants are not included as 2785 /// visitSELECT() already handles those cases. 
2786 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, 2787 SDNode *LocReference) { 2788 EVT VT = N1.getValueType(); 2789 2790 // fold (and x, undef) -> 0 2791 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2792 return DAG.getConstant(0, SDLoc(LocReference), VT); 2793 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2794 SDValue LL, LR, RL, RR, CC0, CC1; 2795 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2796 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2797 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2798 2799 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2800 LL.getValueType().isInteger()) { 2801 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2802 if (isNullConstant(LR) && Op1 == ISD::SETEQ) { 2803 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2804 LR.getValueType(), LL, RL); 2805 AddToWorklist(ORNode.getNode()); 2806 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 2807 } 2808 if (isAllOnesConstant(LR)) { 2809 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2810 if (Op1 == ISD::SETEQ) { 2811 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), 2812 LR.getValueType(), LL, RL); 2813 AddToWorklist(ANDNode.getNode()); 2814 return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); 2815 } 2816 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2817 if (Op1 == ISD::SETGT) { 2818 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2819 LR.getValueType(), LL, RL); 2820 AddToWorklist(ORNode.getNode()); 2821 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 2822 } 2823 } 2824 } 2825 // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) 2826 if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && 2827 Op0 == Op1 && LL.getValueType().isInteger() && 2828 Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || 2829 (isAllOnesConstant(LR) && 
isNullConstant(RR)))) { 2830 SDLoc DL(N0); 2831 SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), 2832 LL, DAG.getConstant(1, DL, 2833 LL.getValueType())); 2834 AddToWorklist(ADDNode.getNode()); 2835 return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, 2836 DAG.getConstant(2, DL, LL.getValueType()), 2837 ISD::SETUGE); 2838 } 2839 // canonicalize equivalent to ll == rl 2840 if (LL == RR && LR == RL) { 2841 Op1 = ISD::getSetCCSwappedOperands(Op1); 2842 std::swap(RL, RR); 2843 } 2844 if (LL == RL && LR == RR) { 2845 bool isInteger = LL.getValueType().isInteger(); 2846 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2847 if (Result != ISD::SETCC_INVALID && 2848 (!LegalOperations || 2849 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 2850 TLI.isOperationLegal(ISD::SETCC, 2851 getSetCCResultType(N0.getSimpleValueType()))))) 2852 return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), 2853 LL, LR, Result); 2854 } 2855 } 2856 2857 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 2858 VT.getSizeInBits() <= 64) { 2859 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 2860 APInt ADDC = ADDI->getAPIntValue(); 2861 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2862 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 2863 // immediate for an add, but it is legal if its top c2 bits are set, 2864 // transform the ADD so the immediate doesn't need to be materialized 2865 // in a register. 
2866 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { 2867 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 2868 SRLI->getZExtValue()); 2869 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { 2870 ADDC |= Mask; 2871 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2872 SDLoc DL(N0); 2873 SDValue NewAdd = 2874 DAG.getNode(ISD::ADD, DL, VT, 2875 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); 2876 CombineTo(N0.getNode(), NewAdd); 2877 // Return N so it doesn't get rechecked! 2878 return SDValue(LocReference, 0); 2879 } 2880 } 2881 } 2882 } 2883 } 2884 } 2885 2886 return SDValue(); 2887 } 2888 2889 SDValue DAGCombiner::visitAND(SDNode *N) { 2890 SDValue N0 = N->getOperand(0); 2891 SDValue N1 = N->getOperand(1); 2892 EVT VT = N1.getValueType(); 2893 2894 // fold vector ops 2895 if (VT.isVector()) { 2896 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2897 return FoldedVOp; 2898 2899 // fold (and x, 0) -> 0, vector edition 2900 if (ISD::isBuildVectorAllZeros(N0.getNode())) 2901 // do not return N0, because undef node may exist in N0 2902 return DAG.getConstant( 2903 APInt::getNullValue( 2904 N0.getValueType().getScalarType().getSizeInBits()), 2905 SDLoc(N), N0.getValueType()); 2906 if (ISD::isBuildVectorAllZeros(N1.getNode())) 2907 // do not return N1, because undef node may exist in N1 2908 return DAG.getConstant( 2909 APInt::getNullValue( 2910 N1.getValueType().getScalarType().getSizeInBits()), 2911 SDLoc(N), N1.getValueType()); 2912 2913 // fold (and x, -1) -> x, vector edition 2914 if (ISD::isBuildVectorAllOnes(N0.getNode())) 2915 return N1; 2916 if (ISD::isBuildVectorAllOnes(N1.getNode())) 2917 return N0; 2918 } 2919 2920 // fold (and c1, c2) -> c1&c2 2921 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 2922 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2923 if (N0C && N1C && !N1C->isOpaque()) 2924 return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); 2925 // canonicalize constant to RHS 2926 if 
(isConstantIntBuildVectorOrConstantInt(N0) && 2927 !isConstantIntBuildVectorOrConstantInt(N1)) 2928 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); 2929 // fold (and x, -1) -> x 2930 if (isAllOnesConstant(N1)) 2931 return N0; 2932 // if (and x, c) is known to be zero, return 0 2933 unsigned BitWidth = VT.getScalarType().getSizeInBits(); 2934 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 2935 APInt::getAllOnesValue(BitWidth))) 2936 return DAG.getConstant(0, SDLoc(N), VT); 2937 // reassociate and 2938 if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) 2939 return RAND; 2940 // fold (and (or x, C), D) -> D if (C & D) == D 2941 if (N1C && N0.getOpcode() == ISD::OR) 2942 if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 2943 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 2944 return N1; 2945 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 2946 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 2947 SDValue N0Op0 = N0.getOperand(0); 2948 APInt Mask = ~N1C->getAPIntValue(); 2949 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 2950 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 2951 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 2952 N0.getValueType(), N0Op0); 2953 2954 // Replace uses of the AND with uses of the Zero extend node. 2955 CombineTo(N, Zext); 2956 2957 // We actually want to replace all uses of the any_extend with the 2958 // zero_extend, to avoid duplicating things. This will later cause this 2959 // AND to be folded. 2960 CombineTo(N0.getNode(), Zext); 2961 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2962 } 2963 } 2964 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> 2965 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must 2966 // already be zero by virtue of the width of the base type of the load. 
2967 // 2968 // the 'X' node here can either be nothing or an extract_vector_elt to catch 2969 // more cases. 2970 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 2971 N0.getOperand(0).getOpcode() == ISD::LOAD) || 2972 N0.getOpcode() == ISD::LOAD) { 2973 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? 2974 N0 : N0.getOperand(0) ); 2975 2976 // Get the constant (if applicable) the zero'th operand is being ANDed with. 2977 // This can be a pure constant or a vector splat, in which case we treat the 2978 // vector as a scalar and use the splat value. 2979 APInt Constant = APInt::getNullValue(1); 2980 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 2981 Constant = C->getAPIntValue(); 2982 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { 2983 APInt SplatValue, SplatUndef; 2984 unsigned SplatBitSize; 2985 bool HasAnyUndefs; 2986 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, 2987 SplatBitSize, HasAnyUndefs); 2988 if (IsSplat) { 2989 // Undef bits can contribute to a possible optimisation if set, so 2990 // set them. 2991 SplatValue |= SplatUndef; 2992 2993 // The splat value may be something like "0x00FFFFFF", which means 0 for 2994 // the first vector value and FF for the rest, repeating. We need a mask 2995 // that will apply equally to all members of the vector, so AND all the 2996 // lanes of the constant together. 2997 EVT VT = Vector->getValueType(0); 2998 unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); 2999 3000 // If the splat value has been compressed to a bitlength lower 3001 // than the size of the vector lane, we need to re-expand it to 3002 // the lane size. 
3003 if (BitWidth > SplatBitSize) 3004 for (SplatValue = SplatValue.zextOrTrunc(BitWidth); 3005 SplatBitSize < BitWidth; 3006 SplatBitSize = SplatBitSize * 2) 3007 SplatValue |= SplatValue.shl(SplatBitSize); 3008 3009 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a 3010 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. 3011 if (SplatBitSize % BitWidth == 0) { 3012 Constant = APInt::getAllOnesValue(BitWidth); 3013 for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) 3014 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); 3015 } 3016 } 3017 } 3018 3019 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is 3020 // actually legal and isn't going to get expanded, else this is a false 3021 // optimisation. 3022 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, 3023 Load->getValueType(0), 3024 Load->getMemoryVT()); 3025 3026 // Resize the constant to the same size as the original memory access before 3027 // extension. If it is still the AllOnesValue then this AND is completely 3028 // unneeded. 3029 Constant = 3030 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); 3031 3032 bool B; 3033 switch (Load->getExtensionType()) { 3034 default: B = false; break; 3035 case ISD::EXTLOAD: B = CanZextLoadProfitably; break; 3036 case ISD::ZEXTLOAD: 3037 case ISD::NON_EXTLOAD: B = true; break; 3038 } 3039 3040 if (B && Constant.isAllOnesValue()) { 3041 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to 3042 // preserve semantics once we get rid of the AND. 3043 SDValue NewLoad(Load, 0); 3044 if (Load->getExtensionType() == ISD::EXTLOAD) { 3045 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, 3046 Load->getValueType(0), SDLoc(Load), 3047 Load->getChain(), Load->getBasePtr(), 3048 Load->getOffset(), Load->getMemoryVT(), 3049 Load->getMemOperand()); 3050 // Replace uses of the EXTLOAD with the new ZEXTLOAD. 
3051 if (Load->getNumValues() == 3) { 3052 // PRE/POST_INC loads have 3 values. 3053 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), 3054 NewLoad.getValue(2) }; 3055 CombineTo(Load, To, 3, true); 3056 } else { 3057 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); 3058 } 3059 } 3060 3061 // Fold the AND away, taking care not to fold to the old load node if we 3062 // replaced it. 3063 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); 3064 3065 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3066 } 3067 } 3068 3069 // fold (and (load x), 255) -> (zextload x, i8) 3070 // fold (and (extload x, i16), 255) -> (zextload x, i8) 3071 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 3072 if (N1C && (N0.getOpcode() == ISD::LOAD || 3073 (N0.getOpcode() == ISD::ANY_EXTEND && 3074 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 3075 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 3076 LoadSDNode *LN0 = HasAnyExt 3077 ? cast<LoadSDNode>(N0.getOperand(0)) 3078 : cast<LoadSDNode>(N0); 3079 if (LN0->getExtensionType() != ISD::SEXTLOAD && 3080 LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { 3081 uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); 3082 if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ 3083 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 3084 EVT LoadedVT = LN0->getMemoryVT(); 3085 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 3086 3087 if (ExtVT == LoadedVT && 3088 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, 3089 ExtVT))) { 3090 3091 SDValue NewLoad = 3092 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 3093 LN0->getChain(), LN0->getBasePtr(), ExtVT, 3094 LN0->getMemOperand()); 3095 AddToWorklist(N); 3096 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 3097 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3098 } 3099 3100 // Do not change the width of a volatile load. 
3101 // Do not generate loads of non-round integer types since these can 3102 // be expensive (and would be wrong if the type is not byte sized). 3103 if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && 3104 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, 3105 ExtVT))) { 3106 EVT PtrType = LN0->getOperand(1).getValueType(); 3107 3108 unsigned Alignment = LN0->getAlignment(); 3109 SDValue NewPtr = LN0->getBasePtr(); 3110 3111 // For big endian targets, we need to add an offset to the pointer 3112 // to load the correct bytes. For little endian systems, we merely 3113 // need to read fewer bytes from the same pointer. 3114 if (DAG.getDataLayout().isBigEndian()) { 3115 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 3116 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 3117 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 3118 SDLoc DL(LN0); 3119 NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, 3120 NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); 3121 Alignment = MinAlign(Alignment, PtrOff); 3122 } 3123 3124 AddToWorklist(NewPtr.getNode()); 3125 3126 SDValue Load = 3127 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 3128 LN0->getChain(), NewPtr, 3129 LN0->getPointerInfo(), 3130 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 3131 LN0->isInvariant(), Alignment, LN0->getAAInfo()); 3132 AddToWorklist(N); 3133 CombineTo(LN0, Load, Load.getValue(1)); 3134 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3135 } 3136 } 3137 } 3138 } 3139 3140 if (SDValue Combined = visitANDLike(N0, N1, N)) 3141 return Combined; 3142 3143 // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 3144 if (N0.getOpcode() == N1.getOpcode()) { 3145 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 3146 if (Tmp.getNode()) return Tmp; 3147 } 3148 3149 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 3150 // fold (and (sra)) -> (and (srl)) when possible. 
3151 if (!VT.isVector() && 3152 SimplifyDemandedBits(SDValue(N, 0))) 3153 return SDValue(N, 0); 3154 3155 // fold (zext_inreg (extload x)) -> (zextload x) 3156 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 3157 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 3158 EVT MemVT = LN0->getMemoryVT(); 3159 // If we zero all the possible extended bits, then we can turn this into 3160 // a zextload if we are running before legalize or the operation is legal. 3161 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 3162 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 3163 BitWidth - MemVT.getScalarType().getSizeInBits())) && 3164 ((!LegalOperations && !LN0->isVolatile()) || 3165 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 3166 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 3167 LN0->getChain(), LN0->getBasePtr(), 3168 MemVT, LN0->getMemOperand()); 3169 AddToWorklist(N); 3170 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 3171 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3172 } 3173 } 3174 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 3175 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 3176 N0.hasOneUse()) { 3177 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 3178 EVT MemVT = LN0->getMemoryVT(); 3179 // If we zero all the possible extended bits, then we can turn this into 3180 // a zextload if we are running before legalize or the operation is legal. 
3181 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 3182 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 3183 BitWidth - MemVT.getScalarType().getSizeInBits())) && 3184 ((!LegalOperations && !LN0->isVolatile()) || 3185 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 3186 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 3187 LN0->getChain(), LN0->getBasePtr(), 3188 MemVT, LN0->getMemOperand()); 3189 AddToWorklist(N); 3190 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 3191 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3192 } 3193 } 3194 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) 3195 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { 3196 SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), 3197 N0.getOperand(1), false); 3198 if (BSwap.getNode()) 3199 return BSwap; 3200 } 3201 3202 return SDValue(); 3203 } 3204 3205 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16. 
3206 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 3207 bool DemandHighBits) { 3208 if (!LegalOperations) 3209 return SDValue(); 3210 3211 EVT VT = N->getValueType(0); 3212 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) 3213 return SDValue(); 3214 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 3215 return SDValue(); 3216 3217 // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) 3218 bool LookPassAnd0 = false; 3219 bool LookPassAnd1 = false; 3220 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) 3221 std::swap(N0, N1); 3222 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) 3223 std::swap(N0, N1); 3224 if (N0.getOpcode() == ISD::AND) { 3225 if (!N0.getNode()->hasOneUse()) 3226 return SDValue(); 3227 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3228 if (!N01C || N01C->getZExtValue() != 0xFF00) 3229 return SDValue(); 3230 N0 = N0.getOperand(0); 3231 LookPassAnd0 = true; 3232 } 3233 3234 if (N1.getOpcode() == ISD::AND) { 3235 if (!N1.getNode()->hasOneUse()) 3236 return SDValue(); 3237 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 3238 if (!N11C || N11C->getZExtValue() != 0xFF) 3239 return SDValue(); 3240 N1 = N1.getOperand(0); 3241 LookPassAnd1 = true; 3242 } 3243 3244 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) 3245 std::swap(N0, N1); 3246 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) 3247 return SDValue(); 3248 if (!N0.getNode()->hasOneUse() || 3249 !N1.getNode()->hasOneUse()) 3250 return SDValue(); 3251 3252 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3253 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 3254 if (!N01C || !N11C) 3255 return SDValue(); 3256 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8) 3257 return SDValue(); 3258 3259 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) 3260 SDValue N00 = N0->getOperand(0); 3261 if 
(!LookPassAnd0 && N00.getOpcode() == ISD::AND) { 3262 if (!N00.getNode()->hasOneUse()) 3263 return SDValue(); 3264 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); 3265 if (!N001C || N001C->getZExtValue() != 0xFF) 3266 return SDValue(); 3267 N00 = N00.getOperand(0); 3268 LookPassAnd0 = true; 3269 } 3270 3271 SDValue N10 = N1->getOperand(0); 3272 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { 3273 if (!N10.getNode()->hasOneUse()) 3274 return SDValue(); 3275 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); 3276 if (!N101C || N101C->getZExtValue() != 0xFF00) 3277 return SDValue(); 3278 N10 = N10.getOperand(0); 3279 LookPassAnd1 = true; 3280 } 3281 3282 if (N00 != N10) 3283 return SDValue(); 3284 3285 // Make sure everything beyond the low halfword gets set to zero since the SRL 3286 // 16 will clear the top bits. 3287 unsigned OpSizeInBits = VT.getSizeInBits(); 3288 if (DemandHighBits && OpSizeInBits > 16) { 3289 // If the left-shift isn't masked out then the only way this is a bswap is 3290 // if all bits beyond the low 8 are 0. In that case the entire pattern 3291 // reduces to a left shift anyway: leave it for other parts of the combiner. 3292 if (!LookPassAnd0) 3293 return SDValue(); 3294 3295 // However, if the right shift isn't masked out then it might be because 3296 // it's not needed. See if we can spot that too. 3297 if (!LookPassAnd1 && 3298 !DAG.MaskedValueIsZero( 3299 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) 3300 return SDValue(); 3301 } 3302 3303 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); 3304 if (OpSizeInBits > 16) { 3305 SDLoc DL(N); 3306 Res = DAG.getNode(ISD::SRL, DL, VT, Res, 3307 DAG.getConstant(OpSizeInBits - 16, DL, 3308 getShiftAmountTy(VT))); 3309 } 3310 return Res; 3311 } 3312 3313 /// Return true if the specified node is an element that makes up a 32-bit 3314 /// packed halfword byteswap. 
3315 /// ((x & 0x000000ff) << 8) | 3316 /// ((x & 0x0000ff00) >> 8) | 3317 /// ((x & 0x00ff0000) << 8) | 3318 /// ((x & 0xff000000) >> 8) 3319 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { 3320 if (!N.getNode()->hasOneUse()) 3321 return false; 3322 3323 unsigned Opc = N.getOpcode(); 3324 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) 3325 return false; 3326 3327 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3328 if (!N1C) 3329 return false; 3330 3331 unsigned Num; 3332 switch (N1C->getZExtValue()) { 3333 default: 3334 return false; 3335 case 0xFF: Num = 0; break; 3336 case 0xFF00: Num = 1; break; 3337 case 0xFF0000: Num = 2; break; 3338 case 0xFF000000: Num = 3; break; 3339 } 3340 3341 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 3342 SDValue N0 = N.getOperand(0); 3343 if (Opc == ISD::AND) { 3344 if (Num == 0 || Num == 2) { 3345 // (x >> 8) & 0xff 3346 // (x >> 8) & 0xff0000 3347 if (N0.getOpcode() != ISD::SRL) 3348 return false; 3349 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3350 if (!C || C->getZExtValue() != 8) 3351 return false; 3352 } else { 3353 // (x << 8) & 0xff00 3354 // (x << 8) & 0xff000000 3355 if (N0.getOpcode() != ISD::SHL) 3356 return false; 3357 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3358 if (!C || C->getZExtValue() != 8) 3359 return false; 3360 } 3361 } else if (Opc == ISD::SHL) { 3362 // (x & 0xff) << 8 3363 // (x & 0xff0000) << 8 3364 if (Num != 0 && Num != 2) 3365 return false; 3366 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3367 if (!C || C->getZExtValue() != 8) 3368 return false; 3369 } else { // Opc == ISD::SRL 3370 // (x & 0xff00) >> 8 3371 // (x & 0xff000000) >> 8 3372 if (Num != 1 && Num != 3) 3373 return false; 3374 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3375 if (!C || C->getZExtValue() != 8) 3376 return false; 3377 } 3378 3379 if (Parts[Num]) 3380 return false; 3381 3382 
Parts[Num] = N0.getOperand(0).getNode(); 3383 return true; 3384 } 3385 3386 /// Match a 32-bit packed halfword bswap. That is 3387 /// ((x & 0x000000ff) << 8) | 3388 /// ((x & 0x0000ff00) >> 8) | 3389 /// ((x & 0x00ff0000) << 8) | 3390 /// ((x & 0xff000000) >> 8) 3391 /// => (rotl (bswap x), 16) 3392 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { 3393 if (!LegalOperations) 3394 return SDValue(); 3395 3396 EVT VT = N->getValueType(0); 3397 if (VT != MVT::i32) 3398 return SDValue(); 3399 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 3400 return SDValue(); 3401 3402 // Look for either 3403 // (or (or (and), (and)), (or (and), (and))) 3404 // (or (or (or (and), (and)), (and)), (and)) 3405 if (N0.getOpcode() != ISD::OR) 3406 return SDValue(); 3407 SDValue N00 = N0.getOperand(0); 3408 SDValue N01 = N0.getOperand(1); 3409 SDNode *Parts[4] = {}; 3410 3411 if (N1.getOpcode() == ISD::OR && 3412 N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { 3413 // (or (or (and), (and)), (or (and), (and))) 3414 SDValue N000 = N00.getOperand(0); 3415 if (!isBSwapHWordElement(N000, Parts)) 3416 return SDValue(); 3417 3418 SDValue N001 = N00.getOperand(1); 3419 if (!isBSwapHWordElement(N001, Parts)) 3420 return SDValue(); 3421 SDValue N010 = N01.getOperand(0); 3422 if (!isBSwapHWordElement(N010, Parts)) 3423 return SDValue(); 3424 SDValue N011 = N01.getOperand(1); 3425 if (!isBSwapHWordElement(N011, Parts)) 3426 return SDValue(); 3427 } else { 3428 // (or (or (or (and), (and)), (and)), (and)) 3429 if (!isBSwapHWordElement(N1, Parts)) 3430 return SDValue(); 3431 if (!isBSwapHWordElement(N01, Parts)) 3432 return SDValue(); 3433 if (N00.getOpcode() != ISD::OR) 3434 return SDValue(); 3435 SDValue N000 = N00.getOperand(0); 3436 if (!isBSwapHWordElement(N000, Parts)) 3437 return SDValue(); 3438 SDValue N001 = N00.getOperand(1); 3439 if (!isBSwapHWordElement(N001, Parts)) 3440 return SDValue(); 3441 } 3442 3443 // Make sure the parts are all coming from the same 
node. 3444 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) 3445 return SDValue(); 3446 3447 SDLoc DL(N); 3448 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, 3449 SDValue(Parts[0], 0)); 3450 3451 // Result of the bswap should be rotated by 16. If it's not legal, then 3452 // do (x << 16) | (x >> 16). 3453 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); 3454 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) 3455 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); 3456 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) 3457 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); 3458 return DAG.getNode(ISD::OR, DL, VT, 3459 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt), 3460 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt)); 3461 } 3462 3463 /// This contains all DAGCombine rules which reduce two values combined by 3464 /// an Or operation to a single value \see visitANDLike(). 3465 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { 3466 EVT VT = N1.getValueType(); 3467 // fold (or x, undef) -> -1 3468 if (!LegalOperations && 3469 (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { 3470 EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; 3471 return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), 3472 SDLoc(LocReference), VT); 3473 } 3474 // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) 3475 SDValue LL, LR, RL, RR, CC0, CC1; 3476 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 3477 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 3478 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 3479 3480 if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) { 3481 // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) 3482 // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) 3483 if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { 3484 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), 3485 LR.getValueType(), LL, RL); 3486 AddToWorklist(ORNode.getNode()); 3487 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 3488 } 3489 // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) 3490 // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) 3491 if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { 3492 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), 3493 LR.getValueType(), LL, RL); 3494 AddToWorklist(ANDNode.getNode()); 3495 return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); 3496 } 3497 } 3498 // canonicalize equivalent to ll == rl 3499 if (LL == RR && LR == RL) { 3500 Op1 = ISD::getSetCCSwappedOperands(Op1); 3501 std::swap(RL, RR); 3502 } 3503 if (LL == RL && LR == RR) { 3504 bool isInteger = LL.getValueType().isInteger(); 3505 ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); 3506 if (Result != ISD::SETCC_INVALID && 3507 (!LegalOperations || 3508 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 3509 TLI.isOperationLegal(ISD::SETCC, 3510 getSetCCResultType(N0.getValueType()))))) 3511 return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), 3512 LL, LR, Result); 3513 } 3514 } 
3515 3516 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. 3517 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && 3518 // Don't increase # computations. 3519 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { 3520 // We can only do this xform if we know that bits from X that are set in C2 3521 // but not in C1 are already zero. Likewise for Y. 3522 if (const ConstantSDNode *N0O1C = 3523 getAsNonOpaqueConstant(N0.getOperand(1))) { 3524 if (const ConstantSDNode *N1O1C = 3525 getAsNonOpaqueConstant(N1.getOperand(1))) { 3526 // We can only do this xform if we know that bits from X that are set in 3527 // C2 but not in C1 are already zero. Likewise for Y. 3528 const APInt &LHSMask = N0O1C->getAPIntValue(); 3529 const APInt &RHSMask = N1O1C->getAPIntValue(); 3530 3531 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && 3532 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { 3533 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, 3534 N0.getOperand(0), N1.getOperand(0)); 3535 SDLoc DL(LocReference); 3536 return DAG.getNode(ISD::AND, DL, VT, X, 3537 DAG.getConstant(LHSMask | RHSMask, DL, VT)); 3538 } 3539 } 3540 } 3541 } 3542 3543 // (or (and X, M), (and X, N)) -> (and X, (or M, N)) 3544 if (N0.getOpcode() == ISD::AND && 3545 N1.getOpcode() == ISD::AND && 3546 N0.getOperand(0) == N1.getOperand(0) && 3547 // Don't increase # computations. 
3548 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { 3549 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, 3550 N0.getOperand(1), N1.getOperand(1)); 3551 return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X); 3552 } 3553 3554 return SDValue(); 3555 } 3556 3557 SDValue DAGCombiner::visitOR(SDNode *N) { 3558 SDValue N0 = N->getOperand(0); 3559 SDValue N1 = N->getOperand(1); 3560 EVT VT = N1.getValueType(); 3561 3562 // fold vector ops 3563 if (VT.isVector()) { 3564 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 3565 return FoldedVOp; 3566 3567 // fold (or x, 0) -> x, vector edition 3568 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3569 return N1; 3570 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3571 return N0; 3572 3573 // fold (or x, -1) -> -1, vector edition 3574 if (ISD::isBuildVectorAllOnes(N0.getNode())) 3575 // do not return N0, because undef node may exist in N0 3576 return DAG.getConstant( 3577 APInt::getAllOnesValue( 3578 N0.getValueType().getScalarType().getSizeInBits()), 3579 SDLoc(N), N0.getValueType()); 3580 if (ISD::isBuildVectorAllOnes(N1.getNode())) 3581 // do not return N1, because undef node may exist in N1 3582 return DAG.getConstant( 3583 APInt::getAllOnesValue( 3584 N1.getValueType().getScalarType().getSizeInBits()), 3585 SDLoc(N), N1.getValueType()); 3586 3587 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) 3588 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) 3589 // Do this only if the resulting shuffle is legal. 3590 if (isa<ShuffleVectorSDNode>(N0) && 3591 isa<ShuffleVectorSDNode>(N1) && 3592 // Avoid folding a node with illegal type. 
3593 TLI.isTypeLegal(VT) && 3594 N0->getOperand(1) == N1->getOperand(1) && 3595 ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { 3596 bool CanFold = true; 3597 unsigned NumElts = VT.getVectorNumElements(); 3598 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); 3599 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); 3600 // We construct two shuffle masks: 3601 // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand 3602 // and N1 as the second operand. 3603 // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand 3604 // and N0 as the second operand. 3605 // We do this because OR is commutable and therefore there might be 3606 // two ways to fold this node into a shuffle. 3607 SmallVector<int,4> Mask1; 3608 SmallVector<int,4> Mask2; 3609 3610 for (unsigned i = 0; i != NumElts && CanFold; ++i) { 3611 int M0 = SV0->getMaskElt(i); 3612 int M1 = SV1->getMaskElt(i); 3613 3614 // Both shuffle indexes are undef. Propagate Undef. 3615 if (M0 < 0 && M1 < 0) { 3616 Mask1.push_back(M0); 3617 Mask2.push_back(M0); 3618 continue; 3619 } 3620 3621 if (M0 < 0 || M1 < 0 || 3622 (M0 < (int)NumElts && M1 < (int)NumElts) || 3623 (M0 >= (int)NumElts && M1 >= (int)NumElts)) { 3624 CanFold = false; 3625 break; 3626 } 3627 3628 Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); 3629 Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); 3630 } 3631 3632 if (CanFold) { 3633 // Fold this sequence only if the resulting shuffle is 'legal'. 
3634 if (TLI.isShuffleMaskLegal(Mask1, VT)) 3635 return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), 3636 N1->getOperand(0), &Mask1[0]); 3637 if (TLI.isShuffleMaskLegal(Mask2, VT)) 3638 return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), 3639 N0->getOperand(0), &Mask2[0]); 3640 } 3641 } 3642 } 3643 3644 // fold (or c1, c2) -> c1|c2 3645 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 3646 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3647 if (N0C && N1C && !N1C->isOpaque()) 3648 return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); 3649 // canonicalize constant to RHS 3650 if (isConstantIntBuildVectorOrConstantInt(N0) && 3651 !isConstantIntBuildVectorOrConstantInt(N1)) 3652 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); 3653 // fold (or x, 0) -> x 3654 if (isNullConstant(N1)) 3655 return N0; 3656 // fold (or x, -1) -> -1 3657 if (isAllOnesConstant(N1)) 3658 return N1; 3659 // fold (or x, c) -> c iff (x & ~c) == 0 3660 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) 3661 return N1; 3662 3663 if (SDValue Combined = visitORLike(N0, N1, N)) 3664 return Combined; 3665 3666 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) 3667 SDValue BSwap = MatchBSwapHWord(N, N0, N1); 3668 if (BSwap.getNode()) 3669 return BSwap; 3670 BSwap = MatchBSwapHWordLow(N, N0, N1); 3671 if (BSwap.getNode()) 3672 return BSwap; 3673 3674 // reassociate or 3675 if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) 3676 return ROR; 3677 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) 3678 // iff (c1 & c2) != 0. 
3679 if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 3680 isa<ConstantSDNode>(N0.getOperand(1))) { 3681 ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); 3682 if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { 3683 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, 3684 N1C, C1)) 3685 return DAG.getNode( 3686 ISD::AND, SDLoc(N), VT, 3687 DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); 3688 return SDValue(); 3689 } 3690 } 3691 // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) 3692 if (N0.getOpcode() == N1.getOpcode()) { 3693 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 3694 if (Tmp.getNode()) return Tmp; 3695 } 3696 3697 // See if this is some rotate idiom. 3698 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) 3699 return SDValue(Rot, 0); 3700 3701 // Simplify the operands using demanded-bits information. 3702 if (!VT.isVector() && 3703 SimplifyDemandedBits(SDValue(N, 0))) 3704 return SDValue(N, 0); 3705 3706 return SDValue(); 3707 } 3708 3709 /// Match "(X shl/srl V1) & V2" where V2 may not be present. 3710 static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { 3711 if (Op.getOpcode() == ISD::AND) { 3712 if (isa<ConstantSDNode>(Op.getOperand(1))) { 3713 Mask = Op.getOperand(1); 3714 Op = Op.getOperand(0); 3715 } else { 3716 return false; 3717 } 3718 } 3719 3720 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { 3721 Shift = Op; 3722 return true; 3723 } 3724 3725 return false; 3726 } 3727 3728 // Return true if we can prove that, whenever Neg and Pos are both in the 3729 // range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that 3730 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: 3731 // 3732 // (or (shift1 X, Neg), (shift2 X, Pos)) 3733 // 3734 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate 3735 // in direction shift1 by Neg. 
The range [0, OpSize) means that we only need 3736 // to consider shift amounts with defined behavior. 3737 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { 3738 // If OpSize is a power of 2 then: 3739 // 3740 // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) 3741 // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). 3742 // 3743 // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check 3744 // for the stronger condition: 3745 // 3746 // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] 3747 // 3748 // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) 3749 // we can just replace Neg with Neg' for the rest of the function. 3750 // 3751 // In other cases we check for the even stronger condition: 3752 // 3753 // Neg == OpSize - Pos [B] 3754 // 3755 // for all Neg and Pos. Note that the (or ...) then invokes undefined 3756 // behavior if Pos == 0 (and consequently Neg == OpSize). 3757 // 3758 // We could actually use [A] whenever OpSize is a power of 2, but the 3759 // only extra cases that it would match are those uninteresting ones 3760 // where Neg and Pos are never in range at the same time. E.g. for 3761 // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) 3762 // as well as (sub 32, Pos), but: 3763 // 3764 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) 3765 // 3766 // always invokes undefined behavior for 32-bit X. 3767 // 3768 // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. 3769 unsigned MaskLoBits = 0; 3770 if (Neg.getOpcode() == ISD::AND && 3771 isPowerOf2_64(OpSize) && 3772 Neg.getOperand(1).getOpcode() == ISD::Constant && 3773 cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { 3774 Neg = Neg.getOperand(0); 3775 MaskLoBits = Log2_64(OpSize); 3776 } 3777 3778 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. 
3779 if (Neg.getOpcode() != ISD::SUB) 3780 return false; 3781 ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); 3782 if (!NegC) 3783 return false; 3784 SDValue NegOp1 = Neg.getOperand(1); 3785 3786 // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with 3787 // Pos'. The truncation is redundant for the purpose of the equality. 3788 if (MaskLoBits && 3789 Pos.getOpcode() == ISD::AND && 3790 Pos.getOperand(1).getOpcode() == ISD::Constant && 3791 cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) 3792 Pos = Pos.getOperand(0); 3793 3794 // The condition we need is now: 3795 // 3796 // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask 3797 // 3798 // If NegOp1 == Pos then we need: 3799 // 3800 // OpSize & Mask == NegC & Mask 3801 // 3802 // (because "x & Mask" is a truncation and distributes through subtraction). 3803 APInt Width; 3804 if (Pos == NegOp1) 3805 Width = NegC->getAPIntValue(); 3806 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 3807 // Then the condition we want to prove becomes: 3808 // 3809 // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask 3810 // 3811 // which, again because "x & Mask" is a truncation, becomes: 3812 // 3813 // NegC & Mask == (OpSize - PosC) & Mask 3814 // OpSize & Mask == (NegC + PosC) & Mask 3815 else if (Pos.getOpcode() == ISD::ADD && 3816 Pos.getOperand(0) == NegOp1 && 3817 Pos.getOperand(1).getOpcode() == ISD::Constant) 3818 Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + 3819 NegC->getAPIntValue()); 3820 else 3821 return false; 3822 3823 // Now we just need to check that OpSize & Mask == Width & Mask. 3824 if (MaskLoBits) 3825 // OpSize & Mask is 0 since Mask is OpSize - 1. 3826 return Width.getLoBits(MaskLoBits) == 0; 3827 return Width == OpSize; 3828 } 3829 3830 // A subroutine of MatchRotate used once we have found an OR of two opposite 3831 // shifts of Shifted. 
If Neg == <operand size> - Pos then the OR reduces 3832 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the 3833 // former being preferred if supported. InnerPos and InnerNeg are Pos and 3834 // Neg with outer conversions stripped away. 3835 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, 3836 SDValue Neg, SDValue InnerPos, 3837 SDValue InnerNeg, unsigned PosOpcode, 3838 unsigned NegOpcode, SDLoc DL) { 3839 // fold (or (shl x, (*ext y)), 3840 // (srl x, (*ext (sub 32, y)))) -> 3841 // (rotl x, y) or (rotr x, (sub 32, y)) 3842 // 3843 // fold (or (shl x, (*ext (sub 32, y))), 3844 // (srl x, (*ext y))) -> 3845 // (rotr x, y) or (rotl x, (sub 32, y)) 3846 EVT VT = Shifted.getValueType(); 3847 if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { 3848 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); 3849 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, 3850 HasPos ? Pos : Neg).getNode(); 3851 } 3852 3853 return nullptr; 3854 } 3855 3856 // MatchRotate - Handle an 'or' of two operands. If this is one of the many 3857 // idioms for rotate, and if the target supports rotation instructions, generate 3858 // a rot[lr]. 3859 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { 3860 // Must be a legal type. Expanded 'n promoted things won't work with rotates. 3861 EVT VT = LHS.getValueType(); 3862 if (!TLI.isTypeLegal(VT)) return nullptr; 3863 3864 // The target must have at least one rotate flavor. 3865 bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); 3866 bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); 3867 if (!HasROTL && !HasROTR) return nullptr; 3868 3869 // Match "(X shl/srl V1) & V2" where V2 may not be present. 3870 SDValue LHSShift; // The shift. 3871 SDValue LHSMask; // AND value if any. 3872 if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) 3873 return nullptr; // Not part of a rotate. 3874 3875 SDValue RHSShift; // The shift. 
3876 SDValue RHSMask; // AND value if any. 3877 if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) 3878 return nullptr; // Not part of a rotate. 3879 3880 if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) 3881 return nullptr; // Not shifting the same value. 3882 3883 if (LHSShift.getOpcode() == RHSShift.getOpcode()) 3884 return nullptr; // Shifts must disagree. 3885 3886 // Canonicalize shl to left side in a shl/srl pair. 3887 if (RHSShift.getOpcode() == ISD::SHL) { 3888 std::swap(LHS, RHS); 3889 std::swap(LHSShift, RHSShift); 3890 std::swap(LHSMask , RHSMask ); 3891 } 3892 3893 unsigned OpSizeInBits = VT.getSizeInBits(); 3894 SDValue LHSShiftArg = LHSShift.getOperand(0); 3895 SDValue LHSShiftAmt = LHSShift.getOperand(1); 3896 SDValue RHSShiftArg = RHSShift.getOperand(0); 3897 SDValue RHSShiftAmt = RHSShift.getOperand(1); 3898 3899 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) 3900 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) 3901 if (LHSShiftAmt.getOpcode() == ISD::Constant && 3902 RHSShiftAmt.getOpcode() == ISD::Constant) { 3903 uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); 3904 uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); 3905 if ((LShVal + RShVal) != OpSizeInBits) 3906 return nullptr; 3907 3908 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, 3909 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); 3910 3911 // If there is an AND of either shifted operand, apply it to the result. 
3912 if (LHSMask.getNode() || RHSMask.getNode()) { 3913 APInt Mask = APInt::getAllOnesValue(OpSizeInBits); 3914 3915 if (LHSMask.getNode()) { 3916 APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); 3917 Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits; 3918 } 3919 if (RHSMask.getNode()) { 3920 APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); 3921 Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; 3922 } 3923 3924 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT)); 3925 } 3926 3927 return Rot.getNode(); 3928 } 3929 3930 // If there is a mask here, and we have a variable shift, we can't be sure 3931 // that we're masking out the right stuff. 3932 if (LHSMask.getNode() || RHSMask.getNode()) 3933 return nullptr; 3934 3935 // If the shift amount is sign/zext/any-extended just peel it off. 3936 SDValue LExtOp0 = LHSShiftAmt; 3937 SDValue RExtOp0 = RHSShiftAmt; 3938 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 3939 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 3940 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 3941 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && 3942 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 3943 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 3944 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 3945 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { 3946 LExtOp0 = LHSShiftAmt.getOperand(0); 3947 RExtOp0 = RHSShiftAmt.getOperand(0); 3948 } 3949 3950 SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, 3951 LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); 3952 if (TryL) 3953 return TryL; 3954 3955 SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, 3956 RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); 3957 if (TryR) 3958 return TryR; 3959 3960 return nullptr; 3961 } 3962 3963 SDValue DAGCombiner::visitXOR(SDNode *N) { 3964 SDValue N0 = N->getOperand(0); 3965 SDValue N1 = N->getOperand(1); 3966 EVT VT = N0.getValueType(); 3967 3968 // fold vector ops 
3969 if (VT.isVector()) { 3970 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 3971 return FoldedVOp; 3972 3973 // fold (xor x, 0) -> x, vector edition 3974 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3975 return N1; 3976 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3977 return N0; 3978 } 3979 3980 // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 3981 if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) 3982 return DAG.getConstant(0, SDLoc(N), VT); 3983 // fold (xor x, undef) -> undef 3984 if (N0.getOpcode() == ISD::UNDEF) 3985 return N0; 3986 if (N1.getOpcode() == ISD::UNDEF) 3987 return N1; 3988 // fold (xor c1, c2) -> c1^c2 3989 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 3990 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 3991 if (N0C && N1C) 3992 return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); 3993 // canonicalize constant to RHS 3994 if (isConstantIntBuildVectorOrConstantInt(N0) && 3995 !isConstantIntBuildVectorOrConstantInt(N1)) 3996 return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); 3997 // fold (xor x, 0) -> x 3998 if (isNullConstant(N1)) 3999 return N0; 4000 // reassociate xor 4001 if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) 4002 return RXOR; 4003 4004 // fold !(x cc y) -> (x !cc y) 4005 SDValue LHS, RHS, CC; 4006 if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { 4007 bool isInt = LHS.getValueType().isInteger(); 4008 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), 4009 isInt); 4010 4011 if (!LegalOperations || 4012 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { 4013 switch (N0.getOpcode()) { 4014 default: 4015 llvm_unreachable("Unhandled SetCC Equivalent!"); 4016 case ISD::SETCC: 4017 return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC); 4018 case ISD::SELECT_CC: 4019 return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2), 4020 N0.getOperand(3), NotCC); 4021 } 4022 } 4023 } 4024 4025 // fold (not (zext (setcc 
x, y))) -> (zext (not (setcc x, y))) 4026 if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND && 4027 N0.getNode()->hasOneUse() && 4028 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ 4029 SDValue V = N0.getOperand(0); 4030 SDLoc DL(N0); 4031 V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V, 4032 DAG.getConstant(1, DL, V.getValueType())); 4033 AddToWorklist(V.getNode()); 4034 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); 4035 } 4036 4037 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc 4038 if (isOneConstant(N1) && VT == MVT::i1 && 4039 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { 4040 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 4041 if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { 4042 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; 4043 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS 4044 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS 4045 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); 4046 return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); 4047 } 4048 } 4049 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants 4050 if (isAllOnesConstant(N1) && 4051 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { 4052 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 4053 if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { 4054 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? 
ISD::OR : ISD::AND; 4055 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS 4056 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS 4057 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); 4058 return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); 4059 } 4060 } 4061 // fold (xor (and x, y), y) -> (and (not x), y) 4062 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 4063 N0->getOperand(1) == N1) { 4064 SDValue X = N0->getOperand(0); 4065 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); 4066 AddToWorklist(NotX.getNode()); 4067 return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); 4068 } 4069 // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) 4070 if (N1C && N0.getOpcode() == ISD::XOR) { 4071 if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) { 4072 SDLoc DL(N); 4073 return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), 4074 DAG.getConstant(N1C->getAPIntValue() ^ 4075 N00C->getAPIntValue(), DL, VT)); 4076 } 4077 if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) { 4078 SDLoc DL(N); 4079 return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), 4080 DAG.getConstant(N1C->getAPIntValue() ^ 4081 N01C->getAPIntValue(), DL, VT)); 4082 } 4083 } 4084 // fold (xor x, x) -> 0 4085 if (N0 == N1) 4086 return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); 4087 4088 // fold (xor (shl 1, x), -1) -> (rotl ~1, x) 4089 // Here is a concrete example of this equivalence: 4090 // i16 x == 14 4091 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 4092 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 4093 // 4094 // => 4095 // 4096 // i16 ~1 == 0b1111111111111110 4097 // i16 rol(~1, 14) == 0b1011111111111111 4098 // 4099 // Some additional tips to help conceptualize this transform: 4100 // - Try to see the operation as placing a single zero in a value of all ones. 4101 // - There exists no value for x which would allow the result to contain zero. 
4102 // - Values of x larger than the bitwidth are undefined and do not require a 4103 // consistent result. 4104 // - Pushing the zero left requires shifting one bits in from the right. 4105 // A rotate left of ~1 is a nice way of achieving the desired result. 4106 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL 4107 && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { 4108 SDLoc DL(N); 4109 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), 4110 N0.getOperand(1)); 4111 } 4112 4113 // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) 4114 if (N0.getOpcode() == N1.getOpcode()) { 4115 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 4116 if (Tmp.getNode()) return Tmp; 4117 } 4118 4119 // Simplify the expression using non-local knowledge. 4120 if (!VT.isVector() && 4121 SimplifyDemandedBits(SDValue(N, 0))) 4122 return SDValue(N, 0); 4123 4124 return SDValue(); 4125 } 4126 4127 /// Handle transforms common to the three shifts, when the shift amount is a 4128 /// constant. 4129 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { 4130 SDNode *LHS = N->getOperand(0).getNode(); 4131 if (!LHS->hasOneUse()) return SDValue(); 4132 4133 // We want to pull some binops through shifts, so that we have (and (shift)) 4134 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of 4135 // thing happens with address calculations, so it's important to canonicalize 4136 // it. 4137 bool HighBitSet = false; // Can we transform this if the high bit is set? 4138 4139 switch (LHS->getOpcode()) { 4140 default: return SDValue(); 4141 case ISD::OR: 4142 case ISD::XOR: 4143 HighBitSet = false; // We can only transform sra if the high bit is clear. 4144 break; 4145 case ISD::AND: 4146 HighBitSet = true; // We can only transform sra if the high bit is set. 4147 break; 4148 case ISD::ADD: 4149 if (N->getOpcode() != ISD::SHL) 4150 return SDValue(); // only shl(add) not sr[al](add). 
4151 HighBitSet = false; // We can only transform sra if the high bit is clear. 4152 break; 4153 } 4154 4155 // We require the RHS of the binop to be a constant and not opaque as well. 4156 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); 4157 if (!BinOpCst) return SDValue(); 4158 4159 // FIXME: disable this unless the input to the binop is a shift by a constant. 4160 // If it is not a shift, it pessimizes some common cases like: 4161 // 4162 // void foo(int *X, int i) { X[i & 1235] = 1; } 4163 // int bar(int *X, int i) { return X[i & 255]; } 4164 SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); 4165 if ((BinOpLHSVal->getOpcode() != ISD::SHL && 4166 BinOpLHSVal->getOpcode() != ISD::SRA && 4167 BinOpLHSVal->getOpcode() != ISD::SRL) || 4168 !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) 4169 return SDValue(); 4170 4171 EVT VT = N->getValueType(0); 4172 4173 // If this is a signed shift right, and the high bit is modified by the 4174 // logical operation, do not perform the transformation. The highBitSet 4175 // boolean indicates the value of the high bit of the constant which would 4176 // cause it to be modified for this operation. 4177 if (N->getOpcode() == ISD::SRA) { 4178 bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); 4179 if (BinOpRHSSignSet != HighBitSet) 4180 return SDValue(); 4181 } 4182 4183 if (!TLI.isDesirableToCommuteWithShift(LHS)) 4184 return SDValue(); 4185 4186 // Fold the constants, shifting the binop RHS by the shift amount. 4187 SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), 4188 N->getValueType(0), 4189 LHS->getOperand(1), N->getOperand(1)); 4190 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); 4191 4192 // Create the new shift. 4193 SDValue NewShift = DAG.getNode(N->getOpcode(), 4194 SDLoc(LHS->getOperand(0)), 4195 VT, LHS->getOperand(0), N->getOperand(1)); 4196 4197 // Create the new binop. 
4198 return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); 4199 } 4200 4201 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { 4202 assert(N->getOpcode() == ISD::TRUNCATE); 4203 assert(N->getOperand(0).getOpcode() == ISD::AND); 4204 4205 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) 4206 if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { 4207 SDValue N01 = N->getOperand(0).getOperand(1); 4208 4209 if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { 4210 if (!N01C->isOpaque()) { 4211 EVT TruncVT = N->getValueType(0); 4212 SDValue N00 = N->getOperand(0).getOperand(0); 4213 APInt TruncC = N01C->getAPIntValue(); 4214 TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); 4215 SDLoc DL(N); 4216 4217 return DAG.getNode(ISD::AND, DL, TruncVT, 4218 DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), 4219 DAG.getConstant(TruncC, DL, TruncVT)); 4220 } 4221 } 4222 } 4223 4224 return SDValue(); 4225 } 4226 4227 SDValue DAGCombiner::visitRotate(SDNode *N) { 4228 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). 
4229 if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && 4230 N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { 4231 SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode()); 4232 if (NewOp1.getNode()) 4233 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), 4234 N->getOperand(0), NewOp1); 4235 } 4236 return SDValue(); 4237 } 4238 4239 SDValue DAGCombiner::visitSHL(SDNode *N) { 4240 SDValue N0 = N->getOperand(0); 4241 SDValue N1 = N->getOperand(1); 4242 EVT VT = N0.getValueType(); 4243 unsigned OpSizeInBits = VT.getScalarSizeInBits(); 4244 4245 // fold vector ops 4246 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4247 if (VT.isVector()) { 4248 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 4249 return FoldedVOp; 4250 4251 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); 4252 // If setcc produces all-one true value then: 4253 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) 4254 if (N1CV && N1CV->isConstant()) { 4255 if (N0.getOpcode() == ISD::AND) { 4256 SDValue N00 = N0->getOperand(0); 4257 SDValue N01 = N0->getOperand(1); 4258 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01); 4259 4260 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && 4261 TLI.getBooleanContents(N00.getOperand(0).getValueType()) == 4262 TargetLowering::ZeroOrNegativeOneBooleanContent) { 4263 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, 4264 N01CV, N1CV)) 4265 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); 4266 } 4267 } else { 4268 N1C = isConstOrConstSplat(N1); 4269 } 4270 } 4271 } 4272 4273 // fold (shl c1, c2) -> c1<<c2 4274 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 4275 if (N0C && N1C && !N1C->isOpaque()) 4276 return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); 4277 // fold (shl 0, x) -> 0 4278 if (isNullConstant(N0)) 4279 return N0; 4280 // fold (shl x, c >= size(x)) -> undef 4281 if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) 4282 return 
DAG.getUNDEF(VT); 4283 // fold (shl x, 0) -> x 4284 if (N1C && N1C->isNullValue()) 4285 return N0; 4286 // fold (shl undef, x) -> 0 4287 if (N0.getOpcode() == ISD::UNDEF) 4288 return DAG.getConstant(0, SDLoc(N), VT); 4289 // if (shl x, c) is known to be zero, return 0 4290 if (DAG.MaskedValueIsZero(SDValue(N, 0), 4291 APInt::getAllOnesValue(OpSizeInBits))) 4292 return DAG.getConstant(0, SDLoc(N), VT); 4293 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 4294 if (N1.getOpcode() == ISD::TRUNCATE && 4295 N1.getOperand(0).getOpcode() == ISD::AND) { 4296 SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); 4297 if (NewOp1.getNode()) 4298 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); 4299 } 4300 4301 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 4302 return SDValue(N, 0); 4303 4304 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) 4305 if (N1C && N0.getOpcode() == ISD::SHL) { 4306 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { 4307 uint64_t c1 = N0C1->getZExtValue(); 4308 uint64_t c2 = N1C->getZExtValue(); 4309 SDLoc DL(N); 4310 if (c1 + c2 >= OpSizeInBits) 4311 return DAG.getConstant(0, DL, VT); 4312 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), 4313 DAG.getConstant(c1 + c2, DL, N1.getValueType())); 4314 } 4315 } 4316 4317 // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) 4318 // For this to be valid, the second form must not preserve any of the bits 4319 // that are shifted out by the inner shift in the first form. This means 4320 // the outer shift size must be >= the number of bits added by the ext. 4321 // As a corollary, we don't care what kind of ext it is. 
4322 if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || 4323 N0.getOpcode() == ISD::ANY_EXTEND || 4324 N0.getOpcode() == ISD::SIGN_EXTEND) && 4325 N0.getOperand(0).getOpcode() == ISD::SHL) { 4326 SDValue N0Op0 = N0.getOperand(0); 4327 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { 4328 uint64_t c1 = N0Op0C1->getZExtValue(); 4329 uint64_t c2 = N1C->getZExtValue(); 4330 EVT InnerShiftVT = N0Op0.getValueType(); 4331 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); 4332 if (c2 >= OpSizeInBits - InnerShiftSize) { 4333 SDLoc DL(N0); 4334 if (c1 + c2 >= OpSizeInBits) 4335 return DAG.getConstant(0, DL, VT); 4336 return DAG.getNode(ISD::SHL, DL, VT, 4337 DAG.getNode(N0.getOpcode(), DL, VT, 4338 N0Op0->getOperand(0)), 4339 DAG.getConstant(c1 + c2, DL, N1.getValueType())); 4340 } 4341 } 4342 } 4343 4344 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) 4345 // Only fold this if the inner zext has no other uses to avoid increasing 4346 // the total number of instructions. 
4347 if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && 4348 N0.getOperand(0).getOpcode() == ISD::SRL) { 4349 SDValue N0Op0 = N0.getOperand(0); 4350 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { 4351 uint64_t c1 = N0Op0C1->getZExtValue(); 4352 if (c1 < VT.getScalarSizeInBits()) { 4353 uint64_t c2 = N1C->getZExtValue(); 4354 if (c1 == c2) { 4355 SDValue NewOp0 = N0.getOperand(0); 4356 EVT CountVT = NewOp0.getOperand(1).getValueType(); 4357 SDLoc DL(N); 4358 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(), 4359 NewOp0, 4360 DAG.getConstant(c2, DL, CountVT)); 4361 AddToWorklist(NewSHL.getNode()); 4362 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); 4363 } 4364 } 4365 } 4366 } 4367 4368 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 4369 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 4370 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && 4371 cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) { 4372 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { 4373 uint64_t C1 = N0C1->getZExtValue(); 4374 uint64_t C2 = N1C->getZExtValue(); 4375 SDLoc DL(N); 4376 if (C1 <= C2) 4377 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), 4378 DAG.getConstant(C2 - C1, DL, N1.getValueType())); 4379 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), 4380 DAG.getConstant(C1 - C2, DL, N1.getValueType())); 4381 } 4382 } 4383 4384 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or 4385 // (and (srl x, (sub c1, c2), MASK) 4386 // Only fold this if the inner shift has no other uses -- if it does, folding 4387 // this will increase the total number of instructions. 
4388 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { 4389 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { 4390 uint64_t c1 = N0C1->getZExtValue(); 4391 if (c1 < OpSizeInBits) { 4392 uint64_t c2 = N1C->getZExtValue(); 4393 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); 4394 SDValue Shift; 4395 if (c2 > c1) { 4396 Mask = Mask.shl(c2 - c1); 4397 SDLoc DL(N); 4398 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), 4399 DAG.getConstant(c2 - c1, DL, N1.getValueType())); 4400 } else { 4401 Mask = Mask.lshr(c1 - c2); 4402 SDLoc DL(N); 4403 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), 4404 DAG.getConstant(c1 - c2, DL, N1.getValueType())); 4405 } 4406 SDLoc DL(N0); 4407 return DAG.getNode(ISD::AND, DL, VT, Shift, 4408 DAG.getConstant(Mask, DL, VT)); 4409 } 4410 } 4411 } 4412 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) 4413 if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { 4414 unsigned BitSize = VT.getScalarSizeInBits(); 4415 SDLoc DL(N); 4416 SDValue HiBitsMask = 4417 DAG.getConstant(APInt::getHighBitsSet(BitSize, 4418 BitSize - N1C->getZExtValue()), 4419 DL, VT); 4420 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), 4421 HiBitsMask); 4422 } 4423 4424 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 4425 // Variant of version done on multiply, except mul by a power of 2 is turned 4426 // into a shift. 
4427 APInt Val; 4428 if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && 4429 (isa<ConstantSDNode>(N0.getOperand(1)) || 4430 isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { 4431 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); 4432 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); 4433 return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); 4434 } 4435 4436 if (N1C && !N1C->isOpaque()) { 4437 SDValue NewSHL = visitShiftByConstant(N, N1C); 4438 if (NewSHL.getNode()) 4439 return NewSHL; 4440 } 4441 4442 return SDValue(); 4443 } 4444 4445 SDValue DAGCombiner::visitSRA(SDNode *N) { 4446 SDValue N0 = N->getOperand(0); 4447 SDValue N1 = N->getOperand(1); 4448 EVT VT = N0.getValueType(); 4449 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 4450 4451 // fold vector ops 4452 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4453 if (VT.isVector()) { 4454 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 4455 return FoldedVOp; 4456 4457 N1C = isConstOrConstSplat(N1); 4458 } 4459 4460 // fold (sra c1, c2) -> (sra c1, c2) 4461 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 4462 if (N0C && N1C && !N1C->isOpaque()) 4463 return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); 4464 // fold (sra 0, x) -> 0 4465 if (isNullConstant(N0)) 4466 return N0; 4467 // fold (sra -1, x) -> -1 4468 if (isAllOnesConstant(N0)) 4469 return N0; 4470 // fold (sra x, (setge c, size(x))) -> undef 4471 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 4472 return DAG.getUNDEF(VT); 4473 // fold (sra x, 0) -> x 4474 if (N1C && N1C->isNullValue()) 4475 return N0; 4476 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports 4477 // sext_inreg. 
4478 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { 4479 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); 4480 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); 4481 if (VT.isVector()) 4482 ExtVT = EVT::getVectorVT(*DAG.getContext(), 4483 ExtVT, VT.getVectorNumElements()); 4484 if ((!LegalOperations || 4485 TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) 4486 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 4487 N0.getOperand(0), DAG.getValueType(ExtVT)); 4488 } 4489 4490 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) 4491 if (N1C && N0.getOpcode() == ISD::SRA) { 4492 if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) { 4493 unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); 4494 if (Sum >= OpSizeInBits) 4495 Sum = OpSizeInBits - 1; 4496 SDLoc DL(N); 4497 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), 4498 DAG.getConstant(Sum, DL, N1.getValueType())); 4499 } 4500 } 4501 4502 // fold (sra (shl X, m), (sub result_size, n)) 4503 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for 4504 // result_size - n != m. 4505 // If truncate is free for the target sext(shl) is likely to result in better 4506 // code. 4507 if (N0.getOpcode() == ISD::SHL && N1C) { 4508 // Get the two constanst of the shifts, CN0 = m, CN = n. 4509 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); 4510 if (N01C) { 4511 LLVMContext &Ctx = *DAG.getContext(); 4512 // Determine what the truncate's result bitsize and type would be. 4513 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()); 4514 4515 if (VT.isVector()) 4516 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); 4517 4518 // Determine the residual right-shift amount. 
4519 signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); 4520 4521 // If the shift is not a no-op (in which case this should be just a sign 4522 // extend already), the truncated to type is legal, sign_extend is legal 4523 // on that type, and the truncate to that type is both legal and free, 4524 // perform the transform. 4525 if ((ShiftAmt > 0) && 4526 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && 4527 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && 4528 TLI.isTruncateFree(VT, TruncVT)) { 4529 4530 SDLoc DL(N); 4531 SDValue Amt = DAG.getConstant(ShiftAmt, DL, 4532 getShiftAmountTy(N0.getOperand(0).getValueType())); 4533 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, 4534 N0.getOperand(0), Amt); 4535 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, 4536 Shift); 4537 return DAG.getNode(ISD::SIGN_EXTEND, DL, 4538 N->getValueType(0), Trunc); 4539 } 4540 } 4541 } 4542 4543 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 4544 if (N1.getOpcode() == ISD::TRUNCATE && 4545 N1.getOperand(0).getOpcode() == ISD::AND) { 4546 SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); 4547 if (NewOp1.getNode()) 4548 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); 4549 } 4550 4551 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2)) 4552 // if c1 is equal to the number of bits the trunc removes 4553 if (N0.getOpcode() == ISD::TRUNCATE && 4554 (N0.getOperand(0).getOpcode() == ISD::SRL || 4555 N0.getOperand(0).getOpcode() == ISD::SRA) && 4556 N0.getOperand(0).hasOneUse() && 4557 N0.getOperand(0).getOperand(1).hasOneUse() && 4558 N1C) { 4559 SDValue N0Op0 = N0.getOperand(0); 4560 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) { 4561 unsigned LargeShiftVal = LargeShift->getZExtValue(); 4562 EVT LargeVT = N0Op0.getValueType(); 4563 4564 if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { 4565 SDLoc DL(N); 4566 SDValue Amt = 4567 DAG.getConstant(LargeShiftVal 
+ N1C->getZExtValue(), DL, 4568 getShiftAmountTy(N0Op0.getOperand(0).getValueType())); 4569 SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, 4570 N0Op0.getOperand(0), Amt); 4571 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); 4572 } 4573 } 4574 } 4575 4576 // Simplify, based on bits shifted out of the LHS. 4577 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 4578 return SDValue(N, 0); 4579 4580 4581 // If the sign bit is known to be zero, switch this to a SRL. 4582 if (DAG.SignBitIsZero(N0)) 4583 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); 4584 4585 if (N1C && !N1C->isOpaque()) { 4586 SDValue NewSRA = visitShiftByConstant(N, N1C); 4587 if (NewSRA.getNode()) 4588 return NewSRA; 4589 } 4590 4591 return SDValue(); 4592 } 4593 4594 SDValue DAGCombiner::visitSRL(SDNode *N) { 4595 SDValue N0 = N->getOperand(0); 4596 SDValue N1 = N->getOperand(1); 4597 EVT VT = N0.getValueType(); 4598 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 4599 4600 // fold vector ops 4601 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4602 if (VT.isVector()) { 4603 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 4604 return FoldedVOp; 4605 4606 N1C = isConstOrConstSplat(N1); 4607 } 4608 4609 // fold (srl c1, c2) -> c1 >>u c2 4610 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 4611 if (N0C && N1C && !N1C->isOpaque()) 4612 return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); 4613 // fold (srl 0, x) -> 0 4614 if (isNullConstant(N0)) 4615 return N0; 4616 // fold (srl x, c >= size(x)) -> undef 4617 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 4618 return DAG.getUNDEF(VT); 4619 // fold (srl x, 0) -> x 4620 if (N1C && N1C->isNullValue()) 4621 return N0; 4622 // if (srl x, c) is known to be zero, return 0 4623 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 4624 APInt::getAllOnesValue(OpSizeInBits))) 4625 return DAG.getConstant(0, SDLoc(N), VT); 4626 4627 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) 4628 if (N1C && N0.getOpcode() == ISD::SRL) { 
4629 if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) { 4630 uint64_t c1 = N01C->getZExtValue(); 4631 uint64_t c2 = N1C->getZExtValue(); 4632 SDLoc DL(N); 4633 if (c1 + c2 >= OpSizeInBits) 4634 return DAG.getConstant(0, DL, VT); 4635 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), 4636 DAG.getConstant(c1 + c2, DL, N1.getValueType())); 4637 } 4638 } 4639 4640 // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) 4641 if (N1C && N0.getOpcode() == ISD::TRUNCATE && 4642 N0.getOperand(0).getOpcode() == ISD::SRL && 4643 isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { 4644 uint64_t c1 = 4645 cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); 4646 uint64_t c2 = N1C->getZExtValue(); 4647 EVT InnerShiftVT = N0.getOperand(0).getValueType(); 4648 EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); 4649 uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); 4650 // This is only valid if the OpSizeInBits + c1 = size of inner shift. 4651 if (c1 + OpSizeInBits == InnerShiftSize) { 4652 SDLoc DL(N0); 4653 if (c1 + c2 >= InnerShiftSize) 4654 return DAG.getConstant(0, DL, VT); 4655 return DAG.getNode(ISD::TRUNCATE, DL, VT, 4656 DAG.getNode(ISD::SRL, DL, InnerShiftVT, 4657 N0.getOperand(0)->getOperand(0), 4658 DAG.getConstant(c1 + c2, DL, 4659 ShiftCountVT))); 4660 } 4661 } 4662 4663 // fold (srl (shl x, c), c) -> (and x, cst2) 4664 if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) { 4665 unsigned BitSize = N0.getScalarValueSizeInBits(); 4666 if (BitSize <= 64) { 4667 uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; 4668 SDLoc DL(N); 4669 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), 4670 DAG.getConstant(~0ULL >> ShAmt, DL, VT)); 4671 } 4672 } 4673 4674 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) 4675 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 4676 // Shifting in all undef bits? 
4677 EVT SmallVT = N0.getOperand(0).getValueType(); 4678 unsigned BitSize = SmallVT.getScalarSizeInBits(); 4679 if (N1C->getZExtValue() >= BitSize) 4680 return DAG.getUNDEF(VT); 4681 4682 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { 4683 uint64_t ShiftAmt = N1C->getZExtValue(); 4684 SDLoc DL0(N0); 4685 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT, 4686 N0.getOperand(0), 4687 DAG.getConstant(ShiftAmt, DL0, 4688 getShiftAmountTy(SmallVT))); 4689 AddToWorklist(SmallShift.getNode()); 4690 APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); 4691 SDLoc DL(N); 4692 return DAG.getNode(ISD::AND, DL, VT, 4693 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), 4694 DAG.getConstant(Mask, DL, VT)); 4695 } 4696 } 4697 4698 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign 4699 // bit, which is unmodified by sra. 4700 if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) { 4701 if (N0.getOpcode() == ISD::SRA) 4702 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); 4703 } 4704 4705 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). 4706 if (N1C && N0.getOpcode() == ISD::CTLZ && 4707 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { 4708 APInt KnownZero, KnownOne; 4709 DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne); 4710 4711 // If any of the input bits are KnownOne, then the input couldn't be all 4712 // zeros, thus the result of the srl will always be zero. 4713 if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); 4714 4715 // If all of the bits input the to ctlz node are known to be zero, then 4716 // the result of the ctlz is "32" and the result of the shift is one. 4717 APInt UnknownBits = ~KnownZero; 4718 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT); 4719 4720 // Otherwise, check to see if there is exactly one bit input to the ctlz. 
4721 if ((UnknownBits & (UnknownBits - 1)) == 0) { 4722 // Okay, we know that only that the single bit specified by UnknownBits 4723 // could be set on input to the CTLZ node. If this bit is set, the SRL 4724 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair 4725 // to an SRL/XOR pair, which is likely to simplify more. 4726 unsigned ShAmt = UnknownBits.countTrailingZeros(); 4727 SDValue Op = N0.getOperand(0); 4728 4729 if (ShAmt) { 4730 SDLoc DL(N0); 4731 Op = DAG.getNode(ISD::SRL, DL, VT, Op, 4732 DAG.getConstant(ShAmt, DL, 4733 getShiftAmountTy(Op.getValueType()))); 4734 AddToWorklist(Op.getNode()); 4735 } 4736 4737 SDLoc DL(N); 4738 return DAG.getNode(ISD::XOR, DL, VT, 4739 Op, DAG.getConstant(1, DL, VT)); 4740 } 4741 } 4742 4743 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). 4744 if (N1.getOpcode() == ISD::TRUNCATE && 4745 N1.getOperand(0).getOpcode() == ISD::AND) { 4746 SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); 4747 if (NewOp1.getNode()) 4748 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); 4749 } 4750 4751 // fold operands of srl based on knowledge that the low bits are not 4752 // demanded. 4753 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 4754 return SDValue(N, 0); 4755 4756 if (N1C && !N1C->isOpaque()) { 4757 SDValue NewSRL = visitShiftByConstant(N, N1C); 4758 if (NewSRL.getNode()) 4759 return NewSRL; 4760 } 4761 4762 // Attempt to convert a srl of a load into a narrower zero-extending load. 4763 SDValue NarrowLoad = ReduceLoadWidth(N); 4764 if (NarrowLoad.getNode()) 4765 return NarrowLoad; 4766 4767 // Here is a common situation. We want to optimize: 4768 // 4769 // %a = ... 4770 // %b = and i32 %a, 2 4771 // %c = srl i32 %b, 1 4772 // brcond i32 %c ... 4773 // 4774 // into 4775 // 4776 // %a = ... 4777 // %b = and %a, 2 4778 // %c = setcc eq %b, 0 4779 // brcond %c ... 
4780 // 4781 // However when after the source operand of SRL is optimized into AND, the SRL 4782 // itself may not be optimized further. Look for it and add the BRCOND into 4783 // the worklist. 4784 if (N->hasOneUse()) { 4785 SDNode *Use = *N->use_begin(); 4786 if (Use->getOpcode() == ISD::BRCOND) 4787 AddToWorklist(Use); 4788 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { 4789 // Also look pass the truncate. 4790 Use = *Use->use_begin(); 4791 if (Use->getOpcode() == ISD::BRCOND) 4792 AddToWorklist(Use); 4793 } 4794 } 4795 4796 return SDValue(); 4797 } 4798 4799 SDValue DAGCombiner::visitBSWAP(SDNode *N) { 4800 SDValue N0 = N->getOperand(0); 4801 EVT VT = N->getValueType(0); 4802 4803 // fold (bswap c1) -> c2 4804 if (isConstantIntBuildVectorOrConstantInt(N0)) 4805 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); 4806 // fold (bswap (bswap x)) -> x 4807 if (N0.getOpcode() == ISD::BSWAP) 4808 return N0->getOperand(0); 4809 return SDValue(); 4810 } 4811 4812 SDValue DAGCombiner::visitCTLZ(SDNode *N) { 4813 SDValue N0 = N->getOperand(0); 4814 EVT VT = N->getValueType(0); 4815 4816 // fold (ctlz c1) -> c2 4817 if (isConstantIntBuildVectorOrConstantInt(N0)) 4818 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); 4819 return SDValue(); 4820 } 4821 4822 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { 4823 SDValue N0 = N->getOperand(0); 4824 EVT VT = N->getValueType(0); 4825 4826 // fold (ctlz_zero_undef c1) -> c2 4827 if (isConstantIntBuildVectorOrConstantInt(N0)) 4828 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); 4829 return SDValue(); 4830 } 4831 4832 SDValue DAGCombiner::visitCTTZ(SDNode *N) { 4833 SDValue N0 = N->getOperand(0); 4834 EVT VT = N->getValueType(0); 4835 4836 // fold (cttz c1) -> c2 4837 if (isConstantIntBuildVectorOrConstantInt(N0)) 4838 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); 4839 return SDValue(); 4840 } 4841 4842 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { 4843 SDValue N0 = N->getOperand(0); 
4844 EVT VT = N->getValueType(0); 4845 4846 // fold (cttz_zero_undef c1) -> c2 4847 if (isConstantIntBuildVectorOrConstantInt(N0)) 4848 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); 4849 return SDValue(); 4850 } 4851 4852 SDValue DAGCombiner::visitCTPOP(SDNode *N) { 4853 SDValue N0 = N->getOperand(0); 4854 EVT VT = N->getValueType(0); 4855 4856 // fold (ctpop c1) -> c2 4857 if (isConstantIntBuildVectorOrConstantInt(N0)) 4858 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); 4859 return SDValue(); 4860 } 4861 4862 4863 /// \brief Generate Min/Max node 4864 static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, 4865 SDValue True, SDValue False, 4866 ISD::CondCode CC, const TargetLowering &TLI, 4867 SelectionDAG &DAG) { 4868 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) 4869 return SDValue(); 4870 4871 switch (CC) { 4872 case ISD::SETOLT: 4873 case ISD::SETOLE: 4874 case ISD::SETLT: 4875 case ISD::SETLE: 4876 case ISD::SETULT: 4877 case ISD::SETULE: { 4878 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM; 4879 if (TLI.isOperationLegal(Opcode, VT)) 4880 return DAG.getNode(Opcode, DL, VT, LHS, RHS); 4881 return SDValue(); 4882 } 4883 case ISD::SETOGT: 4884 case ISD::SETOGE: 4885 case ISD::SETGT: 4886 case ISD::SETGE: 4887 case ISD::SETUGT: 4888 case ISD::SETUGE: { 4889 unsigned Opcode = (LHS == True) ? 
ISD::FMAXNUM : ISD::FMINNUM; 4890 if (TLI.isOperationLegal(Opcode, VT)) 4891 return DAG.getNode(Opcode, DL, VT, LHS, RHS); 4892 return SDValue(); 4893 } 4894 default: 4895 return SDValue(); 4896 } 4897 } 4898 4899 SDValue DAGCombiner::visitSELECT(SDNode *N) { 4900 SDValue N0 = N->getOperand(0); 4901 SDValue N1 = N->getOperand(1); 4902 SDValue N2 = N->getOperand(2); 4903 EVT VT = N->getValueType(0); 4904 EVT VT0 = N0.getValueType(); 4905 4906 // fold (select C, X, X) -> X 4907 if (N1 == N2) 4908 return N1; 4909 if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) { 4910 // fold (select true, X, Y) -> X 4911 // fold (select false, X, Y) -> Y 4912 return !N0C->isNullValue() ? N1 : N2; 4913 } 4914 // fold (select C, 1, X) -> (or C, X) 4915 if (VT == MVT::i1 && isOneConstant(N1)) 4916 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); 4917 // fold (select C, 0, 1) -> (xor C, 1) 4918 // We can't do this reliably if integer based booleans have different contents 4919 // to floating point based booleans. This is because we can't tell whether we 4920 // have an integer-based boolean or a floating-point-based boolean unless we 4921 // can find the SETCC that produced it and inspect its operands. This is 4922 // fairly easy if C is the SETCC node, but it can potentially be 4923 // undiscoverable (or not reasonably discoverable). For example, it could be 4924 // in another basic block or it could require searching a complicated 4925 // expression. 
4926 if (VT.isInteger() && 4927 (VT0 == MVT::i1 || (VT0.isInteger() && 4928 TLI.getBooleanContents(false, false) == 4929 TLI.getBooleanContents(false, true) && 4930 TLI.getBooleanContents(false, false) == 4931 TargetLowering::ZeroOrOneBooleanContent)) && 4932 isNullConstant(N1) && isOneConstant(N2)) { 4933 SDValue XORNode; 4934 if (VT == VT0) { 4935 SDLoc DL(N); 4936 return DAG.getNode(ISD::XOR, DL, VT0, 4937 N0, DAG.getConstant(1, DL, VT0)); 4938 } 4939 SDLoc DL0(N0); 4940 XORNode = DAG.getNode(ISD::XOR, DL0, VT0, 4941 N0, DAG.getConstant(1, DL0, VT0)); 4942 AddToWorklist(XORNode.getNode()); 4943 if (VT.bitsGT(VT0)) 4944 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); 4945 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); 4946 } 4947 // fold (select C, 0, X) -> (and (not C), X) 4948 if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { 4949 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); 4950 AddToWorklist(NOTNode.getNode()); 4951 return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); 4952 } 4953 // fold (select C, X, 1) -> (or (not C), X) 4954 if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) { 4955 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); 4956 AddToWorklist(NOTNode.getNode()); 4957 return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); 4958 } 4959 // fold (select C, X, 0) -> (and C, X) 4960 if (VT == MVT::i1 && isNullConstant(N2)) 4961 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); 4962 // fold (select X, X, Y) -> (or X, Y) 4963 // fold (select X, 1, Y) -> (or X, Y) 4964 if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) 4965 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); 4966 // fold (select X, Y, X) -> (and X, Y) 4967 // fold (select X, Y, 0) -> (and X, Y) 4968 if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) 4969 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); 4970 4971 // If we can fold this based on the true/false value, do so. 
4972 if (SimplifySelectOps(N, N1, N2)) 4973 return SDValue(N, 0); // Don't revisit N. 4974 4975 // fold selects based on a setcc into other things, such as min/max/abs 4976 if (N0.getOpcode() == ISD::SETCC) { 4977 // select x, y (fcmp lt x, y) -> fminnum x, y 4978 // select x, y (fcmp gt x, y) -> fmaxnum x, y 4979 // 4980 // This is OK if we don't care about what happens if either operand is a 4981 // NaN. 4982 // 4983 4984 // FIXME: Instead of testing for UnsafeFPMath, this should be checking for 4985 // no signed zeros as well as no nans. 4986 const TargetOptions &Options = DAG.getTarget().Options; 4987 if (Options.UnsafeFPMath && 4988 VT.isFloatingPoint() && N0.hasOneUse() && 4989 DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { 4990 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 4991 4992 SDValue FMinMax = 4993 combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), 4994 N1, N2, CC, TLI, DAG); 4995 if (FMinMax) 4996 return FMinMax; 4997 } 4998 4999 if ((!LegalOperations && 5000 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || 5001 TLI.isOperationLegal(ISD::SELECT_CC, VT)) 5002 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, 5003 N0.getOperand(0), N0.getOperand(1), 5004 N1, N2, N0.getOperand(2)); 5005 return SimplifySelect(SDLoc(N), N0, N1, N2); 5006 } 5007 5008 if (VT0 == MVT::i1) { 5009 if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { 5010 // select (and Cond0, Cond1), X, Y 5011 // -> select Cond0, (select Cond1, X, Y), Y 5012 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { 5013 SDValue Cond0 = N0->getOperand(0); 5014 SDValue Cond1 = N0->getOperand(1); 5015 SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), 5016 N1.getValueType(), Cond1, N1, N2); 5017 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, 5018 InnerSelect, N2); 5019 } 5020 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) 5021 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { 5022 
SDValue Cond0 = N0->getOperand(0); 5023 SDValue Cond1 = N0->getOperand(1); 5024 SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), 5025 N1.getValueType(), Cond1, N1, N2); 5026 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, 5027 InnerSelect); 5028 } 5029 } 5030 5031 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y 5032 if (N1->getOpcode() == ISD::SELECT) { 5033 SDValue N1_0 = N1->getOperand(0); 5034 SDValue N1_1 = N1->getOperand(1); 5035 SDValue N1_2 = N1->getOperand(2); 5036 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { 5037 // Create the actual and node if we can generate good code for it. 5038 if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { 5039 SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), 5040 N0, N1_0); 5041 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, 5042 N1_1, N2); 5043 } 5044 // Otherwise see if we can optimize the "and" to a better pattern. 5045 if (SDValue Combined = visitANDLike(N0, N1_0, N)) 5046 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, 5047 N1_1, N2); 5048 } 5049 } 5050 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y 5051 if (N2->getOpcode() == ISD::SELECT) { 5052 SDValue N2_0 = N2->getOperand(0); 5053 SDValue N2_1 = N2->getOperand(1); 5054 SDValue N2_2 = N2->getOperand(2); 5055 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { 5056 // Create the actual or node if we can generate good code for it. 5057 if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { 5058 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), 5059 N0, N2_0); 5060 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, 5061 N1, N2_2); 5062 } 5063 // Otherwise see if we can optimize to a better pattern. 
5064 if (SDValue Combined = visitORLike(N0, N2_0, N)) 5065 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, 5066 N1, N2_2); 5067 } 5068 } 5069 } 5070 5071 return SDValue(); 5072 } 5073 5074 static 5075 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { 5076 SDLoc DL(N); 5077 EVT LoVT, HiVT; 5078 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 5079 5080 // Split the inputs. 5081 SDValue Lo, Hi, LL, LH, RL, RH; 5082 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); 5083 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); 5084 5085 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); 5086 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); 5087 5088 return std::make_pair(Lo, Hi); 5089 } 5090 5091 // This function assumes all the vselect's arguments are CONCAT_VECTOR 5092 // nodes and that the condition is a BV of ConstantSDNodes (or undefs). 5093 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { 5094 SDLoc dl(N); 5095 SDValue Cond = N->getOperand(0); 5096 SDValue LHS = N->getOperand(1); 5097 SDValue RHS = N->getOperand(2); 5098 EVT VT = N->getValueType(0); 5099 int NumElems = VT.getVectorNumElements(); 5100 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && 5101 RHS.getOpcode() == ISD::CONCAT_VECTORS && 5102 Cond.getOpcode() == ISD::BUILD_VECTOR); 5103 5104 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about 5105 // binary ones here. 5106 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2) 5107 return SDValue(); 5108 5109 // We're sure we have an even number of elements due to the 5110 // concat_vectors we have as arguments to vselect. 5111 // Skip BV elements until we find one that's not an UNDEF 5112 // After we find an UNDEF element, keep looping until we get to half the 5113 // length of the BV and see if all the non-undef nodes are the same. 
5114 ConstantSDNode *BottomHalf = nullptr; 5115 for (int i = 0; i < NumElems / 2; ++i) { 5116 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 5117 continue; 5118 5119 if (BottomHalf == nullptr) 5120 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 5121 else if (Cond->getOperand(i).getNode() != BottomHalf) 5122 return SDValue(); 5123 } 5124 5125 // Do the same for the second half of the BuildVector 5126 ConstantSDNode *TopHalf = nullptr; 5127 for (int i = NumElems / 2; i < NumElems; ++i) { 5128 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 5129 continue; 5130 5131 if (TopHalf == nullptr) 5132 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 5133 else if (Cond->getOperand(i).getNode() != TopHalf) 5134 return SDValue(); 5135 } 5136 5137 assert(TopHalf && BottomHalf && 5138 "One half of the selector was all UNDEFs and the other was all the " 5139 "same value. This should have been addressed before this function."); 5140 return DAG.getNode( 5141 ISD::CONCAT_VECTORS, dl, VT, 5142 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), 5143 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); 5144 } 5145 5146 SDValue DAGCombiner::visitMSCATTER(SDNode *N) { 5147 5148 if (Level >= AfterLegalizeTypes) 5149 return SDValue(); 5150 5151 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); 5152 SDValue Mask = MSC->getMask(); 5153 SDValue Data = MSC->getValue(); 5154 SDLoc DL(N); 5155 5156 // If the MSCATTER data type requires splitting and the mask is provided by a 5157 // SETCC, then split both nodes and its operands before legalization. This 5158 // prevents the type legalizer from unrolling SETCC into scalar comparisons 5159 // and enables future optimizations (e.g. min/max pattern matching on X86). 5160 if (Mask.getOpcode() != ISD::SETCC) 5161 return SDValue(); 5162 5163 // Check if any splitting is required. 
5164 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != 5165 TargetLowering::TypeSplitVector) 5166 return SDValue(); 5167 SDValue MaskLo, MaskHi, Lo, Hi; 5168 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 5169 5170 EVT LoVT, HiVT; 5171 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); 5172 5173 SDValue Chain = MSC->getChain(); 5174 5175 EVT MemoryVT = MSC->getMemoryVT(); 5176 unsigned Alignment = MSC->getOriginalAlignment(); 5177 5178 EVT LoMemVT, HiMemVT; 5179 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 5180 5181 SDValue DataLo, DataHi; 5182 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 5183 5184 SDValue BasePtr = MSC->getBasePtr(); 5185 SDValue IndexLo, IndexHi; 5186 std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); 5187 5188 MachineMemOperand *MMO = DAG.getMachineFunction(). 5189 getMachineMemOperand(MSC->getPointerInfo(), 5190 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 5191 Alignment, MSC->getAAInfo(), MSC->getRanges()); 5192 5193 SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo }; 5194 Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), 5195 DL, OpsLo, MMO); 5196 5197 SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi}; 5198 Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), 5199 DL, OpsHi, MMO); 5200 5201 AddToWorklist(Lo.getNode()); 5202 AddToWorklist(Hi.getNode()); 5203 5204 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 5205 } 5206 5207 SDValue DAGCombiner::visitMSTORE(SDNode *N) { 5208 5209 if (Level >= AfterLegalizeTypes) 5210 return SDValue(); 5211 5212 MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); 5213 SDValue Mask = MST->getMask(); 5214 SDValue Data = MST->getValue(); 5215 SDLoc DL(N); 5216 5217 // If the MSTORE data type requires splitting and the mask is provided by a 5218 // SETCC, then split both nodes and its operands before legalization. 
This 5219 // prevents the type legalizer from unrolling SETCC into scalar comparisons 5220 // and enables future optimizations (e.g. min/max pattern matching on X86). 5221 if (Mask.getOpcode() == ISD::SETCC) { 5222 5223 // Check if any splitting is required. 5224 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != 5225 TargetLowering::TypeSplitVector) 5226 return SDValue(); 5227 5228 SDValue MaskLo, MaskHi, Lo, Hi; 5229 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 5230 5231 EVT LoVT, HiVT; 5232 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); 5233 5234 SDValue Chain = MST->getChain(); 5235 SDValue Ptr = MST->getBasePtr(); 5236 5237 EVT MemoryVT = MST->getMemoryVT(); 5238 unsigned Alignment = MST->getOriginalAlignment(); 5239 5240 // if Alignment is equal to the vector size, 5241 // take the half of it for the second part 5242 unsigned SecondHalfAlignment = 5243 (Alignment == Data->getValueType(0).getSizeInBits()/8) ? 5244 Alignment/2 : Alignment; 5245 5246 EVT LoMemVT, HiMemVT; 5247 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 5248 5249 SDValue DataLo, DataHi; 5250 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 5251 5252 MachineMemOperand *MMO = DAG.getMachineFunction(). 5253 getMachineMemOperand(MST->getPointerInfo(), 5254 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 5255 Alignment, MST->getAAInfo(), MST->getRanges()); 5256 5257 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, 5258 MST->isTruncatingStore()); 5259 5260 unsigned IncrementSize = LoMemVT.getSizeInBits()/8; 5261 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 5262 DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); 5263 5264 MMO = DAG.getMachineFunction(). 
5265 getMachineMemOperand(MST->getPointerInfo(), 5266 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), 5267 SecondHalfAlignment, MST->getAAInfo(), 5268 MST->getRanges()); 5269 5270 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, 5271 MST->isTruncatingStore()); 5272 5273 AddToWorklist(Lo.getNode()); 5274 AddToWorklist(Hi.getNode()); 5275 5276 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 5277 } 5278 return SDValue(); 5279 } 5280 5281 SDValue DAGCombiner::visitMGATHER(SDNode *N) { 5282 5283 if (Level >= AfterLegalizeTypes) 5284 return SDValue(); 5285 5286 MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N); 5287 SDValue Mask = MGT->getMask(); 5288 SDLoc DL(N); 5289 5290 // If the MGATHER result requires splitting and the mask is provided by a 5291 // SETCC, then split both nodes and its operands before legalization. This 5292 // prevents the type legalizer from unrolling SETCC into scalar comparisons 5293 // and enables future optimizations (e.g. min/max pattern matching on X86). 5294 5295 if (Mask.getOpcode() != ISD::SETCC) 5296 return SDValue(); 5297 5298 EVT VT = N->getValueType(0); 5299 5300 // Check if any splitting is required. 
5301 if (TLI.getTypeAction(*DAG.getContext(), VT) != 5302 TargetLowering::TypeSplitVector) 5303 return SDValue(); 5304 5305 SDValue MaskLo, MaskHi, Lo, Hi; 5306 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 5307 5308 SDValue Src0 = MGT->getValue(); 5309 SDValue Src0Lo, Src0Hi; 5310 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); 5311 5312 EVT LoVT, HiVT; 5313 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); 5314 5315 SDValue Chain = MGT->getChain(); 5316 EVT MemoryVT = MGT->getMemoryVT(); 5317 unsigned Alignment = MGT->getOriginalAlignment(); 5318 5319 EVT LoMemVT, HiMemVT; 5320 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 5321 5322 SDValue BasePtr = MGT->getBasePtr(); 5323 SDValue Index = MGT->getIndex(); 5324 SDValue IndexLo, IndexHi; 5325 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); 5326 5327 MachineMemOperand *MMO = DAG.getMachineFunction(). 5328 getMachineMemOperand(MGT->getPointerInfo(), 5329 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), 5330 Alignment, MGT->getAAInfo(), MGT->getRanges()); 5331 5332 SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo }; 5333 Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, 5334 MMO); 5335 5336 SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi}; 5337 Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, 5338 MMO); 5339 5340 AddToWorklist(Lo.getNode()); 5341 AddToWorklist(Hi.getNode()); 5342 5343 // Build a factor node to remember that this load is independent of the 5344 // other one. 5345 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), 5346 Hi.getValue(1)); 5347 5348 // Legalized the chain result - switch anything that used the old chain to 5349 // use the new one. 
5350 DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); 5351 5352 SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 5353 5354 SDValue RetOps[] = { GatherRes, Chain }; 5355 return DAG.getMergeValues(RetOps, DL); 5356 } 5357 5358 SDValue DAGCombiner::visitMLOAD(SDNode *N) { 5359 5360 if (Level >= AfterLegalizeTypes) 5361 return SDValue(); 5362 5363 MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); 5364 SDValue Mask = MLD->getMask(); 5365 SDLoc DL(N); 5366 5367 // If the MLOAD result requires splitting and the mask is provided by a 5368 // SETCC, then split both nodes and its operands before legalization. This 5369 // prevents the type legalizer from unrolling SETCC into scalar comparisons 5370 // and enables future optimizations (e.g. min/max pattern matching on X86). 5371 5372 if (Mask.getOpcode() == ISD::SETCC) { 5373 EVT VT = N->getValueType(0); 5374 5375 // Check if any splitting is required. 5376 if (TLI.getTypeAction(*DAG.getContext(), VT) != 5377 TargetLowering::TypeSplitVector) 5378 return SDValue(); 5379 5380 SDValue MaskLo, MaskHi, Lo, Hi; 5381 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 5382 5383 SDValue Src0 = MLD->getSrc0(); 5384 SDValue Src0Lo, Src0Hi; 5385 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); 5386 5387 EVT LoVT, HiVT; 5388 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); 5389 5390 SDValue Chain = MLD->getChain(); 5391 SDValue Ptr = MLD->getBasePtr(); 5392 EVT MemoryVT = MLD->getMemoryVT(); 5393 unsigned Alignment = MLD->getOriginalAlignment(); 5394 5395 // if Alignment is equal to the vector size, 5396 // take the half of it for the second part 5397 unsigned SecondHalfAlignment = 5398 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 5399 Alignment/2 : Alignment; 5400 5401 EVT LoMemVT, HiMemVT; 5402 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 5403 5404 MachineMemOperand *MMO = DAG.getMachineFunction(). 
5405 getMachineMemOperand(MLD->getPointerInfo(), 5406 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), 5407 Alignment, MLD->getAAInfo(), MLD->getRanges()); 5408 5409 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, 5410 ISD::NON_EXTLOAD); 5411 5412 unsigned IncrementSize = LoMemVT.getSizeInBits()/8; 5413 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 5414 DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); 5415 5416 MMO = DAG.getMachineFunction(). 5417 getMachineMemOperand(MLD->getPointerInfo(), 5418 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), 5419 SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); 5420 5421 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, 5422 ISD::NON_EXTLOAD); 5423 5424 AddToWorklist(Lo.getNode()); 5425 AddToWorklist(Hi.getNode()); 5426 5427 // Build a factor node to remember that this load is independent of the 5428 // other one. 5429 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), 5430 Hi.getValue(1)); 5431 5432 // Legalized the chain result - switch anything that used the old chain to 5433 // use the new one. 5434 DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); 5435 5436 SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 5437 5438 SDValue RetOps[] = { LoadRes, Chain }; 5439 return DAG.getMergeValues(RetOps, DL); 5440 } 5441 return SDValue(); 5442 } 5443 5444 SDValue DAGCombiner::visitVSELECT(SDNode *N) { 5445 SDValue N0 = N->getOperand(0); 5446 SDValue N1 = N->getOperand(1); 5447 SDValue N2 = N->getOperand(2); 5448 SDLoc DL(N); 5449 5450 // Canonicalize integer abs. 
5451 // vselect (setg[te] X, 0), X, -X -> 5452 // vselect (setgt X, -1), X, -X -> 5453 // vselect (setl[te] X, 0), -X, X -> 5454 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 5455 if (N0.getOpcode() == ISD::SETCC) { 5456 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 5457 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 5458 bool isAbs = false; 5459 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); 5460 5461 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || 5462 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && 5463 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) 5464 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); 5465 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && 5466 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) 5467 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); 5468 5469 if (isAbs) { 5470 EVT VT = LHS.getValueType(); 5471 SDValue Shift = DAG.getNode( 5472 ISD::SRA, DL, VT, LHS, 5473 DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT)); 5474 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); 5475 AddToWorklist(Shift.getNode()); 5476 AddToWorklist(Add.getNode()); 5477 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); 5478 } 5479 } 5480 5481 if (SimplifySelectOps(N, N1, N2)) 5482 return SDValue(N, 0); // Don't revisit N. 5483 5484 // If the VSELECT result requires splitting and the mask is provided by a 5485 // SETCC, then split both nodes and its operands before legalization. This 5486 // prevents the type legalizer from unrolling SETCC into scalar comparisons 5487 // and enables future optimizations (e.g. min/max pattern matching on X86). 5488 if (N0.getOpcode() == ISD::SETCC) { 5489 EVT VT = N->getValueType(0); 5490 5491 // Check if any splitting is required. 
5492 if (TLI.getTypeAction(*DAG.getContext(), VT) != 5493 TargetLowering::TypeSplitVector) 5494 return SDValue(); 5495 5496 SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH; 5497 std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); 5498 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); 5499 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); 5500 5501 Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL); 5502 Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH); 5503 5504 // Add the new VSELECT nodes to the work list in case they need to be split 5505 // again. 5506 AddToWorklist(Lo.getNode()); 5507 AddToWorklist(Hi.getNode()); 5508 5509 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 5510 } 5511 5512 // Fold (vselect (build_vector all_ones), N1, N2) -> N1 5513 if (ISD::isBuildVectorAllOnes(N0.getNode())) 5514 return N1; 5515 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2 5516 if (ISD::isBuildVectorAllZeros(N0.getNode())) 5517 return N2; 5518 5519 // The ConvertSelectToConcatVector function is assuming both the above 5520 // checks for (vselect (build_vector all{ones,zeros) ...) have been made 5521 // and addressed. 
5522 if (N1.getOpcode() == ISD::CONCAT_VECTORS && 5523 N2.getOpcode() == ISD::CONCAT_VECTORS && 5524 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { 5525 SDValue CV = ConvertSelectToConcatVector(N, DAG); 5526 if (CV.getNode()) 5527 return CV; 5528 } 5529 5530 return SDValue(); 5531 } 5532 5533 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { 5534 SDValue N0 = N->getOperand(0); 5535 SDValue N1 = N->getOperand(1); 5536 SDValue N2 = N->getOperand(2); 5537 SDValue N3 = N->getOperand(3); 5538 SDValue N4 = N->getOperand(4); 5539 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); 5540 5541 // fold select_cc lhs, rhs, x, x, cc -> x 5542 if (N2 == N3) 5543 return N2; 5544 5545 // Determine if the condition we're dealing with is constant 5546 SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), 5547 N0, N1, CC, SDLoc(N), false); 5548 if (SCC.getNode()) { 5549 AddToWorklist(SCC.getNode()); 5550 5551 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { 5552 if (!SCCC->isNullValue()) 5553 return N2; // cond always true -> true val 5554 else 5555 return N3; // cond always false -> false val 5556 } else if (SCC->getOpcode() == ISD::UNDEF) { 5557 // When the condition is UNDEF, just return the first operand. This is 5558 // coherent the DAG creation, no setcc node is created in this case 5559 return N2; 5560 } else if (SCC.getOpcode() == ISD::SETCC) { 5561 // Fold to a simpler select_cc 5562 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), 5563 SCC.getOperand(0), SCC.getOperand(1), N2, N3, 5564 SCC.getOperand(2)); 5565 } 5566 } 5567 5568 // If we can fold this based on the true/false value, do so. 5569 if (SimplifySelectOps(N, N2, N3)) 5570 return SDValue(N, 0); // Don't revisit N. 
5571 5572 // fold select_cc into other things, such as min/max/abs 5573 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); 5574 } 5575 5576 SDValue DAGCombiner::visitSETCC(SDNode *N) { 5577 return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), 5578 cast<CondCodeSDNode>(N->getOperand(2))->get(), 5579 SDLoc(N)); 5580 } 5581 5582 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or 5583 /// a build_vector of constants. 5584 /// This function is called by the DAGCombiner when visiting sext/zext/aext 5585 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). 5586 /// Vector extends are not folded if operations are legal; this is to 5587 /// avoid introducing illegal build_vector dag nodes. 5588 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, 5589 SelectionDAG &DAG, bool LegalTypes, 5590 bool LegalOperations) { 5591 unsigned Opcode = N->getOpcode(); 5592 SDValue N0 = N->getOperand(0); 5593 EVT VT = N->getValueType(0); 5594 5595 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || 5596 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) 5597 && "Expected EXTEND dag node in input!"); 5598 5599 // fold (sext c1) -> c1 5600 // fold (zext c1) -> c1 5601 // fold (aext c1) -> c1 5602 if (isa<ConstantSDNode>(N0)) 5603 return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); 5604 5605 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) 5606 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) 5607 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) 5608 EVT SVT = VT.getScalarType(); 5609 if (!(VT.isVector() && 5610 (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && 5611 ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) 5612 return nullptr; 5613 5614 // We can fold this node into a build_vector. 
5615 unsigned VTBits = SVT.getSizeInBits(); 5616 unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); 5617 SmallVector<SDValue, 8> Elts; 5618 unsigned NumElts = VT.getVectorNumElements(); 5619 SDLoc DL(N); 5620 5621 for (unsigned i=0; i != NumElts; ++i) { 5622 SDValue Op = N0->getOperand(i); 5623 if (Op->getOpcode() == ISD::UNDEF) { 5624 Elts.push_back(DAG.getUNDEF(SVT)); 5625 continue; 5626 } 5627 5628 SDLoc DL(Op); 5629 // Get the constant value and if needed trunc it to the size of the type. 5630 // Nodes like build_vector might have constants wider than the scalar type. 5631 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits); 5632 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) 5633 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT)); 5634 else 5635 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); 5636 } 5637 5638 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); 5639 } 5640 5641 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: 5642 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" 5643 // transformation. Returns true if extension are possible and the above 5644 // mentioned transformation is profitable. 5645 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, 5646 unsigned ExtOpc, 5647 SmallVectorImpl<SDNode *> &ExtendNodes, 5648 const TargetLowering &TLI) { 5649 bool HasCopyToRegUses = false; 5650 bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); 5651 for (SDNode::use_iterator UI = N0.getNode()->use_begin(), 5652 UE = N0.getNode()->use_end(); 5653 UI != UE; ++UI) { 5654 SDNode *User = *UI; 5655 if (User == N) 5656 continue; 5657 if (UI.getUse().getResNo() != N0.getResNo()) 5658 continue; 5659 // FIXME: Only extend SETCC N, N and SETCC N, c for now. 
5660 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 5661 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 5662 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 5663 // Sign bits will be lost after a zext. 5664 return false; 5665 bool Add = false; 5666 for (unsigned i = 0; i != 2; ++i) { 5667 SDValue UseOp = User->getOperand(i); 5668 if (UseOp == N0) 5669 continue; 5670 if (!isa<ConstantSDNode>(UseOp)) 5671 return false; 5672 Add = true; 5673 } 5674 if (Add) 5675 ExtendNodes.push_back(User); 5676 continue; 5677 } 5678 // If truncates aren't free and there are users we can't 5679 // extend, it isn't worthwhile. 5680 if (!isTruncFree) 5681 return false; 5682 // Remember if this value is live-out. 5683 if (User->getOpcode() == ISD::CopyToReg) 5684 HasCopyToRegUses = true; 5685 } 5686 5687 if (HasCopyToRegUses) { 5688 bool BothLiveOut = false; 5689 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 5690 UI != UE; ++UI) { 5691 SDUse &Use = UI.getUse(); 5692 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 5693 BothLiveOut = true; 5694 break; 5695 } 5696 } 5697 if (BothLiveOut) 5698 // Both unextended and extended values are live out. There had better be 5699 // a good reason for the transformation. 5700 return ExtendNodes.size(); 5701 } 5702 return true; 5703 } 5704 5705 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, 5706 SDValue Trunc, SDValue ExtLoad, SDLoc DL, 5707 ISD::NodeType ExtType) { 5708 // Extend SetCC uses if necessary. 
5709 for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { 5710 SDNode *SetCC = SetCCs[i]; 5711 SmallVector<SDValue, 4> Ops; 5712 5713 for (unsigned j = 0; j != 2; ++j) { 5714 SDValue SOp = SetCC->getOperand(j); 5715 if (SOp == Trunc) 5716 Ops.push_back(ExtLoad); 5717 else 5718 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); 5719 } 5720 5721 Ops.push_back(SetCC->getOperand(2)); 5722 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); 5723 } 5724 } 5725 5726 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). 5727 SDValue DAGCombiner::CombineExtLoad(SDNode *N) { 5728 SDValue N0 = N->getOperand(0); 5729 EVT DstVT = N->getValueType(0); 5730 EVT SrcVT = N0.getValueType(); 5731 5732 assert((N->getOpcode() == ISD::SIGN_EXTEND || 5733 N->getOpcode() == ISD::ZERO_EXTEND) && 5734 "Unexpected node type (not an extend)!"); 5735 5736 // fold (sext (load x)) to multiple smaller sextloads; same for zext. 5737 // For example, on a target with legal v4i32, but illegal v8i32, turn: 5738 // (v8i32 (sext (v8i16 (load x)))) 5739 // into: 5740 // (v8i32 (concat_vectors (v4i32 (sextload x)), 5741 // (v4i32 (sextload (x + 16))))) 5742 // Where uses of the original load, i.e.: 5743 // (v8i16 (load x)) 5744 // are replaced with: 5745 // (v8i16 (truncate 5746 // (v8i32 (concat_vectors (v4i32 (sextload x)), 5747 // (v4i32 (sextload (x + 16))))))) 5748 // 5749 // This combine is only applicable to illegal, but splittable, vectors. 5750 // All legal types, and illegal non-vector types, are handled elsewhere. 5751 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. 
5752 // 5753 if (N0->getOpcode() != ISD::LOAD) 5754 return SDValue(); 5755 5756 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 5757 5758 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || 5759 !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || 5760 !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) 5761 return SDValue(); 5762 5763 SmallVector<SDNode *, 4> SetCCs; 5764 if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI)) 5765 return SDValue(); 5766 5767 ISD::LoadExtType ExtType = 5768 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; 5769 5770 // Try to split the vector types to get down to legal types. 5771 EVT SplitSrcVT = SrcVT; 5772 EVT SplitDstVT = DstVT; 5773 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && 5774 SplitSrcVT.getVectorNumElements() > 1) { 5775 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; 5776 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; 5777 } 5778 5779 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) 5780 return SDValue(); 5781 5782 SDLoc DL(N); 5783 const unsigned NumSplits = 5784 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); 5785 const unsigned Stride = SplitSrcVT.getStoreSize(); 5786 SmallVector<SDValue, 4> Loads; 5787 SmallVector<SDValue, 4> Chains; 5788 5789 SDValue BasePtr = LN0->getBasePtr(); 5790 for (unsigned Idx = 0; Idx < NumSplits; Idx++) { 5791 const unsigned Offset = Idx * Stride; 5792 const unsigned Align = MinAlign(LN0->getAlignment(), Offset); 5793 5794 SDValue SplitLoad = DAG.getExtLoad( 5795 ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, 5796 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, 5797 LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), 5798 Align, LN0->getAAInfo()); 5799 5800 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, 5801 DAG.getConstant(Stride, DL, BasePtr.getValueType())); 5802 5803 Loads.push_back(SplitLoad.getValue(0)); 5804 
Chains.push_back(SplitLoad.getValue(1)); 5805 } 5806 5807 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); 5808 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); 5809 5810 CombineTo(N, NewValue); 5811 5812 // Replace uses of the original load (before extension) 5813 // with a truncate of the concatenated sextloaded vectors. 5814 SDValue Trunc = 5815 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); 5816 CombineTo(N0.getNode(), Trunc, NewChain); 5817 ExtendSetCCUses(SetCCs, Trunc, NewValue, DL, 5818 (ISD::NodeType)N->getOpcode()); 5819 return SDValue(N, 0); // Return N so it doesn't get rechecked! 5820 } 5821 5822 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { 5823 SDValue N0 = N->getOperand(0); 5824 EVT VT = N->getValueType(0); 5825 5826 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, 5827 LegalOperations)) 5828 return SDValue(Res, 0); 5829 5830 // fold (sext (sext x)) -> (sext x) 5831 // fold (sext (aext x)) -> (sext x) 5832 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) 5833 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, 5834 N0.getOperand(0)); 5835 5836 if (N0.getOpcode() == ISD::TRUNCATE) { 5837 // fold (sext (truncate (load x))) -> (sext (smaller load x)) 5838 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) 5839 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 5840 if (NarrowLoad.getNode()) { 5841 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 5842 if (NarrowLoad.getNode() != N0.getNode()) { 5843 CombineTo(N0.getNode(), NarrowLoad); 5844 // CombineTo deleted the truncate, if needed, but not what's under it. 5845 AddToWorklist(oye); 5846 } 5847 return SDValue(N, 0); // Return N so it doesn't get rechecked! 5848 } 5849 5850 // See if the value being truncated is already sign extended. If so, just 5851 // eliminate the trunc/sext pair. 
5852 SDValue Op = N0.getOperand(0); 5853 unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits(); 5854 unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits(); 5855 unsigned DestBits = VT.getScalarType().getSizeInBits(); 5856 unsigned NumSignBits = DAG.ComputeNumSignBits(Op); 5857 5858 if (OpBits == DestBits) { 5859 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign 5860 // bits, it is already ready. 5861 if (NumSignBits > DestBits-MidBits) 5862 return Op; 5863 } else if (OpBits < DestBits) { 5864 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign 5865 // bits, just sext from i32. 5866 if (NumSignBits > OpBits-MidBits) 5867 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op); 5868 } else { 5869 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign 5870 // bits, just truncate to i32. 5871 if (NumSignBits > OpBits-MidBits) 5872 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); 5873 } 5874 5875 // fold (sext (truncate x)) -> (sextinreg x). 5876 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, 5877 N0.getValueType())) { 5878 if (OpBits < DestBits) 5879 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op); 5880 else if (OpBits > DestBits) 5881 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op); 5882 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op, 5883 DAG.getValueType(N0.getValueType())); 5884 } 5885 } 5886 5887 // fold (sext (load x)) -> (sext (truncate (sextload x))) 5888 // Only generate vector extloads when 1) they're legal, and 2) they are 5889 // deemed desirable by the target. 
5890 if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 5891 ((!LegalOperations && !VT.isVector() && 5892 !cast<LoadSDNode>(N0)->isVolatile()) || 5893 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) { 5894 bool DoXform = true; 5895 SmallVector<SDNode*, 4> SetCCs; 5896 if (!N0.hasOneUse()) 5897 DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); 5898 if (VT.isVector()) 5899 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); 5900 if (DoXform) { 5901 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 5902 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 5903 LN0->getChain(), 5904 LN0->getBasePtr(), N0.getValueType(), 5905 LN0->getMemOperand()); 5906 CombineTo(N, ExtLoad); 5907 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), 5908 N0.getValueType(), ExtLoad); 5909 CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); 5910 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), 5911 ISD::SIGN_EXTEND); 5912 return SDValue(N, 0); // Return N so it doesn't get rechecked! 5913 } 5914 } 5915 5916 // fold (sext (load x)) to multiple smaller sextloads. 5917 // Only on illegal but splittable vectors. 
5918 if (SDValue ExtLoad = CombineExtLoad(N)) 5919 return ExtLoad; 5920 5921 // fold (sext (sextload x)) -> (sext (truncate (sextload x))) 5922 // fold (sext ( extload x)) -> (sext (truncate (sextload x))) 5923 if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && 5924 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { 5925 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 5926 EVT MemVT = LN0->getMemoryVT(); 5927 if ((!LegalOperations && !LN0->isVolatile()) || 5928 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) { 5929 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 5930 LN0->getChain(), 5931 LN0->getBasePtr(), MemVT, 5932 LN0->getMemOperand()); 5933 CombineTo(N, ExtLoad); 5934 CombineTo(N0.getNode(), 5935 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), 5936 N0.getValueType(), ExtLoad), 5937 ExtLoad.getValue(1)); 5938 return SDValue(N, 0); // Return N so it doesn't get rechecked! 5939 } 5940 } 5941 5942 // fold (sext (and/or/xor (load x), cst)) -> 5943 // (and/or/xor (sextload x), (sext cst)) 5944 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || 5945 N0.getOpcode() == ISD::XOR) && 5946 isa<LoadSDNode>(N0.getOperand(0)) && 5947 N0.getOperand(1).getOpcode() == ISD::Constant && 5948 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) && 5949 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { 5950 LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); 5951 if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) { 5952 bool DoXform = true; 5953 SmallVector<SDNode*, 4> SetCCs; 5954 if (!N0.hasOneUse()) 5955 DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, 5956 SetCCs, TLI); 5957 if (DoXform) { 5958 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT, 5959 LN0->getChain(), LN0->getBasePtr(), 5960 LN0->getMemoryVT(), 5961 LN0->getMemOperand()); 5962 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 5963 Mask = Mask.sext(VT.getSizeInBits()); 5964 SDLoc DL(N); 
5965 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, 5966 ExtLoad, DAG.getConstant(Mask, DL, VT)); 5967 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, 5968 SDLoc(N0.getOperand(0)), 5969 N0.getOperand(0).getValueType(), ExtLoad); 5970 CombineTo(N, And); 5971 CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); 5972 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, 5973 ISD::SIGN_EXTEND); 5974 return SDValue(N, 0); // Return N so it doesn't get rechecked! 5975 } 5976 } 5977 } 5978 5979 if (N0.getOpcode() == ISD::SETCC) { 5980 EVT N0VT = N0.getOperand(0).getValueType(); 5981 // sext(setcc) -> sext_in_reg(vsetcc) for vectors. 5982 // Only do this before legalize for now. 5983 if (VT.isVector() && !LegalOperations && 5984 TLI.getBooleanContents(N0VT) == 5985 TargetLowering::ZeroOrNegativeOneBooleanContent) { 5986 // On some architectures (such as SSE/NEON/etc) the SETCC result type is 5987 // of the same size as the compared operands. Only optimize sext(setcc()) 5988 // if this is the case. 5989 EVT SVT = getSetCCResultType(N0VT); 5990 5991 // We know that the # elements of the results is the same as the 5992 // # elements of the compare (and the # elements of the compare result 5993 // for that matter). Check to see that they are the same size. If so, 5994 // we know that the element size of the sext'd result matches the 5995 // element size of the compare operands. 
5996 if (VT.getSizeInBits() == SVT.getSizeInBits()) 5997 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), 5998 N0.getOperand(1), 5999 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 6000 6001 // If the desired elements are smaller or larger than the source 6002 // elements we can use a matching integer vector type and then 6003 // truncate/sign extend 6004 EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); 6005 if (SVT == MatchingVectorType) { 6006 SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, 6007 N0.getOperand(0), N0.getOperand(1), 6008 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 6009 return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); 6010 } 6011 } 6012 6013 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0) 6014 unsigned ElementWidth = VT.getScalarType().getSizeInBits(); 6015 SDLoc DL(N); 6016 SDValue NegOne = 6017 DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT); 6018 SDValue SCC = 6019 SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), 6020 NegOne, DAG.getConstant(0, DL, VT), 6021 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 6022 if (SCC.getNode()) return SCC; 6023 6024 if (!VT.isVector()) { 6025 EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); 6026 if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { 6027 SDLoc DL(N); 6028 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 6029 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, 6030 N0.getOperand(0), N0.getOperand(1), CC); 6031 return DAG.getSelect(DL, VT, SetCC, 6032 NegOne, DAG.getConstant(0, DL, VT)); 6033 } 6034 } 6035 } 6036 6037 // fold (sext x) -> (zext x) if the sign bit is known zero. 
6038 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && 6039 DAG.SignBitIsZero(N0)) 6040 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); 6041 6042 return SDValue(); 6043 } 6044 6045 // isTruncateOf - If N is a truncate of some other value, return true, record 6046 // the value being truncated in Op and which of Op's bits are zero in KnownZero. 6047 // This function computes KnownZero to avoid a duplicated call to 6048 // computeKnownBits in the caller. 6049 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, 6050 APInt &KnownZero) { 6051 APInt KnownOne; 6052 if (N->getOpcode() == ISD::TRUNCATE) { 6053 Op = N->getOperand(0); 6054 DAG.computeKnownBits(Op, KnownZero, KnownOne); 6055 return true; 6056 } 6057 6058 if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || 6059 cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) 6060 return false; 6061 6062 SDValue Op0 = N->getOperand(0); 6063 SDValue Op1 = N->getOperand(1); 6064 assert(Op0.getValueType() == Op1.getValueType()); 6065 6066 if (isNullConstant(Op0)) 6067 Op = Op1; 6068 else if (isNullConstant(Op1)) 6069 Op = Op0; 6070 else 6071 return false; 6072 6073 DAG.computeKnownBits(Op, KnownZero, KnownOne); 6074 6075 if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) 6076 return false; 6077 6078 return true; 6079 } 6080 6081 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { 6082 SDValue N0 = N->getOperand(0); 6083 EVT VT = N->getValueType(0); 6084 6085 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, 6086 LegalOperations)) 6087 return SDValue(Res, 0); 6088 6089 // fold (zext (zext x)) -> (zext x) 6090 // fold (zext (aext x)) -> (zext x) 6091 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) 6092 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, 6093 N0.getOperand(0)); 6094 6095 // fold (zext (truncate x)) -> (zext x) or 6096 // (zext (truncate x)) -> (truncate x) 6097 // This is valid 
when the truncated bits of x are already zero. 6098 // FIXME: We should extend this to work for vectors too. 6099 SDValue Op; 6100 APInt KnownZero; 6101 if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { 6102 APInt TruncatedBits = 6103 (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? 6104 APInt(Op.getValueSizeInBits(), 0) : 6105 APInt::getBitsSet(Op.getValueSizeInBits(), 6106 N0.getValueSizeInBits(), 6107 std::min(Op.getValueSizeInBits(), 6108 VT.getSizeInBits())); 6109 if (TruncatedBits == (KnownZero & TruncatedBits)) { 6110 if (VT.bitsGT(Op.getValueType())) 6111 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); 6112 if (VT.bitsLT(Op.getValueType())) 6113 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); 6114 6115 return Op; 6116 } 6117 } 6118 6119 // fold (zext (truncate (load x))) -> (zext (smaller load x)) 6120 // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) 6121 if (N0.getOpcode() == ISD::TRUNCATE) { 6122 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 6123 if (NarrowLoad.getNode()) { 6124 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 6125 if (NarrowLoad.getNode() != N0.getNode()) { 6126 CombineTo(N0.getNode(), NarrowLoad); 6127 // CombineTo deleted the truncate, if needed, but not what's under it. 6128 AddToWorklist(oye); 6129 } 6130 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
6131 } 6132 } 6133 6134 // fold (zext (truncate x)) -> (and x, mask) 6135 if (N0.getOpcode() == ISD::TRUNCATE && 6136 (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { 6137 6138 // fold (zext (truncate (load x))) -> (zext (smaller load x)) 6139 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) 6140 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 6141 if (NarrowLoad.getNode()) { 6142 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 6143 if (NarrowLoad.getNode() != N0.getNode()) { 6144 CombineTo(N0.getNode(), NarrowLoad); 6145 // CombineTo deleted the truncate, if needed, but not what's under it. 6146 AddToWorklist(oye); 6147 } 6148 return SDValue(N, 0); // Return N so it doesn't get rechecked! 6149 } 6150 6151 SDValue Op = N0.getOperand(0); 6152 if (Op.getValueType().bitsLT(VT)) { 6153 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); 6154 AddToWorklist(Op.getNode()); 6155 } else if (Op.getValueType().bitsGT(VT)) { 6156 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); 6157 AddToWorklist(Op.getNode()); 6158 } 6159 return DAG.getZeroExtendInReg(Op, SDLoc(N), 6160 N0.getValueType().getScalarType()); 6161 } 6162 6163 // Fold (zext (and (trunc x), cst)) -> (and x, cst), 6164 // if either of the casts is not free. 
6165 if (N0.getOpcode() == ISD::AND && 6166 N0.getOperand(0).getOpcode() == ISD::TRUNCATE && 6167 N0.getOperand(1).getOpcode() == ISD::Constant && 6168 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), 6169 N0.getValueType()) || 6170 !TLI.isZExtFree(N0.getValueType(), VT))) { 6171 SDValue X = N0.getOperand(0).getOperand(0); 6172 if (X.getValueType().bitsLT(VT)) { 6173 X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X); 6174 } else if (X.getValueType().bitsGT(VT)) { 6175 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 6176 } 6177 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 6178 Mask = Mask.zext(VT.getSizeInBits()); 6179 SDLoc DL(N); 6180 return DAG.getNode(ISD::AND, DL, VT, 6181 X, DAG.getConstant(Mask, DL, VT)); 6182 } 6183 6184 // fold (zext (load x)) -> (zext (truncate (zextload x))) 6185 // Only generate vector extloads when 1) they're legal, and 2) they are 6186 // deemed desirable by the target. 6187 if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 6188 ((!LegalOperations && !VT.isVector() && 6189 !cast<LoadSDNode>(N0)->isVolatile()) || 6190 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { 6191 bool DoXform = true; 6192 SmallVector<SDNode*, 4> SetCCs; 6193 if (!N0.hasOneUse()) 6194 DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); 6195 if (VT.isVector()) 6196 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); 6197 if (DoXform) { 6198 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6199 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, 6200 LN0->getChain(), 6201 LN0->getBasePtr(), N0.getValueType(), 6202 LN0->getMemOperand()); 6203 CombineTo(N, ExtLoad); 6204 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), 6205 N0.getValueType(), ExtLoad); 6206 CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); 6207 6208 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), 6209 ISD::ZERO_EXTEND); 6210 return SDValue(N, 0); // Return N so it doesn't get 
rechecked! 6211 } 6212 } 6213 6214 // fold (zext (load x)) to multiple smaller zextloads. 6215 // Only on illegal but splittable vectors. 6216 if (SDValue ExtLoad = CombineExtLoad(N)) 6217 return ExtLoad; 6218 6219 // fold (zext (and/or/xor (load x), cst)) -> 6220 // (and/or/xor (zextload x), (zext cst)) 6221 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || 6222 N0.getOpcode() == ISD::XOR) && 6223 isa<LoadSDNode>(N0.getOperand(0)) && 6224 N0.getOperand(1).getOpcode() == ISD::Constant && 6225 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) && 6226 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { 6227 LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); 6228 if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { 6229 bool DoXform = true; 6230 SmallVector<SDNode*, 4> SetCCs; 6231 if (!N0.hasOneUse()) 6232 DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, 6233 SetCCs, TLI); 6234 if (DoXform) { 6235 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, 6236 LN0->getChain(), LN0->getBasePtr(), 6237 LN0->getMemoryVT(), 6238 LN0->getMemOperand()); 6239 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 6240 Mask = Mask.zext(VT.getSizeInBits()); 6241 SDLoc DL(N); 6242 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, 6243 ExtLoad, DAG.getConstant(Mask, DL, VT)); 6244 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, 6245 SDLoc(N0.getOperand(0)), 6246 N0.getOperand(0).getValueType(), ExtLoad); 6247 CombineTo(N, And); 6248 CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); 6249 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, 6250 ISD::ZERO_EXTEND); 6251 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
6252 } 6253 } 6254 } 6255 6256 // fold (zext (zextload x)) -> (zext (truncate (zextload x))) 6257 // fold (zext ( extload x)) -> (zext (truncate (zextload x))) 6258 if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && 6259 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { 6260 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6261 EVT MemVT = LN0->getMemoryVT(); 6262 if ((!LegalOperations && !LN0->isVolatile()) || 6263 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) { 6264 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, 6265 LN0->getChain(), 6266 LN0->getBasePtr(), MemVT, 6267 LN0->getMemOperand()); 6268 CombineTo(N, ExtLoad); 6269 CombineTo(N0.getNode(), 6270 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), 6271 ExtLoad), 6272 ExtLoad.getValue(1)); 6273 return SDValue(N, 0); // Return N so it doesn't get rechecked! 6274 } 6275 } 6276 6277 if (N0.getOpcode() == ISD::SETCC) { 6278 if (!LegalOperations && VT.isVector() && 6279 N0.getValueType().getVectorElementType() == MVT::i1) { 6280 EVT N0VT = N0.getOperand(0).getValueType(); 6281 if (getSetCCResultType(N0VT) == N0.getValueType()) 6282 return SDValue(); 6283 6284 // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. 6285 // Only do this before legalize for now. 6286 EVT EltVT = VT.getVectorElementType(); 6287 SDLoc DL(N); 6288 SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), 6289 DAG.getConstant(1, DL, EltVT)); 6290 if (VT.getSizeInBits() == N0VT.getSizeInBits()) 6291 // We know that the # elements of the results is the same as the 6292 // # elements of the compare (and the # elements of the compare result 6293 // for that matter). Check to see that they are the same size. If so, 6294 // we know that the element size of the sext'd result matches the 6295 // element size of the compare operands. 
6296 return DAG.getNode(ISD::AND, DL, VT, 6297 DAG.getSetCC(DL, VT, N0.getOperand(0), 6298 N0.getOperand(1), 6299 cast<CondCodeSDNode>(N0.getOperand(2))->get()), 6300 DAG.getNode(ISD::BUILD_VECTOR, DL, VT, 6301 OneOps)); 6302 6303 // If the desired elements are smaller or larger than the source 6304 // elements we can use a matching integer vector type and then 6305 // truncate/sign extend 6306 EVT MatchingElementType = 6307 EVT::getIntegerVT(*DAG.getContext(), 6308 N0VT.getScalarType().getSizeInBits()); 6309 EVT MatchingVectorType = 6310 EVT::getVectorVT(*DAG.getContext(), MatchingElementType, 6311 N0VT.getVectorNumElements()); 6312 SDValue VsetCC = 6313 DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0), 6314 N0.getOperand(1), 6315 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 6316 return DAG.getNode(ISD::AND, DL, VT, 6317 DAG.getSExtOrTrunc(VsetCC, DL, VT), 6318 DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); 6319 } 6320 6321 // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc 6322 SDLoc DL(N); 6323 SDValue SCC = 6324 SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), 6325 DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), 6326 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 6327 if (SCC.getNode()) return SCC; 6328 } 6329 6330 // (zext (shl (zext x), cst)) -> (shl (zext x), cst) 6331 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && 6332 isa<ConstantSDNode>(N0.getOperand(1)) && 6333 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && 6334 N0.hasOneUse()) { 6335 SDValue ShAmt = N0.getOperand(1); 6336 unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 6337 if (N0.getOpcode() == ISD::SHL) { 6338 SDValue InnerZExt = N0.getOperand(0); 6339 // If the original shl may be shifting out bits, do not perform this 6340 // transformation. 
6341 unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - 6342 InnerZExt.getOperand(0).getValueType().getSizeInBits(); 6343 if (ShAmtVal > KnownZeroBits) 6344 return SDValue(); 6345 } 6346 6347 SDLoc DL(N); 6348 6349 // Ensure that the shift amount is wide enough for the shifted value. 6350 if (VT.getSizeInBits() >= 256) 6351 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); 6352 6353 return DAG.getNode(N0.getOpcode(), DL, VT, 6354 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), 6355 ShAmt); 6356 } 6357 6358 return SDValue(); 6359 } 6360 6361 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { 6362 SDValue N0 = N->getOperand(0); 6363 EVT VT = N->getValueType(0); 6364 6365 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, 6366 LegalOperations)) 6367 return SDValue(Res, 0); 6368 6369 // fold (aext (aext x)) -> (aext x) 6370 // fold (aext (zext x)) -> (zext x) 6371 // fold (aext (sext x)) -> (sext x) 6372 if (N0.getOpcode() == ISD::ANY_EXTEND || 6373 N0.getOpcode() == ISD::ZERO_EXTEND || 6374 N0.getOpcode() == ISD::SIGN_EXTEND) 6375 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); 6376 6377 // fold (aext (truncate (load x))) -> (aext (smaller load x)) 6378 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) 6379 if (N0.getOpcode() == ISD::TRUNCATE) { 6380 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 6381 if (NarrowLoad.getNode()) { 6382 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 6383 if (NarrowLoad.getNode() != N0.getNode()) { 6384 CombineTo(N0.getNode(), NarrowLoad); 6385 // CombineTo deleted the truncate, if needed, but not what's under it. 6386 AddToWorklist(oye); 6387 } 6388 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
6389 } 6390 } 6391 6392 // fold (aext (truncate x)) 6393 if (N0.getOpcode() == ISD::TRUNCATE) { 6394 SDValue TruncOp = N0.getOperand(0); 6395 if (TruncOp.getValueType() == VT) 6396 return TruncOp; // x iff x size == zext size. 6397 if (TruncOp.getValueType().bitsGT(VT)) 6398 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp); 6399 return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp); 6400 } 6401 6402 // Fold (aext (and (trunc x), cst)) -> (and x, cst) 6403 // if the trunc is not free. 6404 if (N0.getOpcode() == ISD::AND && 6405 N0.getOperand(0).getOpcode() == ISD::TRUNCATE && 6406 N0.getOperand(1).getOpcode() == ISD::Constant && 6407 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), 6408 N0.getValueType())) { 6409 SDValue X = N0.getOperand(0).getOperand(0); 6410 if (X.getValueType().bitsLT(VT)) { 6411 X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X); 6412 } else if (X.getValueType().bitsGT(VT)) { 6413 X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); 6414 } 6415 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 6416 Mask = Mask.zext(VT.getSizeInBits()); 6417 SDLoc DL(N); 6418 return DAG.getNode(ISD::AND, DL, VT, 6419 X, DAG.getConstant(Mask, DL, VT)); 6420 } 6421 6422 // fold (aext (load x)) -> (aext (truncate (extload x))) 6423 // None of the supported targets knows how to perform load and any_ext 6424 // on vectors in one instruction. We only perform this transformation on 6425 // scalars. 
6426 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && 6427 ISD::isUNINDEXEDLoad(N0.getNode()) && 6428 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { 6429 bool DoXform = true; 6430 SmallVector<SDNode*, 4> SetCCs; 6431 if (!N0.hasOneUse()) 6432 DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); 6433 if (DoXform) { 6434 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6435 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, 6436 LN0->getChain(), 6437 LN0->getBasePtr(), N0.getValueType(), 6438 LN0->getMemOperand()); 6439 CombineTo(N, ExtLoad); 6440 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), 6441 N0.getValueType(), ExtLoad); 6442 CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); 6443 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), 6444 ISD::ANY_EXTEND); 6445 return SDValue(N, 0); // Return N so it doesn't get rechecked! 6446 } 6447 } 6448 6449 // fold (aext (zextload x)) -> (aext (truncate (zextload x))) 6450 // fold (aext (sextload x)) -> (aext (truncate (sextload x))) 6451 // fold (aext ( extload x)) -> (aext (truncate (extload x))) 6452 if (N0.getOpcode() == ISD::LOAD && 6453 !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 6454 N0.hasOneUse()) { 6455 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6456 ISD::LoadExtType ExtType = LN0->getExtensionType(); 6457 EVT MemVT = LN0->getMemoryVT(); 6458 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) { 6459 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N), 6460 VT, LN0->getChain(), LN0->getBasePtr(), 6461 MemVT, LN0->getMemOperand()); 6462 CombineTo(N, ExtLoad); 6463 CombineTo(N0.getNode(), 6464 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), 6465 N0.getValueType(), ExtLoad), 6466 ExtLoad.getValue(1)); 6467 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
6468 } 6469 } 6470 6471 if (N0.getOpcode() == ISD::SETCC) { 6472 // For vectors: 6473 // aext(setcc) -> vsetcc 6474 // aext(setcc) -> truncate(vsetcc) 6475 // aext(setcc) -> aext(vsetcc) 6476 // Only do this before legalize for now. 6477 if (VT.isVector() && !LegalOperations) { 6478 EVT N0VT = N0.getOperand(0).getValueType(); 6479 // We know that the # elements of the results is the same as the 6480 // # elements of the compare (and the # elements of the compare result 6481 // for that matter). Check to see that they are the same size. If so, 6482 // we know that the element size of the sext'd result matches the 6483 // element size of the compare operands. 6484 if (VT.getSizeInBits() == N0VT.getSizeInBits()) 6485 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), 6486 N0.getOperand(1), 6487 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 6488 // If the desired elements are smaller or larger than the source 6489 // elements we can use a matching integer vector type and then 6490 // truncate/any extend 6491 else { 6492 EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); 6493 SDValue VsetCC = 6494 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), 6495 N0.getOperand(1), 6496 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 6497 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); 6498 } 6499 } 6500 6501 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc 6502 SDLoc DL(N); 6503 SDValue SCC = 6504 SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), 6505 DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), 6506 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 6507 if (SCC.getNode()) 6508 return SCC; 6509 } 6510 6511 return SDValue(); 6512 } 6513 6514 /// See if the specified operand can be simplified with the knowledge that only 6515 /// the bits specified by Mask are used. If so, return the simpler operand, 6516 /// otherwise return a null SDValue. 
6517 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { 6518 switch (V.getOpcode()) { 6519 default: break; 6520 case ISD::Constant: { 6521 const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); 6522 assert(CV && "Const value should be ConstSDNode."); 6523 const APInt &CVal = CV->getAPIntValue(); 6524 APInt NewVal = CVal & Mask; 6525 if (NewVal != CVal) 6526 return DAG.getConstant(NewVal, SDLoc(V), V.getValueType()); 6527 break; 6528 } 6529 case ISD::OR: 6530 case ISD::XOR: 6531 // If the LHS or RHS don't contribute bits to the or, drop them. 6532 if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) 6533 return V.getOperand(1); 6534 if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) 6535 return V.getOperand(0); 6536 break; 6537 case ISD::SRL: 6538 // Only look at single-use SRLs. 6539 if (!V.getNode()->hasOneUse()) 6540 break; 6541 if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) { 6542 // See if we can recursively simplify the LHS. 6543 unsigned Amt = RHSC->getZExtValue(); 6544 6545 // Watch out for shift count overflow though. 6546 if (Amt >= Mask.getBitWidth()) break; 6547 APInt NewMask = Mask << Amt; 6548 SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); 6549 if (SimplifyLHS.getNode()) 6550 return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), 6551 SimplifyLHS, V.getOperand(1)); 6552 } 6553 } 6554 return SDValue(); 6555 } 6556 6557 /// If the result of a wider load is shifted to right of N bits and then 6558 /// truncated to a narrower type and where N is a multiple of number of bits of 6559 /// the narrower type, transform it to a narrower load from address + N / num of 6560 /// bits of new type. If the result is to be extended, also fold the extension 6561 /// to form a extending load. 
6562 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { 6563 unsigned Opc = N->getOpcode(); 6564 6565 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; 6566 SDValue N0 = N->getOperand(0); 6567 EVT VT = N->getValueType(0); 6568 EVT ExtVT = VT; 6569 6570 // This transformation isn't valid for vector loads. 6571 if (VT.isVector()) 6572 return SDValue(); 6573 6574 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then 6575 // extended to VT. 6576 if (Opc == ISD::SIGN_EXTEND_INREG) { 6577 ExtType = ISD::SEXTLOAD; 6578 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 6579 } else if (Opc == ISD::SRL) { 6580 // Another special-case: SRL is basically zero-extending a narrower value. 6581 ExtType = ISD::ZEXTLOAD; 6582 N0 = SDValue(N, 0); 6583 ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 6584 if (!N01) return SDValue(); 6585 ExtVT = EVT::getIntegerVT(*DAG.getContext(), 6586 VT.getSizeInBits() - N01->getZExtValue()); 6587 } 6588 if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT)) 6589 return SDValue(); 6590 6591 unsigned EVTBits = ExtVT.getSizeInBits(); 6592 6593 // Do not generate loads of non-round integer types since these can 6594 // be expensive (and would be wrong if the type is not byte sized). 6595 if (!ExtVT.isRound()) 6596 return SDValue(); 6597 6598 unsigned ShAmt = 0; 6599 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { 6600 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 6601 ShAmt = N01->getZExtValue(); 6602 // Is the shift amount a multiple of size of VT? 6603 if ((ShAmt & (EVTBits-1)) == 0) { 6604 N0 = N0.getOperand(0); 6605 // Is the load width a multiple of size of VT? 6606 if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) 6607 return SDValue(); 6608 } 6609 6610 // At this point, we must have a load or else we can't do the transform. 
6611 if (!isa<LoadSDNode>(N0)) return SDValue(); 6612 6613 // Because a SRL must be assumed to *need* to zero-extend the high bits 6614 // (as opposed to anyext the high bits), we can't combine the zextload 6615 // lowering of SRL and an sextload. 6616 if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD) 6617 return SDValue(); 6618 6619 // If the shift amount is larger than the input type then we're not 6620 // accessing any of the loaded bytes. If the load was a zextload/extload 6621 // then the result of the shift+trunc is zero/undef (handled elsewhere). 6622 if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()) 6623 return SDValue(); 6624 } 6625 } 6626 6627 // If the load is shifted left (and the result isn't shifted back right), 6628 // we can fold the truncate through the shift. 6629 unsigned ShLeftAmt = 0; 6630 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && 6631 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { 6632 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 6633 ShLeftAmt = N01->getZExtValue(); 6634 N0 = N0.getOperand(0); 6635 } 6636 } 6637 6638 // If we haven't found a load, we can't narrow it. Don't transform one with 6639 // multiple uses, this would require adding a new load. 6640 if (!isa<LoadSDNode>(N0) || !N0.hasOneUse()) 6641 return SDValue(); 6642 6643 // Don't change the width of a volatile load. 6644 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6645 if (LN0->isVolatile()) 6646 return SDValue(); 6647 6648 // Verify that we are actually reducing a load width here. 6649 if (LN0->getMemoryVT().getSizeInBits() < EVTBits) 6650 return SDValue(); 6651 6652 // For the transform to be legal, the load must produce only two values 6653 // (the value loaded and the chain). Don't transform a pre-increment 6654 // load, for example, which produces an extra value. 
Otherwise the 6655 // transformation is not equivalent, and the downstream logic to replace 6656 // uses gets things wrong. 6657 if (LN0->getNumValues() > 2) 6658 return SDValue(); 6659 6660 // If the load that we're shrinking is an extload and we're not just 6661 // discarding the extension we can't simply shrink the load. Bail. 6662 // TODO: It would be possible to merge the extensions in some cases. 6663 if (LN0->getExtensionType() != ISD::NON_EXTLOAD && 6664 LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) 6665 return SDValue(); 6666 6667 if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT)) 6668 return SDValue(); 6669 6670 EVT PtrType = N0.getOperand(1).getValueType(); 6671 6672 if (PtrType == MVT::Untyped || PtrType.isExtended()) 6673 // It's not possible to generate a constant of extended or untyped type. 6674 return SDValue(); 6675 6676 // For big endian targets, we need to adjust the offset to the pointer to 6677 // load the correct bytes. 6678 if (DAG.getDataLayout().isBigEndian()) { 6679 unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); 6680 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); 6681 ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; 6682 } 6683 6684 uint64_t PtrOff = ShAmt / 8; 6685 unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); 6686 SDLoc DL(LN0); 6687 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, 6688 PtrType, LN0->getBasePtr(), 6689 DAG.getConstant(PtrOff, DL, PtrType)); 6690 AddToWorklist(NewPtr.getNode()); 6691 6692 SDValue Load; 6693 if (ExtType == ISD::NON_EXTLOAD) 6694 Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, 6695 LN0->getPointerInfo().getWithOffset(PtrOff), 6696 LN0->isVolatile(), LN0->isNonTemporal(), 6697 LN0->isInvariant(), NewAlign, LN0->getAAInfo()); 6698 else 6699 Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, 6700 LN0->getPointerInfo().getWithOffset(PtrOff), 6701 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 6702 LN0->isInvariant(), NewAlign, 
LN0->getAAInfo()); 6703 6704 // Replace the old load's chain with the new load's chain. 6705 WorklistRemover DeadNodes(*this); 6706 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); 6707 6708 // Shift the result left, if we've swallowed a left shift. 6709 SDValue Result = Load; 6710 if (ShLeftAmt != 0) { 6711 EVT ShImmTy = getShiftAmountTy(Result.getValueType()); 6712 if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) 6713 ShImmTy = VT; 6714 // If the shift amount is as large as the result size (but, presumably, 6715 // no larger than the source) then the useful bits of the result are 6716 // zero; we can't simply return the shortened shift, because the result 6717 // of that operation is undefined. 6718 SDLoc DL(N0); 6719 if (ShLeftAmt >= VT.getSizeInBits()) 6720 Result = DAG.getConstant(0, DL, VT); 6721 else 6722 Result = DAG.getNode(ISD::SHL, DL, VT, 6723 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy)); 6724 } 6725 6726 // Return the new loaded value. 6727 return Result; 6728 } 6729 6730 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { 6731 SDValue N0 = N->getOperand(0); 6732 SDValue N1 = N->getOperand(1); 6733 EVT VT = N->getValueType(0); 6734 EVT EVT = cast<VTSDNode>(N1)->getVT(); 6735 unsigned VTBits = VT.getScalarType().getSizeInBits(); 6736 unsigned EVTBits = EVT.getScalarType().getSizeInBits(); 6737 6738 // fold (sext_in_reg c1) -> c1 6739 if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) 6740 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); 6741 6742 // If the input is already sign extended, just drop the extension. 
6743 if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) 6744 return N0; 6745 6746 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 6747 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && 6748 EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) 6749 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 6750 N0.getOperand(0), N1); 6751 6752 // fold (sext_in_reg (sext x)) -> (sext x) 6753 // fold (sext_in_reg (aext x)) -> (sext x) 6754 // if x is small enough. 6755 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { 6756 SDValue N00 = N0.getOperand(0); 6757 if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && 6758 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) 6759 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); 6760 } 6761 6762 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. 6763 if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) 6764 return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT); 6765 6766 // fold operands of sext_in_reg based on knowledge that the top bits are not 6767 // demanded. 6768 if (SimplifyDemandedBits(SDValue(N, 0))) 6769 return SDValue(N, 0); 6770 6771 // fold (sext_in_reg (load x)) -> (smaller sextload x) 6772 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) 6773 SDValue NarrowLoad = ReduceLoadWidth(N); 6774 if (NarrowLoad.getNode()) 6775 return NarrowLoad; 6776 6777 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) 6778 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. 6779 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. 6780 if (N0.getOpcode() == ISD::SRL) { 6781 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 6782 if (ShAmt->getZExtValue()+EVTBits <= VTBits) { 6783 // We can turn this into an SRA iff the input to the SRL is already sign 6784 // extended enough. 
6785 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); 6786 if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) 6787 return DAG.getNode(ISD::SRA, SDLoc(N), VT, 6788 N0.getOperand(0), N0.getOperand(1)); 6789 } 6790 } 6791 6792 // fold (sext_inreg (extload x)) -> (sextload x) 6793 if (ISD::isEXTLoad(N0.getNode()) && 6794 ISD::isUNINDEXEDLoad(N0.getNode()) && 6795 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 6796 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 6797 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { 6798 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6799 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 6800 LN0->getChain(), 6801 LN0->getBasePtr(), EVT, 6802 LN0->getMemOperand()); 6803 CombineTo(N, ExtLoad); 6804 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 6805 AddToWorklist(ExtLoad.getNode()); 6806 return SDValue(N, 0); // Return N so it doesn't get rechecked! 6807 } 6808 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use 6809 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 6810 N0.hasOneUse() && 6811 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 6812 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 6813 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { 6814 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6815 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, 6816 LN0->getChain(), 6817 LN0->getBasePtr(), EVT, 6818 LN0->getMemOperand()); 6819 CombineTo(N, ExtLoad); 6820 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 6821 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
6822 } 6823 6824 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) 6825 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { 6826 SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), 6827 N0.getOperand(1), false); 6828 if (BSwap.getNode()) 6829 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 6830 BSwap, N1); 6831 } 6832 6833 // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs 6834 // into a build_vector. 6835 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { 6836 SmallVector<SDValue, 8> Elts; 6837 unsigned NumElts = N0->getNumOperands(); 6838 unsigned ShAmt = VTBits - EVTBits; 6839 6840 for (unsigned i = 0; i != NumElts; ++i) { 6841 SDValue Op = N0->getOperand(i); 6842 if (Op->getOpcode() == ISD::UNDEF) { 6843 Elts.push_back(Op); 6844 continue; 6845 } 6846 6847 ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); 6848 const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); 6849 Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), 6850 SDLoc(Op), Op.getValueType())); 6851 } 6852 6853 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts); 6854 } 6855 6856 return SDValue(); 6857 } 6858 6859 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { 6860 SDValue N0 = N->getOperand(0); 6861 EVT VT = N->getValueType(0); 6862 6863 if (N0.getOpcode() == ISD::UNDEF) 6864 return DAG.getUNDEF(VT); 6865 6866 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, 6867 LegalOperations)) 6868 return SDValue(Res, 0); 6869 6870 return SDValue(); 6871 } 6872 6873 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { 6874 SDValue N0 = N->getOperand(0); 6875 EVT VT = N->getValueType(0); 6876 bool isLE = DAG.getDataLayout().isLittleEndian(); 6877 6878 // noop truncate 6879 if (N0.getValueType() == N->getValueType(0)) 6880 return N0; 6881 // fold (truncate c1) -> c1 6882 if (isConstantIntBuildVectorOrConstantInt(N0)) 6883 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, 
N0); 6884 // fold (truncate (truncate x)) -> (truncate x) 6885 if (N0.getOpcode() == ISD::TRUNCATE) 6886 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); 6887 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x 6888 if (N0.getOpcode() == ISD::ZERO_EXTEND || 6889 N0.getOpcode() == ISD::SIGN_EXTEND || 6890 N0.getOpcode() == ISD::ANY_EXTEND) { 6891 if (N0.getOperand(0).getValueType().bitsLT(VT)) 6892 // if the source is smaller than the dest, we still need an extend 6893 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 6894 N0.getOperand(0)); 6895 if (N0.getOperand(0).getValueType().bitsGT(VT)) 6896 // if the source is larger than the dest, than we just need the truncate 6897 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); 6898 // if the source and dest are the same type, we can drop both the extend 6899 // and the truncate. 6900 return N0.getOperand(0); 6901 } 6902 6903 // Fold extract-and-trunc into a narrow extract. For example: 6904 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) 6905 // i32 y = TRUNCATE(i64 x) 6906 // -- becomes -- 6907 // v16i8 b = BITCAST (v2i64 val) 6908 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) 6909 // 6910 // Note: We only run this optimization after type legalization (which often 6911 // creates this pattern) and before operation legalization after which 6912 // we need to be more careful about the vector instructions that we generate. 
6913 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 6914 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { 6915 6916 EVT VecTy = N0.getOperand(0).getValueType(); 6917 EVT ExTy = N0.getValueType(); 6918 EVT TrTy = N->getValueType(0); 6919 6920 unsigned NumElem = VecTy.getVectorNumElements(); 6921 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); 6922 6923 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); 6924 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); 6925 6926 SDValue EltNo = N0->getOperand(1); 6927 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { 6928 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 6929 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 6930 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); 6931 6932 SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), 6933 NVT, N0.getOperand(0)); 6934 6935 SDLoc DL(N); 6936 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 6937 DL, TrTy, V, 6938 DAG.getConstant(Index, DL, IndexTy)); 6939 } 6940 } 6941 6942 // trunc (select c, a, b) -> select c, (trunc a), (trunc b) 6943 if (N0.getOpcode() == ISD::SELECT) { 6944 EVT SrcVT = N0.getValueType(); 6945 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && 6946 TLI.isTruncateFree(SrcVT, VT)) { 6947 SDLoc SL(N0); 6948 SDValue Cond = N0.getOperand(0); 6949 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); 6950 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); 6951 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); 6952 } 6953 } 6954 6955 // Fold a series of buildvector, bitcast, and truncate if possible. 6956 // For example fold 6957 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to 6958 // (2xi32 (buildvector x, y)). 
6959 if (Level == AfterLegalizeVectorOps && VT.isVector() && 6960 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && 6961 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && 6962 N0.getOperand(0).hasOneUse()) { 6963 6964 SDValue BuildVect = N0.getOperand(0); 6965 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); 6966 EVT TruncVecEltTy = VT.getVectorElementType(); 6967 6968 // Check that the element types match. 6969 if (BuildVectEltTy == TruncVecEltTy) { 6970 // Now we only need to compute the offset of the truncated elements. 6971 unsigned BuildVecNumElts = BuildVect.getNumOperands(); 6972 unsigned TruncVecNumElts = VT.getVectorNumElements(); 6973 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; 6974 6975 assert((BuildVecNumElts % TruncVecNumElts) == 0 && 6976 "Invalid number of elements"); 6977 6978 SmallVector<SDValue, 8> Opnds; 6979 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) 6980 Opnds.push_back(BuildVect.getOperand(i)); 6981 6982 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); 6983 } 6984 } 6985 6986 // See if we can simplify the input to this truncate through knowledge that 6987 // only the low bits are being used. 6988 // For example "trunc (or (shl x, 8), y)" // -> trunc y 6989 // Currently we only perform this optimization on scalars because vectors 6990 // may have different active low bits. 
6991 if (!VT.isVector()) { 6992 SDValue Shorter = 6993 GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), 6994 VT.getSizeInBits())); 6995 if (Shorter.getNode()) 6996 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); 6997 } 6998 // fold (truncate (load x)) -> (smaller load x) 6999 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) 7000 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { 7001 SDValue Reduced = ReduceLoadWidth(N); 7002 if (Reduced.getNode()) 7003 return Reduced; 7004 // Handle the case where the load remains an extending load even 7005 // after truncation. 7006 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { 7007 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 7008 if (!LN0->isVolatile() && 7009 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { 7010 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), 7011 VT, LN0->getChain(), LN0->getBasePtr(), 7012 LN0->getMemoryVT(), 7013 LN0->getMemOperand()); 7014 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); 7015 return NewLoad; 7016 } 7017 } 7018 } 7019 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), 7020 // where ... are all 'undef'. 7021 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { 7022 SmallVector<EVT, 8> VTs; 7023 SDValue V; 7024 unsigned Idx = 0; 7025 unsigned NumDefs = 0; 7026 7027 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { 7028 SDValue X = N0.getOperand(i); 7029 if (X.getOpcode() != ISD::UNDEF) { 7030 V = X; 7031 Idx = i; 7032 NumDefs++; 7033 } 7034 // Stop if more than one members are non-undef. 
7035 if (NumDefs > 1) 7036 break; 7037 VTs.push_back(EVT::getVectorVT(*DAG.getContext(), 7038 VT.getVectorElementType(), 7039 X.getValueType().getVectorNumElements())); 7040 } 7041 7042 if (NumDefs == 0) 7043 return DAG.getUNDEF(VT); 7044 7045 if (NumDefs == 1) { 7046 assert(V.getNode() && "The single defined operand is empty!"); 7047 SmallVector<SDValue, 8> Opnds; 7048 for (unsigned i = 0, e = VTs.size(); i != e; ++i) { 7049 if (i != Idx) { 7050 Opnds.push_back(DAG.getUNDEF(VTs[i])); 7051 continue; 7052 } 7053 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); 7054 AddToWorklist(NV.getNode()); 7055 Opnds.push_back(NV); 7056 } 7057 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); 7058 } 7059 } 7060 7061 // Simplify the operands using demanded-bits information. 7062 if (!VT.isVector() && 7063 SimplifyDemandedBits(SDValue(N, 0))) 7064 return SDValue(N, 0); 7065 7066 return SDValue(); 7067 } 7068 7069 static SDNode *getBuildPairElt(SDNode *N, unsigned i) { 7070 SDValue Elt = N->getOperand(i); 7071 if (Elt.getOpcode() != ISD::MERGE_VALUES) 7072 return Elt.getNode(); 7073 return Elt.getOperand(Elt.getResNo()).getNode(); 7074 } 7075 7076 /// build_pair (load, load) -> load 7077 /// if load locations are consecutive. 7078 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { 7079 assert(N->getOpcode() == ISD::BUILD_PAIR); 7080 7081 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); 7082 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); 7083 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || 7084 LD1->getAddressSpace() != LD2->getAddressSpace()) 7085 return SDValue(); 7086 EVT LD1VT = LD1->getValueType(0); 7087 7088 if (ISD::isNON_EXTLoad(LD2) && 7089 LD2->hasOneUse() && 7090 // If both are volatile this would reduce the number of volatile loads. 7091 // If one is volatile it might be ok, but play conservative and bail out. 
7092 !LD1->isVolatile() && 7093 !LD2->isVolatile() && 7094 DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { 7095 unsigned Align = LD1->getAlignment(); 7096 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( 7097 VT.getTypeForEVT(*DAG.getContext())); 7098 7099 if (NewAlign <= Align && 7100 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) 7101 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), 7102 LD1->getBasePtr(), LD1->getPointerInfo(), 7103 false, false, false, Align); 7104 } 7105 7106 return SDValue(); 7107 } 7108 7109 SDValue DAGCombiner::visitBITCAST(SDNode *N) { 7110 SDValue N0 = N->getOperand(0); 7111 EVT VT = N->getValueType(0); 7112 7113 // If the input is a BUILD_VECTOR with all constant elements, fold this now. 7114 // Only do this before legalize, since afterward the target may be depending 7115 // on the bitconvert. 7116 // First check to see if this is all constant. 7117 if (!LegalTypes && 7118 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && 7119 VT.isVector()) { 7120 bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant(); 7121 7122 EVT DestEltVT = N->getValueType(0).getVectorElementType(); 7123 assert(!DestEltVT.isVector() && 7124 "Element type of vector ValueType must not be vector!"); 7125 if (isSimple) 7126 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); 7127 } 7128 7129 // If the input is a constant, let getNode fold it. 7130 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { 7131 // If we can't allow illegal operations, we need to check that this is just 7132 // a fp -> int or int -> conversion and that the resulting operation will 7133 // be legal. 
7134 if (!LegalOperations || 7135 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && 7136 TLI.isOperationLegal(ISD::ConstantFP, VT)) || 7137 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && 7138 TLI.isOperationLegal(ISD::Constant, VT))) 7139 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); 7140 } 7141 7142 // (conv (conv x, t1), t2) -> (conv x, t2) 7143 if (N0.getOpcode() == ISD::BITCAST) 7144 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, 7145 N0.getOperand(0)); 7146 7147 // fold (conv (load x)) -> (load (conv*)x) 7148 // If the resultant load doesn't need a higher alignment than the original! 7149 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 7150 // Do not change the width of a volatile load. 7151 !cast<LoadSDNode>(N0)->isVolatile() && 7152 // Do not remove the cast if the types differ in endian layout. 7153 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == 7154 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && 7155 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && 7156 TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { 7157 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 7158 unsigned Align = DAG.getDataLayout().getABITypeAlignment( 7159 VT.getTypeForEVT(*DAG.getContext())); 7160 unsigned OrigAlign = LN0->getAlignment(); 7161 7162 if (Align <= OrigAlign) { 7163 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), 7164 LN0->getBasePtr(), LN0->getPointerInfo(), 7165 LN0->isVolatile(), LN0->isNonTemporal(), 7166 LN0->isInvariant(), OrigAlign, 7167 LN0->getAAInfo()); 7168 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); 7169 return Load; 7170 } 7171 } 7172 7173 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 7174 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 7175 // This often reduces constant pool loads. 
7176 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || 7177 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && 7178 N0.getNode()->hasOneUse() && VT.isInteger() && 7179 !VT.isVector() && !N0.getValueType().isVector()) { 7180 SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, 7181 N0.getOperand(0)); 7182 AddToWorklist(NewConv.getNode()); 7183 7184 SDLoc DL(N); 7185 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 7186 if (N0.getOpcode() == ISD::FNEG) 7187 return DAG.getNode(ISD::XOR, DL, VT, 7188 NewConv, DAG.getConstant(SignBit, DL, VT)); 7189 assert(N0.getOpcode() == ISD::FABS); 7190 return DAG.getNode(ISD::AND, DL, VT, 7191 NewConv, DAG.getConstant(~SignBit, DL, VT)); 7192 } 7193 7194 // fold (bitconvert (fcopysign cst, x)) -> 7195 // (or (and (bitconvert x), sign), (and cst, (not sign))) 7196 // Note that we don't handle (copysign x, cst) because this can always be 7197 // folded to an fneg or fabs. 7198 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && 7199 isa<ConstantFPSDNode>(N0.getOperand(0)) && 7200 VT.isInteger() && !VT.isVector()) { 7201 unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); 7202 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); 7203 if (isTypeLegal(IntXVT)) { 7204 SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), 7205 IntXVT, N0.getOperand(1)); 7206 AddToWorklist(X.getNode()); 7207 7208 // If X has a different width than the result/lhs, sext it or truncate it. 7209 unsigned VTWidth = VT.getSizeInBits(); 7210 if (OrigXWidth < VTWidth) { 7211 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); 7212 AddToWorklist(X.getNode()); 7213 } else if (OrigXWidth > VTWidth) { 7214 // To get the sign bit in the right place, we have to shift it right 7215 // before truncating. 
7216 SDLoc DL(X); 7217 X = DAG.getNode(ISD::SRL, DL, 7218 X.getValueType(), X, 7219 DAG.getConstant(OrigXWidth-VTWidth, DL, 7220 X.getValueType())); 7221 AddToWorklist(X.getNode()); 7222 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 7223 AddToWorklist(X.getNode()); 7224 } 7225 7226 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 7227 X = DAG.getNode(ISD::AND, SDLoc(X), VT, 7228 X, DAG.getConstant(SignBit, SDLoc(X), VT)); 7229 AddToWorklist(X.getNode()); 7230 7231 SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), 7232 VT, N0.getOperand(0)); 7233 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, 7234 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); 7235 AddToWorklist(Cst.getNode()); 7236 7237 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); 7238 } 7239 } 7240 7241 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 7242 if (N0.getOpcode() == ISD::BUILD_PAIR) { 7243 SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); 7244 if (CombineLD.getNode()) 7245 return CombineLD; 7246 } 7247 7248 // Remove double bitcasts from shuffles - this is often a legacy of 7249 // XformToShuffleWithZero being used to combine bitmaskings (of 7250 // float vectors bitcast to integer vectors) into shuffles. 7251 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) 7252 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && 7253 N0->getOpcode() == ISD::VECTOR_SHUFFLE && 7254 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && 7255 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { 7256 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0); 7257 7258 // If operands are a bitcast, peek through if it casts the original VT. 7259 // If operands are a UNDEF or constant, just bitcast back to original VT. 
7260 auto PeekThroughBitcast = [&](SDValue Op) { 7261 if (Op.getOpcode() == ISD::BITCAST && 7262 Op.getOperand(0)->getValueType(0) == VT) 7263 return SDValue(Op.getOperand(0)); 7264 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || 7265 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) 7266 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); 7267 return SDValue(); 7268 }; 7269 7270 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); 7271 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); 7272 if (!(SV0 && SV1)) 7273 return SDValue(); 7274 7275 int MaskScale = 7276 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); 7277 SmallVector<int, 8> NewMask; 7278 for (int M : SVN->getMask()) 7279 for (int i = 0; i != MaskScale; ++i) 7280 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i); 7281 7282 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); 7283 if (!LegalMask) { 7284 std::swap(SV0, SV1); 7285 ShuffleVectorSDNode::commuteMask(NewMask); 7286 LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); 7287 } 7288 7289 if (LegalMask) 7290 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); 7291 } 7292 7293 return SDValue(); 7294 } 7295 7296 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { 7297 EVT VT = N->getValueType(0); 7298 return CombineConsecutiveLoads(N, VT); 7299 } 7300 7301 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef 7302 /// operands. DstEltVT indicates the destination element value type. 7303 SDValue DAGCombiner:: 7304 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { 7305 EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); 7306 7307 // If this is already the right type, we're done. 7308 if (SrcEltVT == DstEltVT) return SDValue(BV, 0); 7309 7310 unsigned SrcBitSize = SrcEltVT.getSizeInBits(); 7311 unsigned DstBitSize = DstEltVT.getSizeInBits(); 7312 7313 // If this is a conversion of N elements of one type to N elements of another 7314 // type, convert each element. 
This handles FP<->INT cases. 7315 if (SrcBitSize == DstBitSize) { 7316 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, 7317 BV->getValueType(0).getVectorNumElements()); 7318 7319 // Due to the FP element handling below calling this routine recursively, 7320 // we can end up with a scalar-to-vector node here. 7321 if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) 7322 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, 7323 DAG.getNode(ISD::BITCAST, SDLoc(BV), 7324 DstEltVT, BV->getOperand(0))); 7325 7326 SmallVector<SDValue, 8> Ops; 7327 for (SDValue Op : BV->op_values()) { 7328 // If the vector element type is not legal, the BUILD_VECTOR operands 7329 // are promoted and implicitly truncated. Make that explicit here. 7330 if (Op.getValueType() != SrcEltVT) 7331 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); 7332 Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), 7333 DstEltVT, Op)); 7334 AddToWorklist(Ops.back().getNode()); 7335 } 7336 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); 7337 } 7338 7339 // Otherwise, we're growing or shrinking the elements. To avoid having to 7340 // handle annoying details of growing/shrinking FP values, we convert them to 7341 // int first. 7342 if (SrcEltVT.isFloatingPoint()) { 7343 // Convert the input float vector to a int vector where the elements are the 7344 // same sizes. 7345 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); 7346 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); 7347 SrcEltVT = IntVT; 7348 } 7349 7350 // Now we know the input is an integer vector. If the output is a FP type, 7351 // convert to integer first, then to FP of the right size. 7352 if (DstEltVT.isFloatingPoint()) { 7353 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); 7354 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); 7355 7356 // Next, convert to FP elements of the same size. 
7357 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); 7358 } 7359 7360 SDLoc DL(BV); 7361 7362 // Okay, we know the src/dst types are both integers of differing types. 7363 // Handling growing first. 7364 assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); 7365 if (SrcBitSize < DstBitSize) { 7366 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; 7367 7368 SmallVector<SDValue, 8> Ops; 7369 for (unsigned i = 0, e = BV->getNumOperands(); i != e; 7370 i += NumInputsPerOutput) { 7371 bool isLE = DAG.getDataLayout().isLittleEndian(); 7372 APInt NewBits = APInt(DstBitSize, 0); 7373 bool EltIsUndef = true; 7374 for (unsigned j = 0; j != NumInputsPerOutput; ++j) { 7375 // Shift the previously computed bits over. 7376 NewBits <<= SrcBitSize; 7377 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); 7378 if (Op.getOpcode() == ISD::UNDEF) continue; 7379 EltIsUndef = false; 7380 7381 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). 7382 zextOrTrunc(SrcBitSize).zext(DstBitSize); 7383 } 7384 7385 if (EltIsUndef) 7386 Ops.push_back(DAG.getUNDEF(DstEltVT)); 7387 else 7388 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); 7389 } 7390 7391 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); 7392 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); 7393 } 7394 7395 // Finally, this must be the case where we are shrinking elements: each input 7396 // turns into multiple outputs. 
7397 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; 7398 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, 7399 NumOutputsPerInput*BV->getNumOperands()); 7400 SmallVector<SDValue, 8> Ops; 7401 7402 for (const SDValue &Op : BV->op_values()) { 7403 if (Op.getOpcode() == ISD::UNDEF) { 7404 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); 7405 continue; 7406 } 7407 7408 APInt OpVal = cast<ConstantSDNode>(Op)-> 7409 getAPIntValue().zextOrTrunc(SrcBitSize); 7410 7411 for (unsigned j = 0; j != NumOutputsPerInput; ++j) { 7412 APInt ThisVal = OpVal.trunc(DstBitSize); 7413 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); 7414 OpVal = OpVal.lshr(DstBitSize); 7415 } 7416 7417 // For big endian targets, swap the order of the pieces of each element. 7418 if (DAG.getDataLayout().isBigEndian()) 7419 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); 7420 } 7421 7422 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); 7423 } 7424 7425 /// Try to perform FMA combining on a given FADD node. 7426 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { 7427 SDValue N0 = N->getOperand(0); 7428 SDValue N1 = N->getOperand(1); 7429 EVT VT = N->getValueType(0); 7430 SDLoc SL(N); 7431 7432 const TargetOptions &Options = DAG.getTarget().Options; 7433 bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || 7434 Options.UnsafeFPMath); 7435 7436 // Floating-point multiply-add with intermediate rounding. 7437 bool HasFMAD = (LegalOperations && 7438 TLI.isOperationLegal(ISD::FMAD, VT)); 7439 7440 // Floating-point multiply-add without intermediate rounding. 7441 bool HasFMA = ((!LegalOperations || 7442 TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && 7443 TLI.isFMAFasterThanFMulAndFAdd(VT) && 7444 UnsafeFPMath); 7445 7446 // No valid opcode, do not combine. 7447 if (!HasFMAD && !HasFMA) 7448 return SDValue(); 7449 7450 // Always prefer FMAD to FMA for precision. 7451 unsigned int PreferredFusedOpcode = HasFMAD ? 
ISD::FMAD : ISD::FMA; 7452 bool Aggressive = TLI.enableAggressiveFMAFusion(VT); 7453 bool LookThroughFPExt = TLI.isFPExtFree(VT); 7454 7455 // fold (fadd (fmul x, y), z) -> (fma x, y, z) 7456 if (N0.getOpcode() == ISD::FMUL && 7457 (Aggressive || N0->hasOneUse())) { 7458 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7459 N0.getOperand(0), N0.getOperand(1), N1); 7460 } 7461 7462 // fold (fadd x, (fmul y, z)) -> (fma y, z, x) 7463 // Note: Commutes FADD operands. 7464 if (N1.getOpcode() == ISD::FMUL && 7465 (Aggressive || N1->hasOneUse())) { 7466 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7467 N1.getOperand(0), N1.getOperand(1), N0); 7468 } 7469 7470 // Look through FP_EXTEND nodes to do more combining. 7471 if (UnsafeFPMath && LookThroughFPExt) { 7472 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) 7473 if (N0.getOpcode() == ISD::FP_EXTEND) { 7474 SDValue N00 = N0.getOperand(0); 7475 if (N00.getOpcode() == ISD::FMUL) 7476 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7477 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7478 N00.getOperand(0)), 7479 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7480 N00.getOperand(1)), N1); 7481 } 7482 7483 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) 7484 // Note: Commutes FADD operands. 7485 if (N1.getOpcode() == ISD::FP_EXTEND) { 7486 SDValue N10 = N1.getOperand(0); 7487 if (N10.getOpcode() == ISD::FMUL) 7488 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7489 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7490 N10.getOperand(0)), 7491 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7492 N10.getOperand(1)), N0); 7493 } 7494 } 7495 7496 // More folding opportunities when target permits. 
7497 if ((UnsafeFPMath || HasFMAD) && Aggressive) { 7498 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) 7499 if (N0.getOpcode() == PreferredFusedOpcode && 7500 N0.getOperand(2).getOpcode() == ISD::FMUL) { 7501 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7502 N0.getOperand(0), N0.getOperand(1), 7503 DAG.getNode(PreferredFusedOpcode, SL, VT, 7504 N0.getOperand(2).getOperand(0), 7505 N0.getOperand(2).getOperand(1), 7506 N1)); 7507 } 7508 7509 // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) 7510 if (N1->getOpcode() == PreferredFusedOpcode && 7511 N1.getOperand(2).getOpcode() == ISD::FMUL) { 7512 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7513 N1.getOperand(0), N1.getOperand(1), 7514 DAG.getNode(PreferredFusedOpcode, SL, VT, 7515 N1.getOperand(2).getOperand(0), 7516 N1.getOperand(2).getOperand(1), 7517 N0)); 7518 } 7519 7520 if (UnsafeFPMath && LookThroughFPExt) { 7521 // fold (fadd (fma x, y, (fpext (fmul u, v))), z) 7522 // -> (fma x, y, (fma (fpext u), (fpext v), z)) 7523 auto FoldFAddFMAFPExtFMul = [&] ( 7524 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { 7525 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, 7526 DAG.getNode(PreferredFusedOpcode, SL, VT, 7527 DAG.getNode(ISD::FP_EXTEND, SL, VT, U), 7528 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), 7529 Z)); 7530 }; 7531 if (N0.getOpcode() == PreferredFusedOpcode) { 7532 SDValue N02 = N0.getOperand(2); 7533 if (N02.getOpcode() == ISD::FP_EXTEND) { 7534 SDValue N020 = N02.getOperand(0); 7535 if (N020.getOpcode() == ISD::FMUL) 7536 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), 7537 N020.getOperand(0), N020.getOperand(1), 7538 N1); 7539 } 7540 } 7541 7542 // fold (fadd (fpext (fma x, y, (fmul u, v))), z) 7543 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) 7544 // FIXME: This turns two single-precision and one double-precision 7545 // operation into two double-precision operations, which might not be 7546 // interesting 
for all targets, especially GPUs. 7547 auto FoldFAddFPExtFMAFMul = [&] ( 7548 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { 7549 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7550 DAG.getNode(ISD::FP_EXTEND, SL, VT, X), 7551 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), 7552 DAG.getNode(PreferredFusedOpcode, SL, VT, 7553 DAG.getNode(ISD::FP_EXTEND, SL, VT, U), 7554 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), 7555 Z)); 7556 }; 7557 if (N0.getOpcode() == ISD::FP_EXTEND) { 7558 SDValue N00 = N0.getOperand(0); 7559 if (N00.getOpcode() == PreferredFusedOpcode) { 7560 SDValue N002 = N00.getOperand(2); 7561 if (N002.getOpcode() == ISD::FMUL) 7562 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), 7563 N002.getOperand(0), N002.getOperand(1), 7564 N1); 7565 } 7566 } 7567 7568 // fold (fadd x, (fma y, z, (fpext (fmul u, v))) 7569 // -> (fma y, z, (fma (fpext u), (fpext v), x)) 7570 if (N1.getOpcode() == PreferredFusedOpcode) { 7571 SDValue N12 = N1.getOperand(2); 7572 if (N12.getOpcode() == ISD::FP_EXTEND) { 7573 SDValue N120 = N12.getOperand(0); 7574 if (N120.getOpcode() == ISD::FMUL) 7575 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), 7576 N120.getOperand(0), N120.getOperand(1), 7577 N0); 7578 } 7579 } 7580 7581 // fold (fadd x, (fpext (fma y, z, (fmul u, v))) 7582 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) 7583 // FIXME: This turns two single-precision and one double-precision 7584 // operation into two double-precision operations, which might not be 7585 // interesting for all targets, especially GPUs. 
7586 if (N1.getOpcode() == ISD::FP_EXTEND) { 7587 SDValue N10 = N1.getOperand(0); 7588 if (N10.getOpcode() == PreferredFusedOpcode) { 7589 SDValue N102 = N10.getOperand(2); 7590 if (N102.getOpcode() == ISD::FMUL) 7591 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), 7592 N102.getOperand(0), N102.getOperand(1), 7593 N0); 7594 } 7595 } 7596 } 7597 } 7598 7599 return SDValue(); 7600 } 7601 7602 /// Try to perform FMA combining on a given FSUB node. 7603 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { 7604 SDValue N0 = N->getOperand(0); 7605 SDValue N1 = N->getOperand(1); 7606 EVT VT = N->getValueType(0); 7607 SDLoc SL(N); 7608 7609 const TargetOptions &Options = DAG.getTarget().Options; 7610 bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || 7611 Options.UnsafeFPMath); 7612 7613 // Floating-point multiply-add with intermediate rounding. 7614 bool HasFMAD = (LegalOperations && 7615 TLI.isOperationLegal(ISD::FMAD, VT)); 7616 7617 // Floating-point multiply-add without intermediate rounding. 7618 bool HasFMA = ((!LegalOperations || 7619 TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && 7620 TLI.isFMAFasterThanFMulAndFAdd(VT) && 7621 UnsafeFPMath); 7622 7623 // No valid opcode, do not combine. 7624 if (!HasFMAD && !HasFMA) 7625 return SDValue(); 7626 7627 // Always prefer FMAD to FMA for precision. 7628 unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; 7629 bool Aggressive = TLI.enableAggressiveFMAFusion(VT); 7630 bool LookThroughFPExt = TLI.isFPExtFree(VT); 7631 7632 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) 7633 if (N0.getOpcode() == ISD::FMUL && 7634 (Aggressive || N0->hasOneUse())) { 7635 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7636 N0.getOperand(0), N0.getOperand(1), 7637 DAG.getNode(ISD::FNEG, SL, VT, N1)); 7638 } 7639 7640 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) 7641 // Note: Commutes FSUB operands. 
7642 if (N1.getOpcode() == ISD::FMUL && 7643 (Aggressive || N1->hasOneUse())) 7644 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7645 DAG.getNode(ISD::FNEG, SL, VT, 7646 N1.getOperand(0)), 7647 N1.getOperand(1), N0); 7648 7649 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) 7650 if (N0.getOpcode() == ISD::FNEG && 7651 N0.getOperand(0).getOpcode() == ISD::FMUL && 7652 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) { 7653 SDValue N00 = N0.getOperand(0).getOperand(0); 7654 SDValue N01 = N0.getOperand(0).getOperand(1); 7655 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7656 DAG.getNode(ISD::FNEG, SL, VT, N00), N01, 7657 DAG.getNode(ISD::FNEG, SL, VT, N1)); 7658 } 7659 7660 // Look through FP_EXTEND nodes to do more combining. 7661 if (UnsafeFPMath && LookThroughFPExt) { 7662 // fold (fsub (fpext (fmul x, y)), z) 7663 // -> (fma (fpext x), (fpext y), (fneg z)) 7664 if (N0.getOpcode() == ISD::FP_EXTEND) { 7665 SDValue N00 = N0.getOperand(0); 7666 if (N00.getOpcode() == ISD::FMUL) 7667 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7668 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7669 N00.getOperand(0)), 7670 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7671 N00.getOperand(1)), 7672 DAG.getNode(ISD::FNEG, SL, VT, N1)); 7673 } 7674 7675 // fold (fsub x, (fpext (fmul y, z))) 7676 // -> (fma (fneg (fpext y)), (fpext z), x) 7677 // Note: Commutes FSUB operands. 
7678 if (N1.getOpcode() == ISD::FP_EXTEND) { 7679 SDValue N10 = N1.getOperand(0); 7680 if (N10.getOpcode() == ISD::FMUL) 7681 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7682 DAG.getNode(ISD::FNEG, SL, VT, 7683 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7684 N10.getOperand(0))), 7685 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7686 N10.getOperand(1)), 7687 N0); 7688 } 7689 7690 // fold (fsub (fpext (fneg (fmul, x, y))), z) 7691 // -> (fneg (fma (fpext x), (fpext y), z)) 7692 // Note: This could be removed with appropriate canonicalization of the 7693 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the 7694 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent 7695 // from implementing the canonicalization in visitFSUB. 7696 if (N0.getOpcode() == ISD::FP_EXTEND) { 7697 SDValue N00 = N0.getOperand(0); 7698 if (N00.getOpcode() == ISD::FNEG) { 7699 SDValue N000 = N00.getOperand(0); 7700 if (N000.getOpcode() == ISD::FMUL) { 7701 return DAG.getNode(ISD::FNEG, SL, VT, 7702 DAG.getNode(PreferredFusedOpcode, SL, VT, 7703 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7704 N000.getOperand(0)), 7705 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7706 N000.getOperand(1)), 7707 N1)); 7708 } 7709 } 7710 } 7711 7712 // fold (fsub (fneg (fpext (fmul, x, y))), z) 7713 // -> (fneg (fma (fpext x)), (fpext y), z) 7714 // Note: This could be removed with appropriate canonicalization of the 7715 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the 7716 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent 7717 // from implementing the canonicalization in visitFSUB. 
7718 if (N0.getOpcode() == ISD::FNEG) { 7719 SDValue N00 = N0.getOperand(0); 7720 if (N00.getOpcode() == ISD::FP_EXTEND) { 7721 SDValue N000 = N00.getOperand(0); 7722 if (N000.getOpcode() == ISD::FMUL) { 7723 return DAG.getNode(ISD::FNEG, SL, VT, 7724 DAG.getNode(PreferredFusedOpcode, SL, VT, 7725 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7726 N000.getOperand(0)), 7727 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7728 N000.getOperand(1)), 7729 N1)); 7730 } 7731 } 7732 } 7733 7734 } 7735 7736 // More folding opportunities when target permits. 7737 if ((UnsafeFPMath || HasFMAD) && Aggressive) { 7738 // fold (fsub (fma x, y, (fmul u, v)), z) 7739 // -> (fma x, y (fma u, v, (fneg z))) 7740 if (N0.getOpcode() == PreferredFusedOpcode && 7741 N0.getOperand(2).getOpcode() == ISD::FMUL) { 7742 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7743 N0.getOperand(0), N0.getOperand(1), 7744 DAG.getNode(PreferredFusedOpcode, SL, VT, 7745 N0.getOperand(2).getOperand(0), 7746 N0.getOperand(2).getOperand(1), 7747 DAG.getNode(ISD::FNEG, SL, VT, 7748 N1))); 7749 } 7750 7751 // fold (fsub x, (fma y, z, (fmul u, v))) 7752 // -> (fma (fneg y), z, (fma (fneg u), v, x)) 7753 if (N1.getOpcode() == PreferredFusedOpcode && 7754 N1.getOperand(2).getOpcode() == ISD::FMUL) { 7755 SDValue N20 = N1.getOperand(2).getOperand(0); 7756 SDValue N21 = N1.getOperand(2).getOperand(1); 7757 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7758 DAG.getNode(ISD::FNEG, SL, VT, 7759 N1.getOperand(0)), 7760 N1.getOperand(1), 7761 DAG.getNode(PreferredFusedOpcode, SL, VT, 7762 DAG.getNode(ISD::FNEG, SL, VT, N20), 7763 7764 N21, N0)); 7765 } 7766 7767 if (UnsafeFPMath && LookThroughFPExt) { 7768 // fold (fsub (fma x, y, (fpext (fmul u, v))), z) 7769 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) 7770 if (N0.getOpcode() == PreferredFusedOpcode) { 7771 SDValue N02 = N0.getOperand(2); 7772 if (N02.getOpcode() == ISD::FP_EXTEND) { 7773 SDValue N020 = N02.getOperand(0); 7774 if (N020.getOpcode() == ISD::FMUL) 7775 return 
DAG.getNode(PreferredFusedOpcode, SL, VT, 7776 N0.getOperand(0), N0.getOperand(1), 7777 DAG.getNode(PreferredFusedOpcode, SL, VT, 7778 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7779 N020.getOperand(0)), 7780 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7781 N020.getOperand(1)), 7782 DAG.getNode(ISD::FNEG, SL, VT, 7783 N1))); 7784 } 7785 } 7786 7787 // fold (fsub (fpext (fma x, y, (fmul u, v))), z) 7788 // -> (fma (fpext x), (fpext y), 7789 // (fma (fpext u), (fpext v), (fneg z))) 7790 // FIXME: This turns two single-precision and one double-precision 7791 // operation into two double-precision operations, which might not be 7792 // interesting for all targets, especially GPUs. 7793 if (N0.getOpcode() == ISD::FP_EXTEND) { 7794 SDValue N00 = N0.getOperand(0); 7795 if (N00.getOpcode() == PreferredFusedOpcode) { 7796 SDValue N002 = N00.getOperand(2); 7797 if (N002.getOpcode() == ISD::FMUL) 7798 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7799 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7800 N00.getOperand(0)), 7801 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7802 N00.getOperand(1)), 7803 DAG.getNode(PreferredFusedOpcode, SL, VT, 7804 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7805 N002.getOperand(0)), 7806 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7807 N002.getOperand(1)), 7808 DAG.getNode(ISD::FNEG, SL, VT, 7809 N1))); 7810 } 7811 } 7812 7813 // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) 7814 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) 7815 if (N1.getOpcode() == PreferredFusedOpcode && 7816 N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { 7817 SDValue N120 = N1.getOperand(2).getOperand(0); 7818 if (N120.getOpcode() == ISD::FMUL) { 7819 SDValue N1200 = N120.getOperand(0); 7820 SDValue N1201 = N120.getOperand(1); 7821 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7822 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), 7823 N1.getOperand(1), 7824 DAG.getNode(PreferredFusedOpcode, SL, VT, 7825 DAG.getNode(ISD::FNEG, SL, VT, 7826 DAG.getNode(ISD::FP_EXTEND, SL, 7827 VT, N1200)), 7828 
DAG.getNode(ISD::FP_EXTEND, SL, VT, 7829 N1201), 7830 N0)); 7831 } 7832 } 7833 7834 // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) 7835 // -> (fma (fneg (fpext y)), (fpext z), 7836 // (fma (fneg (fpext u)), (fpext v), x)) 7837 // FIXME: This turns two single-precision and one double-precision 7838 // operation into two double-precision operations, which might not be 7839 // interesting for all targets, especially GPUs. 7840 if (N1.getOpcode() == ISD::FP_EXTEND && 7841 N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { 7842 SDValue N100 = N1.getOperand(0).getOperand(0); 7843 SDValue N101 = N1.getOperand(0).getOperand(1); 7844 SDValue N102 = N1.getOperand(0).getOperand(2); 7845 if (N102.getOpcode() == ISD::FMUL) { 7846 SDValue N1020 = N102.getOperand(0); 7847 SDValue N1021 = N102.getOperand(1); 7848 return DAG.getNode(PreferredFusedOpcode, SL, VT, 7849 DAG.getNode(ISD::FNEG, SL, VT, 7850 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7851 N100)), 7852 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), 7853 DAG.getNode(PreferredFusedOpcode, SL, VT, 7854 DAG.getNode(ISD::FNEG, SL, VT, 7855 DAG.getNode(ISD::FP_EXTEND, SL, 7856 VT, N1020)), 7857 DAG.getNode(ISD::FP_EXTEND, SL, VT, 7858 N1021), 7859 N0)); 7860 } 7861 } 7862 } 7863 } 7864 7865 return SDValue(); 7866 } 7867 7868 SDValue DAGCombiner::visitFADD(SDNode *N) { 7869 SDValue N0 = N->getOperand(0); 7870 SDValue N1 = N->getOperand(1); 7871 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 7872 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 7873 EVT VT = N->getValueType(0); 7874 SDLoc DL(N); 7875 const TargetOptions &Options = DAG.getTarget().Options; 7876 7877 // fold vector ops 7878 if (VT.isVector()) 7879 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 7880 return FoldedVOp; 7881 7882 // fold (fadd c1, c2) -> c1 + c2 7883 if (N0CFP && N1CFP) 7884 return DAG.getNode(ISD::FADD, DL, VT, N0, N1); 7885 7886 // canonicalize constant to RHS 7887 if (N0CFP && !N1CFP) 7888 return DAG.getNode(ISD::FADD, DL, VT, N1, N0); 
7889 7890 // fold (fadd A, (fneg B)) -> (fsub A, B) 7891 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && 7892 isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) 7893 return DAG.getNode(ISD::FSUB, DL, VT, N0, 7894 GetNegatedExpression(N1, DAG, LegalOperations)); 7895 7896 // fold (fadd (fneg A), B) -> (fsub B, A) 7897 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && 7898 isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) 7899 return DAG.getNode(ISD::FSUB, DL, VT, N1, 7900 GetNegatedExpression(N0, DAG, LegalOperations)); 7901 7902 // If 'unsafe math' is enabled, fold lots of things. 7903 if (Options.UnsafeFPMath) { 7904 // No FP constant should be created after legalization as Instruction 7905 // Selection pass has a hard time dealing with FP constants. 7906 bool AllowNewConst = (Level < AfterLegalizeDAG); 7907 7908 // fold (fadd A, 0) -> A 7909 if (N1CFP && N1CFP->isZero()) 7910 return N0; 7911 7912 // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) 7913 if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && 7914 isa<ConstantFPSDNode>(N0.getOperand(1))) 7915 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), 7916 DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1)); 7917 7918 // If allowed, fold (fadd (fneg x), x) -> 0.0 7919 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) 7920 return DAG.getConstantFP(0.0, DL, VT); 7921 7922 // If allowed, fold (fadd x, (fneg x)) -> 0.0 7923 if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) 7924 return DAG.getConstantFP(0.0, DL, VT); 7925 7926 // We can fold chains of FADD's of the same value into multiplications. 7927 // This transform is not safe in general because we are reducing the number 7928 // of rounding steps. 
7929 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { 7930 if (N0.getOpcode() == ISD::FMUL) { 7931 ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); 7932 ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); 7933 7934 // (fadd (fmul x, c), x) -> (fmul x, c+1) 7935 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { 7936 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), 7937 DAG.getConstantFP(1.0, DL, VT)); 7938 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP); 7939 } 7940 7941 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) 7942 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && 7943 N1.getOperand(0) == N1.getOperand(1) && 7944 N0.getOperand(0) == N1.getOperand(0)) { 7945 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), 7946 DAG.getConstantFP(2.0, DL, VT)); 7947 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP); 7948 } 7949 } 7950 7951 if (N1.getOpcode() == ISD::FMUL) { 7952 ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); 7953 ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); 7954 7955 // (fadd x, (fmul x, c)) -> (fmul x, c+1) 7956 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { 7957 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), 7958 DAG.getConstantFP(1.0, DL, VT)); 7959 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP); 7960 } 7961 7962 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) 7963 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && 7964 N0.getOperand(0) == N0.getOperand(1) && 7965 N1.getOperand(0) == N0.getOperand(0)) { 7966 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), 7967 DAG.getConstantFP(2.0, DL, VT)); 7968 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP); 7969 } 7970 } 7971 7972 if (N0.getOpcode() == ISD::FADD && AllowNewConst) { 7973 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); 7974 // (fadd (fadd x, 
x), x) -> (fmul x, 3.0) 7975 if (!CFP && N0.getOperand(0) == N0.getOperand(1) && 7976 (N0.getOperand(0) == N1)) { 7977 return DAG.getNode(ISD::FMUL, DL, VT, 7978 N1, DAG.getConstantFP(3.0, DL, VT)); 7979 } 7980 } 7981 7982 if (N1.getOpcode() == ISD::FADD && AllowNewConst) { 7983 ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); 7984 // (fadd x, (fadd x, x)) -> (fmul x, 3.0) 7985 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && 7986 N1.getOperand(0) == N0) { 7987 return DAG.getNode(ISD::FMUL, DL, VT, 7988 N0, DAG.getConstantFP(3.0, DL, VT)); 7989 } 7990 } 7991 7992 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) 7993 if (AllowNewConst && 7994 N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && 7995 N0.getOperand(0) == N0.getOperand(1) && 7996 N1.getOperand(0) == N1.getOperand(1) && 7997 N0.getOperand(0) == N1.getOperand(0)) { 7998 return DAG.getNode(ISD::FMUL, DL, VT, 7999 N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT)); 8000 } 8001 } 8002 } // enable-unsafe-fp-math 8003 8004 // FADD -> FMA combines: 8005 SDValue Fused = visitFADDForFMACombine(N); 8006 if (Fused) { 8007 AddToWorklist(Fused.getNode()); 8008 return Fused; 8009 } 8010 8011 return SDValue(); 8012 } 8013 8014 SDValue DAGCombiner::visitFSUB(SDNode *N) { 8015 SDValue N0 = N->getOperand(0); 8016 SDValue N1 = N->getOperand(1); 8017 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); 8018 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); 8019 EVT VT = N->getValueType(0); 8020 SDLoc dl(N); 8021 const TargetOptions &Options = DAG.getTarget().Options; 8022 8023 // fold vector ops 8024 if (VT.isVector()) 8025 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 8026 return FoldedVOp; 8027 8028 // fold (fsub c1, c2) -> c1-c2 8029 if (N0CFP && N1CFP) 8030 return DAG.getNode(ISD::FSUB, dl, VT, N0, N1); 8031 8032 // fold (fsub A, (fneg B)) -> (fadd A, B) 8033 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) 8034 return DAG.getNode(ISD::FADD, dl, VT, N0, 8035 
GetNegatedExpression(N1, DAG, LegalOperations)); 8036 8037 // If 'unsafe math' is enabled, fold lots of things. 8038 if (Options.UnsafeFPMath) { 8039 // (fsub A, 0) -> A 8040 if (N1CFP && N1CFP->isZero()) 8041 return N0; 8042 8043 // (fsub 0, B) -> -B 8044 if (N0CFP && N0CFP->isZero()) { 8045 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) 8046 return GetNegatedExpression(N1, DAG, LegalOperations); 8047 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 8048 return DAG.getNode(ISD::FNEG, dl, VT, N1); 8049 } 8050 8051 // (fsub x, x) -> 0.0 8052 if (N0 == N1) 8053 return DAG.getConstantFP(0.0f, dl, VT); 8054 8055 // (fsub x, (fadd x, y)) -> (fneg y) 8056 // (fsub x, (fadd y, x)) -> (fneg y) 8057 if (N1.getOpcode() == ISD::FADD) { 8058 SDValue N10 = N1->getOperand(0); 8059 SDValue N11 = N1->getOperand(1); 8060 8061 if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options)) 8062 return GetNegatedExpression(N11, DAG, LegalOperations); 8063 8064 if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options)) 8065 return GetNegatedExpression(N10, DAG, LegalOperations); 8066 } 8067 } 8068 8069 // FSUB -> FMA combines: 8070 SDValue Fused = visitFSUBForFMACombine(N); 8071 if (Fused) { 8072 AddToWorklist(Fused.getNode()); 8073 return Fused; 8074 } 8075 8076 return SDValue(); 8077 } 8078 8079 SDValue DAGCombiner::visitFMUL(SDNode *N) { 8080 SDValue N0 = N->getOperand(0); 8081 SDValue N1 = N->getOperand(1); 8082 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); 8083 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); 8084 EVT VT = N->getValueType(0); 8085 SDLoc DL(N); 8086 const TargetOptions &Options = DAG.getTarget().Options; 8087 8088 // fold vector ops 8089 if (VT.isVector()) { 8090 // This just handles C1 * C2 for vectors. Other vector folds are below. 
8091 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 8092 return FoldedVOp; 8093 } 8094 8095 // fold (fmul c1, c2) -> c1*c2 8096 if (N0CFP && N1CFP) 8097 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1); 8098 8099 // canonicalize constant to RHS 8100 if (isConstantFPBuildVectorOrConstantFP(N0) && 8101 !isConstantFPBuildVectorOrConstantFP(N1)) 8102 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); 8103 8104 // fold (fmul A, 1.0) -> A 8105 if (N1CFP && N1CFP->isExactlyValue(1.0)) 8106 return N0; 8107 8108 if (Options.UnsafeFPMath) { 8109 // fold (fmul A, 0) -> 0 8110 if (N1CFP && N1CFP->isZero()) 8111 return N1; 8112 8113 // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) 8114 if (N0.getOpcode() == ISD::FMUL) { 8115 // Fold scalars or any vector constants (not just splats). 8116 // This fold is done in general by InstCombine, but extra fmul insts 8117 // may have been generated during lowering. 8118 SDValue N00 = N0.getOperand(0); 8119 SDValue N01 = N0.getOperand(1); 8120 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); 8121 auto *BV00 = dyn_cast<BuildVectorSDNode>(N00); 8122 auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); 8123 8124 // Check 1: Make sure that the first operand of the inner multiply is NOT 8125 // a constant. Otherwise, we may induce infinite looping. 8126 if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { 8127 // Check 2: Make sure that the second operand of the inner multiply and 8128 // the second operand of the outer multiply are constants. 
8129 if ((N1CFP && isConstOrConstSplatFP(N01)) || 8130 (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { 8131 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1); 8132 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts); 8133 } 8134 } 8135 } 8136 8137 // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) 8138 // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs 8139 // during an early run of DAGCombiner can prevent folding with fmuls 8140 // inserted during lowering. 8141 if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { 8142 const SDValue Two = DAG.getConstantFP(2.0, DL, VT); 8143 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1); 8144 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts); 8145 } 8146 } 8147 8148 // fold (fmul X, 2.0) -> (fadd X, X) 8149 if (N1CFP && N1CFP->isExactlyValue(+2.0)) 8150 return DAG.getNode(ISD::FADD, DL, VT, N0, N0); 8151 8152 // fold (fmul X, -1.0) -> (fneg X) 8153 if (N1CFP && N1CFP->isExactlyValue(-1.0)) 8154 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 8155 return DAG.getNode(ISD::FNEG, DL, VT, N0); 8156 8157 // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) 8158 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { 8159 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { 8160 // Both can be negated for free, check to see if at least one is cheaper 8161 // negated. 
8162 if (LHSNeg == 2 || RHSNeg == 2) 8163 return DAG.getNode(ISD::FMUL, DL, VT, 8164 GetNegatedExpression(N0, DAG, LegalOperations), 8165 GetNegatedExpression(N1, DAG, LegalOperations)); 8166 } 8167 } 8168 8169 return SDValue(); 8170 } 8171 8172 SDValue DAGCombiner::visitFMA(SDNode *N) { 8173 SDValue N0 = N->getOperand(0); 8174 SDValue N1 = N->getOperand(1); 8175 SDValue N2 = N->getOperand(2); 8176 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 8177 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 8178 EVT VT = N->getValueType(0); 8179 SDLoc dl(N); 8180 const TargetOptions &Options = DAG.getTarget().Options; 8181 8182 // Constant fold FMA. 8183 if (isa<ConstantFPSDNode>(N0) && 8184 isa<ConstantFPSDNode>(N1) && 8185 isa<ConstantFPSDNode>(N2)) { 8186 return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2); 8187 } 8188 8189 if (Options.UnsafeFPMath) { 8190 if (N0CFP && N0CFP->isZero()) 8191 return N2; 8192 if (N1CFP && N1CFP->isZero()) 8193 return N2; 8194 } 8195 if (N0CFP && N0CFP->isExactlyValue(1.0)) 8196 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); 8197 if (N1CFP && N1CFP->isExactlyValue(1.0)) 8198 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); 8199 8200 // Canonicalize (fma c, x, y) -> (fma x, c, y) 8201 if (N0CFP && !N1CFP) 8202 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); 8203 8204 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) 8205 if (Options.UnsafeFPMath && N1CFP && 8206 N2.getOpcode() == ISD::FMUL && 8207 N0 == N2.getOperand(0) && 8208 N2.getOperand(1).getOpcode() == ISD::ConstantFP) { 8209 return DAG.getNode(ISD::FMUL, dl, VT, N0, 8210 DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); 8211 } 8212 8213 8214 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) 8215 if (Options.UnsafeFPMath && 8216 N0.getOpcode() == ISD::FMUL && N1CFP && 8217 N0.getOperand(1).getOpcode() == ISD::ConstantFP) { 8218 return DAG.getNode(ISD::FMA, dl, VT, 8219 N0.getOperand(0), 8220 DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), 8221 
N2); 8222 } 8223 8224 // (fma x, 1, y) -> (fadd x, y) 8225 // (fma x, -1, y) -> (fadd (fneg x), y) 8226 if (N1CFP) { 8227 if (N1CFP->isExactlyValue(1.0)) 8228 return DAG.getNode(ISD::FADD, dl, VT, N0, N2); 8229 8230 if (N1CFP->isExactlyValue(-1.0) && 8231 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { 8232 SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); 8233 AddToWorklist(RHSNeg.getNode()); 8234 return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); 8235 } 8236 } 8237 8238 // (fma x, c, x) -> (fmul x, (c+1)) 8239 if (Options.UnsafeFPMath && N1CFP && N0 == N2) 8240 return DAG.getNode(ISD::FMUL, dl, VT, N0, 8241 DAG.getNode(ISD::FADD, dl, VT, 8242 N1, DAG.getConstantFP(1.0, dl, VT))); 8243 8244 // (fma x, c, (fneg x)) -> (fmul x, (c-1)) 8245 if (Options.UnsafeFPMath && N1CFP && 8246 N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) 8247 return DAG.getNode(ISD::FMUL, dl, VT, N0, 8248 DAG.getNode(ISD::FADD, dl, VT, 8249 N1, DAG.getConstantFP(-1.0, dl, VT))); 8250 8251 8252 return SDValue(); 8253 } 8254 8255 SDValue DAGCombiner::visitFDIV(SDNode *N) { 8256 SDValue N0 = N->getOperand(0); 8257 SDValue N1 = N->getOperand(1); 8258 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 8259 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 8260 EVT VT = N->getValueType(0); 8261 SDLoc DL(N); 8262 const TargetOptions &Options = DAG.getTarget().Options; 8263 8264 // fold vector ops 8265 if (VT.isVector()) 8266 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 8267 return FoldedVOp; 8268 8269 // fold (fdiv c1, c2) -> c1/c2 8270 if (N0CFP && N1CFP) 8271 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); 8272 8273 if (Options.UnsafeFPMath) { 8274 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. 8275 if (N1CFP) { 8276 // Compute the reciprocal 1.0 / c2. 
8277 APFloat N1APF = N1CFP->getValueAPF(); 8278 APFloat Recip(N1APF.getSemantics(), 1); // 1.0 8279 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); 8280 // Only do the transform if the reciprocal is a legal fp immediate that 8281 // isn't too nasty (eg NaN, denormal, ...). 8282 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty 8283 (!LegalOperations || 8284 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM 8285 // backend)... we should handle this gracefully after Legalize. 8286 // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || 8287 TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || 8288 TLI.isFPImmLegal(Recip, VT))) 8289 return DAG.getNode(ISD::FMUL, DL, VT, N0, 8290 DAG.getConstantFP(Recip, DL, VT)); 8291 } 8292 8293 // If this FDIV is part of a reciprocal square root, it may be folded 8294 // into a target-specific square root estimate instruction. 8295 if (N1.getOpcode() == ISD::FSQRT) { 8296 if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) { 8297 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); 8298 } 8299 } else if (N1.getOpcode() == ISD::FP_EXTEND && 8300 N1.getOperand(0).getOpcode() == ISD::FSQRT) { 8301 if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { 8302 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); 8303 AddToWorklist(RV.getNode()); 8304 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); 8305 } 8306 } else if (N1.getOpcode() == ISD::FP_ROUND && 8307 N1.getOperand(0).getOpcode() == ISD::FSQRT) { 8308 if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { 8309 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); 8310 AddToWorklist(RV.getNode()); 8311 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); 8312 } 8313 } else if (N1.getOpcode() == ISD::FMUL) { 8314 // Look through an FMUL. Even though this won't remove the FDIV directly, 8315 // it's still worthwhile to get rid of the FSQRT if possible. 
8316 SDValue SqrtOp; 8317 SDValue OtherOp; 8318 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) { 8319 SqrtOp = N1.getOperand(0); 8320 OtherOp = N1.getOperand(1); 8321 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) { 8322 SqrtOp = N1.getOperand(1); 8323 OtherOp = N1.getOperand(0); 8324 } 8325 if (SqrtOp.getNode()) { 8326 // We found a FSQRT, so try to make this fold: 8327 // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) 8328 if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) { 8329 RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp); 8330 AddToWorklist(RV.getNode()); 8331 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); 8332 } 8333 } 8334 } 8335 8336 // Fold into a reciprocal estimate and multiply instead of a real divide. 8337 if (SDValue RV = BuildReciprocalEstimate(N1)) { 8338 AddToWorklist(RV.getNode()); 8339 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); 8340 } 8341 } 8342 8343 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) 8344 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { 8345 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { 8346 // Both can be negated for free, check to see if at least one is cheaper 8347 // negated. 8348 if (LHSNeg == 2 || RHSNeg == 2) 8349 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, 8350 GetNegatedExpression(N0, DAG, LegalOperations), 8351 GetNegatedExpression(N1, DAG, LegalOperations)); 8352 } 8353 } 8354 8355 // Combine multiple FDIVs with the same divisor into multiple FMULs by the 8356 // reciprocal. 8357 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) 8358 // Notice that this is not always beneficial. One reason is different target 8359 // may have different costs for FDIV and FMUL, so sometimes the cost of two 8360 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason 8361 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". 
8362 if (Options.UnsafeFPMath) { 8363 // Skip if current node is a reciprocal. 8364 if (N0CFP && N0CFP->isExactlyValue(1.0)) 8365 return SDValue(); 8366 8367 SmallVector<SDNode *, 4> Users; 8368 // Find all FDIV users of the same divisor. 8369 for (auto *U : N1->uses()) { 8370 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) 8371 Users.push_back(U); 8372 } 8373 8374 if (TLI.combineRepeatedFPDivisors(Users.size())) { 8375 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); 8376 // FIXME: This optimization requires some level of fast-math, so the 8377 // created reciprocal node should at least have the 'allowReciprocal' 8378 // fast-math-flag set. 8379 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); 8380 8381 // Dividend / Divisor -> Dividend * Reciprocal 8382 for (auto *U : Users) { 8383 SDValue Dividend = U->getOperand(0); 8384 if (Dividend != FPOne) { 8385 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, 8386 Reciprocal); 8387 CombineTo(U, NewNode); 8388 } else if (U != Reciprocal.getNode()) { 8389 // In the absence of fast-math-flags, this user node is always the 8390 // same node as Reciprocal, but with FMF they may be different nodes. 8391 CombineTo(U, Reciprocal); 8392 } 8393 } 8394 return SDValue(N, 0); // N was replaced. 
8395 } 8396 } 8397 8398 return SDValue(); 8399 } 8400 8401 SDValue DAGCombiner::visitFREM(SDNode *N) { 8402 SDValue N0 = N->getOperand(0); 8403 SDValue N1 = N->getOperand(1); 8404 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 8405 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 8406 EVT VT = N->getValueType(0); 8407 8408 // fold (frem c1, c2) -> fmod(c1,c2) 8409 if (N0CFP && N1CFP) 8410 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); 8411 8412 return SDValue(); 8413 } 8414 8415 SDValue DAGCombiner::visitFSQRT(SDNode *N) { 8416 if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) 8417 return SDValue(); 8418 8419 // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) 8420 SDValue RV = BuildRsqrtEstimate(N->getOperand(0)); 8421 if (!RV) 8422 return SDValue(); 8423 8424 EVT VT = RV.getValueType(); 8425 SDLoc DL(N); 8426 RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); 8427 AddToWorklist(RV.getNode()); 8428 8429 // Unfortunately, RV is now NaN if the input was exactly 0. 8430 // Select out this case and force the answer to 0. 8431 SDValue Zero = DAG.getConstantFP(0.0, DL, VT); 8432 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 8433 SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); 8434 AddToWorklist(ZeroCmp.getNode()); 8435 AddToWorklist(RV.getNode()); 8436 8437 return DAG.getNode(VT.isVector() ? 
ISD::VSELECT : ISD::SELECT, DL, VT, 8438 ZeroCmp, Zero, RV); 8439 } 8440 8441 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { 8442 SDValue N0 = N->getOperand(0); 8443 SDValue N1 = N->getOperand(1); 8444 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 8445 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 8446 EVT VT = N->getValueType(0); 8447 8448 if (N0CFP && N1CFP) // Constant fold 8449 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); 8450 8451 if (N1CFP) { 8452 const APFloat& V = N1CFP->getValueAPF(); 8453 // copysign(x, c1) -> fabs(x) iff ispos(c1) 8454 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) 8455 if (!V.isNegative()) { 8456 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) 8457 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 8458 } else { 8459 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 8460 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, 8461 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); 8462 } 8463 } 8464 8465 // copysign(fabs(x), y) -> copysign(x, y) 8466 // copysign(fneg(x), y) -> copysign(x, y) 8467 // copysign(copysign(x,z), y) -> copysign(x, y) 8468 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || 8469 N0.getOpcode() == ISD::FCOPYSIGN) 8470 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 8471 N0.getOperand(0), N1); 8472 8473 // copysign(x, abs(y)) -> abs(x) 8474 if (N1.getOpcode() == ISD::FABS) 8475 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 8476 8477 // copysign(x, copysign(y,z)) -> copysign(x, z) 8478 if (N1.getOpcode() == ISD::FCOPYSIGN) 8479 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 8480 N0, N1.getOperand(1)); 8481 8482 // copysign(x, fp_extend(y)) -> copysign(x, y) 8483 // copysign(x, fp_round(y)) -> copysign(x, y) 8484 if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) 8485 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 8486 N0, N1.getOperand(0)); 8487 8488 return SDValue(); 8489 } 8490 8491 SDValue 
DAGCombiner::visitSINT_TO_FP(SDNode *N) { 8492 SDValue N0 = N->getOperand(0); 8493 EVT VT = N->getValueType(0); 8494 EVT OpVT = N0.getValueType(); 8495 8496 // fold (sint_to_fp c1) -> c1fp 8497 if (isConstantIntBuildVectorOrConstantInt(N0) && 8498 // ...but only if the target supports immediate floating-point values 8499 (!LegalOperations || 8500 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 8501 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); 8502 8503 // If the input is a legal type, and SINT_TO_FP is not legal on this target, 8504 // but UINT_TO_FP is legal on this target, try to convert. 8505 if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && 8506 TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { 8507 // If the sign bit is known to be zero, we can change this to UINT_TO_FP. 8508 if (DAG.SignBitIsZero(N0)) 8509 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); 8510 } 8511 8512 // The next optimizations are desirable only if SELECT_CC can be lowered. 
8513 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { 8514 // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) 8515 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && 8516 !VT.isVector() && 8517 (!LegalOperations || 8518 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 8519 SDLoc DL(N); 8520 SDValue Ops[] = 8521 { N0.getOperand(0), N0.getOperand(1), 8522 DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), 8523 N0.getOperand(2) }; 8524 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); 8525 } 8526 8527 // fold (sint_to_fp (zext (setcc x, y, cc))) -> 8528 // (select_cc x, y, 1.0, 0.0,, cc) 8529 if (N0.getOpcode() == ISD::ZERO_EXTEND && 8530 N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && 8531 (!LegalOperations || 8532 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 8533 SDLoc DL(N); 8534 SDValue Ops[] = 8535 { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), 8536 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), 8537 N0.getOperand(0).getOperand(2) }; 8538 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); 8539 } 8540 } 8541 8542 return SDValue(); 8543 } 8544 8545 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { 8546 SDValue N0 = N->getOperand(0); 8547 EVT VT = N->getValueType(0); 8548 EVT OpVT = N0.getValueType(); 8549 8550 // fold (uint_to_fp c1) -> c1fp 8551 if (isConstantIntBuildVectorOrConstantInt(N0) && 8552 // ...but only if the target supports immediate floating-point values 8553 (!LegalOperations || 8554 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 8555 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); 8556 8557 // If the input is a legal type, and UINT_TO_FP is not legal on this target, 8558 // but SINT_TO_FP is legal on this target, try to convert. 
// Prefer SINT_TO_FP when UINT_TO_FP is unavailable but the signed form is.
  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0,, cc)

    if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  return SDValue();
}

// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
//
// N is the FP_TO_SINT / FP_TO_UINT node. Returns an empty SDValue when the
// operand is not a [SU]INT_TO_FP or when the intermediate FP type does not
// have enough precision for the narrower of the source and result integer
// ranges. When the scalar sizes match but the types differ, a BITCAST of the
// original integer is returned.
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  // A signed type contributes one bit less of magnitude (the sign bit).
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range.
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      // Sign-extend only when both conversions are signed; otherwise
      // zero-extend.
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    if (SrcVT == VT)
      return Src;
    return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src);
  }
  return SDValue();
}

/// Combine an FP_TO_SINT node: re-emit it on a constant FP operand
/// (presumably so that getNode can constant fold it), otherwise try to cancel
/// it against an int-to-fp operand via FoldIntToFPToInt.
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_sint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);

  return FoldIntToFPToInt(N, DAG);
}

/// Combine an FP_TO_UINT node; unsigned counterpart of visitFP_TO_SINT.
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_uint c1fp) -> c1
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);

  return FoldIntToFPToInt(N, DAG);
}

/// Combine an FP_ROUND node. Operand 0 is the value being rounded and
/// operand 1 is a constant flag; the folds below treat a flag value of 1 as
/// "this rounding is a value preserving truncation".
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    // Queue the new inner round so it gets a chance to be combined as well.
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  return SDValue();
}

/// Combine an FP_ROUND_INREG node. Operand 1 is a VTSDNode naming the type to
/// round to. Only the constant case is handled: the constant is rebuilt in
/// that type (when the type is legal) and then extended back to VT.
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // Note: the local below deliberately keeps its original name, which shadows
  // the EVT type for the remainder of this function.
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);

  // fold (fp_round_inreg c1fp) -> c1fp
  if (N0CFP && isTypeLegal(EVT)) {
    SDLoc DL(N);
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
    return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
  }

  return SDValue();
}

/// Combine an FP_EXTEND node: constant operands, fp16_to_fp operands,
/// extend-of-truncating-round, and extend-of-load (turned into an extending
/// load). Returns an empty SDValue when nothing applies.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getNode()->getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    // X is wider than the result: a single (still value preserving) round
    // replaces the round/extend pair.
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // The old load is replaced by a value preserving round of the extending
    // load, plus the extending load's chain result.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  return SDValue();
}

/// Combine an FCEIL node: only a constant FP operand (scalar or build
/// vector) is handled, by re-emitting the node through DAG.getNode.
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fceil c1) -> fceil(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);

  return SDValue();
}

/// Combine an FTRUNC node: only a constant FP operand is handled, by
/// re-emitting the node through DAG.getNode.
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ftrunc c1) -> ftrunc(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);

  return SDValue();
}

/// Combine an FFLOOR node: only a constant FP operand is handled, by
/// re-emitting the node through DAG.getNode.
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ffloor c1) -> ffloor(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);

  return SDValue();
}

// FIXME: FNEG and FABS have a lot in common; refactor.
/// Combine an FNEG node: constant operands, operands that can be negated for
/// free, negation of a bitcast from integer (done as an integer XOR with the
/// sign mask), and negation of a multiply by a constant.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignBit(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      // CVal (the negated constant) is only used for the legality query
      // below; the rewritten node is built as an FNEG of the original
      // constant operand.
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
           TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
        return DAG.getNode(
            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
    }
  }

  return SDValue();
}

/// Combine an FMINNUM node: fold two scalar FP constants with minnum(), and
/// otherwise move a lone constant operand to the right-hand side.
SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0));
  }

  if (N0CFP) {
    EVT VT = N->getValueType(0);
    // Canonicalize to constant on RHS.
    return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
  }

  return SDValue();
}

/// Combine an FMAXNUM node: fold two scalar FP constants with maxnum(), and
/// otherwise move a lone constant operand to the right-hand side.
SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);

  if (N0CFP && N1CFP) {
    const APFloat &C0 = N0CFP->getValueAPF();
    const APFloat &C1 = N1CFP->getValueAPF();
    return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0));
  }

  if (N0CFP) {
    EVT VT = N->getValueType(0);
    // Canonicalize to constant on RHS.
    return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
  }

  return SDValue();
}

/// Combine an FABS node: constant operands, fabs of fabs / fneg / fcopysign,
/// and fabs of a bitcast from integer (done as an integer AND with the
/// inverted sign mask).
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
  // constant pool values.
  if (!TLI.isFAbsFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
      }
      SDLoc DL(N0);
      Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
                        DAG.getConstant(SignMask, DL, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
    }
  }

  return SDValue();
}

/// Combine a BRCOND node. The operands are read as: 0 = chain, 1 = condition,
/// 2 = the remaining operand that is forwarded unchanged to the rewritten
/// branch. Handles three condition shapes: a SETCC (becomes BR_CC when the
/// target supports it), a single-bit test written as srl(and x, 2^k), k, and
/// an XOR (becomes a SETCC comparison).
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC,
                                   N1.getOperand(0).getValueType())) {
    return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
       (N1.getOperand(0).hasOneUse() &&
        N1.getOperand(0).getOpcode() == ISD::SRL))) {
    SDNode *Trunc = nullptr;
    if (N1.getOpcode() == ISD::TRUNCATE) {
      // Look past the truncate.
      Trunc = N1.getNode();
      N1 = N1.getOperand(0);
    }

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc ne %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N1.getOperand(0);
    SDValue Op1 = N1.getOperand(1);

    if (Op0.getOpcode() == ISD::AND &&
        Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
          SDLoc DL(N);
          SDValue SetCC =
            DAG.getSetCC(DL,
                         getSetCCResultType(Op0.getValueType()),
                         Op0, DAG.getConstant(0, DL, Op0.getValueType()),
                         ISD::SETNE);

          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
                                          MVT::Other, Chain, SetCC, N2);
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
          // will convert it back to (X & C1) >> C2.
          CombineTo(N, NewBRCond, false);
          // Truncate is dead.
          if (Trunc)
            deleteAndRecombine(Trunc);
          // Replace the uses of SRL with SETCC
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
          deleteAndRecombine(N1.getNode());
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }

    if (Trunc)
      // Restore N1 if the above transformation doesn't match.
      N1 = N->getOperand(1);
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
    SDNode *TheXor = N1.getNode();
    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);
    if (Op0.getOpcode() == Op1.getOpcode()) {
      // Avoid missing important xor optimizations.
      SDValue Tmp = visitXOR(TheXor);
      if (Tmp.getNode()) {
        if (Tmp.getNode() != TheXor) {
          DEBUG(dbgs() << "\nReplacing.8 ";
                TheXor->dump(&DAG);
                dbgs() << "\nWith: ";
                Tmp.getNode()->dump(&DAG);
                dbgs() << '\n');
          WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
          deleteAndRecombine(TheXor);
          return DAG.getNode(ISD::BRCOND, SDLoc(N),
                             MVT::Other, Chain, Tmp, N2);
        }

        // visitXOR has changed XOR's operands or replaced the XOR completely,
        // bail out.
        return SDValue(N, 0);
      }
    }

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // NOTE(review): isOneConstant(Op0) and Op0.getOpcode() == ISD::XOR look
      // mutually exclusive (a constant node would not have the XOR opcode), so
      // this branch appears unreachable and Equal presumably always stays
      // false. Confirm what was intended before changing it: the setcc below
      // compares Op0 with Op1 as they are, so flipping the test alone would
      // not yield the documented br(x == y) form.
      if (isOneConstant(Op0) && Op0.hasOneUse() &&
          Op0.getOpcode() == ISD::XOR) {
        TheXor = Op0.getNode();
        Equal = true;
      }

      EVT SetCCVT = N1.getValueType();
      if (LegalTypes)
        SetCCVT = getSetCCResultType(SetCCVT);
      SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
                                   SetCCVT,
                                   Op0, Op1,
                                   Equal ? ISD::SETEQ : ISD::SETNE);
      // Replace the uses of XOR with SETCC
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
      deleteAndRecombine(N1.getNode());
      return DAG.getNode(ISD::BRCOND, SDLoc(N),
                         MVT::Other, Chain, SetCC, N2);
    }
  }

  return SDValue();
}

// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
9085 // 9086 SDValue DAGCombiner::visitBR_CC(SDNode *N) { 9087 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); 9088 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); 9089 9090 // If N is a constant we could fold this into a fallthrough or unconditional 9091 // branch. However that doesn't happen very often in normal code, because 9092 // Instcombine/SimplifyCFG should have handled the available opportunities. 9093 // If we did this folding here, it would be necessary to update the 9094 // MachineBasicBlock CFG, which is awkward. 9095 9096 // Use SimplifySetCC to simplify SETCC's. 9097 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), 9098 CondLHS, CondRHS, CC->get(), SDLoc(N), 9099 false); 9100 if (Simp.getNode()) AddToWorklist(Simp.getNode()); 9101 9102 // fold to a simpler setcc 9103 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) 9104 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, 9105 N->getOperand(0), Simp.getOperand(2), 9106 Simp.getOperand(0), Simp.getOperand(1), 9107 N->getOperand(4)); 9108 9109 return SDValue(); 9110 } 9111 9112 /// Return true if 'Use' is a load or a store that uses N as its base pointer 9113 /// and that N may be folded in the load / store addressing mode. 
9114 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, 9115 SelectionDAG &DAG, 9116 const TargetLowering &TLI) { 9117 EVT VT; 9118 unsigned AS; 9119 9120 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { 9121 if (LD->isIndexed() || LD->getBasePtr().getNode() != N) 9122 return false; 9123 VT = LD->getMemoryVT(); 9124 AS = LD->getAddressSpace(); 9125 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { 9126 if (ST->isIndexed() || ST->getBasePtr().getNode() != N) 9127 return false; 9128 VT = ST->getMemoryVT(); 9129 AS = ST->getAddressSpace(); 9130 } else 9131 return false; 9132 9133 TargetLowering::AddrMode AM; 9134 if (N->getOpcode() == ISD::ADD) { 9135 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 9136 if (Offset) 9137 // [reg +/- imm] 9138 AM.BaseOffs = Offset->getSExtValue(); 9139 else 9140 // [reg +/- reg] 9141 AM.Scale = 1; 9142 } else if (N->getOpcode() == ISD::SUB) { 9143 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 9144 if (Offset) 9145 // [reg +/- imm] 9146 AM.BaseOffs = -Offset->getSExtValue(); 9147 else 9148 // [reg +/- reg] 9149 AM.Scale = 1; 9150 } else 9151 return false; 9152 9153 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, 9154 VT.getTypeForEVT(*DAG.getContext()), AS); 9155 } 9156 9157 /// Try turning a load/store into a pre-indexed load/store when the base 9158 /// pointer is an add or subtract and it has other uses besides the load/store. 9159 /// After the transformation, the new indexed load/store has effectively folded 9160 /// the add/subtract in and all of its other uses are redirected to the 9161 /// new load/store. 
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Returns true when N was replaced by a pre-indexed load/store (N and its
  // old base pointer node are deleted in that case), false when nothing was
  // changed. Only attempted once the combine level has reached
  // AfterLegalizeDAG.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  // While Swapped is set, BasePtr/Offset are held in exchanged order for the
  // checks below; they are swapped back before the indexed node is built.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  // OtherUses collects those add/sub-with-constant users of BasePtr; it is
  // emptied again as soon as one user does not fit, so it is all-or-nothing.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      if (Use.getUser()->isPredecessorOf(N))
        continue;

      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // The user is a two-operand add/sub; pick the operand that is not this
      // use of BasePtr.
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  // Caches for hasPredecessorHelper
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (N->hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Result numbering used below: an indexed load yields (loaded value,
  // updated base pointer, chain); an indexed store yields (updated base
  // pointer, chain).
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    // Work out which operand of the user is the constant offset; the other
    // one must be BasePtr.
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    APInt Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    // CNV = x0 * offset0 - x1 * y0 * y1 * offset1, the constant term of t0.
    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}

/// Try to combine a load/store with a add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
///
/// Returns true when N was replaced (N and the folded add/sub are deleted in
/// that case), false when nothing was changed.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look at every other add/sub user of the pointer as a candidate to fold
  // into the memory operation.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // Same result numbering as in the pre-indexed case: load = (value,
        // updated base, chain), store = (updated base, chain).
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                  Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}

/// \brief Return the base-pointer arithmetic from an indexed \p LD.
///
/// Builds an ADD (for PRE_INC / POST_INC) or SUB (for the other indexed
/// modes) of the load's base pointer (operand 1) and increment (operand 2).
/// A non-opaque TargetConstant increment is first rebuilt as a regular
/// Constant; an opaque one is rejected by assertion.
SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  assert(AM != ISD::UNINDEXED);
  SDValue BP = LD->getOperand(1);
  SDValue Inc = LD->getOperand(2);

  // Some backends use TargetConstants for load offsets, but don't expect
  // TargetConstants in general ADD nodes. We can convert these constants into
  // regular Constants (if the constant is not opaque).
  assert((Inc.getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
         "Cannot split out indexing using opaque target constants");
  if (Inc.getOpcode() == ISD::TargetConstant) {
    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
                          ConstInc->getValueType(0));
  }

  unsigned Opc =
      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
}

/// Combine a LOAD node. In order: drop non-volatile loads whose results are
/// unused, forward the value of an immediately preceding store to the same
/// pointer, raise the recorded alignment when a better one can be inferred,
/// move the load onto a better chain when alias analysis is enabled, and
/// finally try the indexed-load and load-slicing transformations.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // The load can go when its value is unused and the updated pointer is
      // either unused too or can be recomputed by a separate add/sub.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
      return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        SDValue NewLoad =
               DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getPointerInfo(),
                              LD->getMemoryVT(),
                              LD->isVolatile(), LD->isNonTemporal(),
                              LD->isInvariant(), Align, LD->getAAInfo());
        if (NewLoad.getNode() != N)
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  // An explicit -combiner-alias-analysis on the command line wins; otherwise
  // the subtarget decides.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}

namespace {
/// \brief Helper structure used to slice a load in smaller loads.
9679 /// Basically a slice is obtained from the following sequence: 9680 /// Origin = load Ty1, Base 9681 /// Shift = srl Ty1 Origin, CstTy Amount 9682 /// Inst = trunc Shift to Ty2 9683 /// 9684 /// Then, it will be rewriten into: 9685 /// Slice = load SliceTy, Base + SliceOffset 9686 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 9687 /// 9688 /// SliceTy is deduced from the number of bits that are actually used to 9689 /// build Inst. 9690 struct LoadedSlice { 9691 /// \brief Helper structure used to compute the cost of a slice. 9692 struct Cost { 9693 /// Are we optimizing for code size. 9694 bool ForCodeSize; 9695 /// Various cost. 9696 unsigned Loads; 9697 unsigned Truncates; 9698 unsigned CrossRegisterBanksCopies; 9699 unsigned ZExts; 9700 unsigned Shift; 9701 9702 Cost(bool ForCodeSize = false) 9703 : ForCodeSize(ForCodeSize), Loads(0), Truncates(0), 9704 CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {} 9705 9706 /// \brief Get the cost of one isolated slice. 9707 Cost(const LoadedSlice &LS, bool ForCodeSize = false) 9708 : ForCodeSize(ForCodeSize), Loads(1), Truncates(0), 9709 CrossRegisterBanksCopies(0), ZExts(0), Shift(0) { 9710 EVT TruncType = LS.Inst->getValueType(0); 9711 EVT LoadedType = LS.getLoadedType(); 9712 if (TruncType != LoadedType && 9713 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType)) 9714 ZExts = 1; 9715 } 9716 9717 /// \brief Account for slicing gain in the current cost. 9718 /// Slicing provide a few gains like removing a shift or a 9719 /// truncate. This method allows to grow the cost of the original 9720 /// load with the gain from this slice. 9721 void addSliceGain(const LoadedSlice &LS) { 9722 // Each slice saves a truncate. 9723 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); 9724 if (!TLI.isTruncateFree(LS.Inst->getValueType(0), 9725 LS.Inst->getOperand(0).getValueType())) 9726 ++Truncates; 9727 // If there is a shift amount, this slice gets rid of it. 
9728 if (LS.Shift) 9729 ++Shift; 9730 // If this slice can merge a cross register bank copy, account for it. 9731 if (LS.canMergeExpensiveCrossRegisterBankCopy()) 9732 ++CrossRegisterBanksCopies; 9733 } 9734 9735 Cost &operator+=(const Cost &RHS) { 9736 Loads += RHS.Loads; 9737 Truncates += RHS.Truncates; 9738 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies; 9739 ZExts += RHS.ZExts; 9740 Shift += RHS.Shift; 9741 return *this; 9742 } 9743 9744 bool operator==(const Cost &RHS) const { 9745 return Loads == RHS.Loads && Truncates == RHS.Truncates && 9746 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies && 9747 ZExts == RHS.ZExts && Shift == RHS.Shift; 9748 } 9749 9750 bool operator!=(const Cost &RHS) const { return !(*this == RHS); } 9751 9752 bool operator<(const Cost &RHS) const { 9753 // Assume cross register banks copies are as expensive as loads. 9754 // FIXME: Do we want some more target hooks? 9755 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies; 9756 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies; 9757 // Unless we are optimizing for code size, consider the 9758 // expensive operation first. 9759 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS) 9760 return ExpensiveOpsLHS < ExpensiveOpsRHS; 9761 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) < 9762 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS); 9763 } 9764 9765 bool operator>(const Cost &RHS) const { return RHS < *this; } 9766 9767 bool operator<=(const Cost &RHS) const { return !(RHS < *this); } 9768 9769 bool operator>=(const Cost &RHS) const { return !(*this < RHS); } 9770 }; 9771 // The last instruction that represent the slice. This should be a 9772 // truncate instruction. 9773 SDNode *Inst; 9774 // The original load instruction. 9775 LoadSDNode *Origin; 9776 // The right shift amount in bits from the original load. 9777 unsigned Shift; 9778 // The DAG from which Origin came from. 
9779 // This is used to get some contextual information about legal types, etc. 9780 SelectionDAG *DAG; 9781 9782 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr, 9783 unsigned Shift = 0, SelectionDAG *DAG = nullptr) 9784 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} 9785 9786 /// \brief Get the bits used in a chunk of bits \p BitWidth large. 9787 /// \return Result is \p BitWidth and has used bits set to 1 and 9788 /// not used bits set to 0. 9789 APInt getUsedBits() const { 9790 // Reproduce the trunc(lshr) sequence: 9791 // - Start from the truncated value. 9792 // - Zero extend to the desired bit width. 9793 // - Shift left. 9794 assert(Origin && "No original load to compare against."); 9795 unsigned BitWidth = Origin->getValueSizeInBits(0); 9796 assert(Inst && "This slice is not bound to an instruction"); 9797 assert(Inst->getValueSizeInBits(0) <= BitWidth && 9798 "Extracted slice is bigger than the whole type!"); 9799 APInt UsedBits(Inst->getValueSizeInBits(0), 0); 9800 UsedBits.setAllBits(); 9801 UsedBits = UsedBits.zext(BitWidth); 9802 UsedBits <<= Shift; 9803 return UsedBits; 9804 } 9805 9806 /// \brief Get the size of the slice to be loaded in bytes. 9807 unsigned getLoadedSize() const { 9808 unsigned SliceSize = getUsedBits().countPopulation(); 9809 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); 9810 return SliceSize / 8; 9811 } 9812 9813 /// \brief Get the type that will be loaded for this slice. 9814 /// Note: This may not be the final type for the slice. 9815 EVT getLoadedType() const { 9816 assert(DAG && "Missing context"); 9817 LLVMContext &Ctxt = *DAG->getContext(); 9818 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); 9819 } 9820 9821 /// \brief Get the alignment of the load used for this slice. 
9822 unsigned getAlignment() const { 9823 unsigned Alignment = Origin->getAlignment(); 9824 unsigned Offset = getOffsetFromBase(); 9825 if (Offset != 0) 9826 Alignment = MinAlign(Alignment, Alignment + Offset); 9827 return Alignment; 9828 } 9829 9830 /// \brief Check if this slice can be rewritten with legal operations. 9831 bool isLegal() const { 9832 // An invalid slice is not legal. 9833 if (!Origin || !Inst || !DAG) 9834 return false; 9835 9836 // Offsets are for indexed load only, we do not handle that. 9837 if (Origin->getOffset().getOpcode() != ISD::UNDEF) 9838 return false; 9839 9840 const TargetLowering &TLI = DAG->getTargetLoweringInfo(); 9841 9842 // Check that the type is legal. 9843 EVT SliceType = getLoadedType(); 9844 if (!TLI.isTypeLegal(SliceType)) 9845 return false; 9846 9847 // Check that the load is legal for this type. 9848 if (!TLI.isOperationLegal(ISD::LOAD, SliceType)) 9849 return false; 9850 9851 // Check that the offset can be computed. 9852 // 1. Check its type. 9853 EVT PtrType = Origin->getBasePtr().getValueType(); 9854 if (PtrType == MVT::Untyped || PtrType.isExtended()) 9855 return false; 9856 9857 // 2. Check that it fits in the immediate. 9858 if (!TLI.isLegalAddImmediate(getOffsetFromBase())) 9859 return false; 9860 9861 // 3. Check that the computation is legal. 9862 if (!TLI.isOperationLegal(ISD::ADD, PtrType)) 9863 return false; 9864 9865 // Check that the zext is legal if it needs one. 9866 EVT TruncateType = Inst->getValueType(0); 9867 if (TruncateType != SliceType && 9868 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType)) 9869 return false; 9870 9871 return true; 9872 } 9873 9874 /// \brief Get the offset in bytes of this slice in the original chunk of 9875 /// bits. 9876 /// \pre DAG != nullptr. 
9877 uint64_t getOffsetFromBase() const { 9878 assert(DAG && "Missing context."); 9879 bool IsBigEndian = DAG->getDataLayout().isBigEndian(); 9880 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); 9881 uint64_t Offset = Shift / 8; 9882 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; 9883 assert(!(Origin->getValueSizeInBits(0) & 0x7) && 9884 "The size of the original loaded type is not a multiple of a" 9885 " byte."); 9886 // If Offset is bigger than TySizeInBytes, it means we are loading all 9887 // zeros. This should have been optimized before in the process. 9888 assert(TySizeInBytes > Offset && 9889 "Invalid shift amount for given loaded size"); 9890 if (IsBigEndian) 9891 Offset = TySizeInBytes - Offset - getLoadedSize(); 9892 return Offset; 9893 } 9894 9895 /// \brief Generate the sequence of instructions to load the slice 9896 /// represented by this object and redirect the uses of this slice to 9897 /// this new sequence of instructions. 9898 /// \pre this->Inst && this->Origin are valid Instructions and this 9899 /// object passed the legal check: LoadedSlice::isLegal returned true. 9900 /// \return The last instruction of the sequence used to load the slice. 9901 SDValue loadSlice() const { 9902 assert(Inst && Origin && "Unable to replace a non-existing slice."); 9903 const SDValue &OldBaseAddr = Origin->getBasePtr(); 9904 SDValue BaseAddr = OldBaseAddr; 9905 // Get the offset in that chunk of bytes w.r.t. the endianess. 9906 int64_t Offset = static_cast<int64_t>(getOffsetFromBase()); 9907 assert(Offset >= 0 && "Offset too big to fit in int64_t!"); 9908 if (Offset) { 9909 // BaseAddr = BaseAddr + Offset. 9910 EVT ArithType = BaseAddr.getValueType(); 9911 SDLoc DL(Origin); 9912 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr, 9913 DAG->getConstant(Offset, DL, ArithType)); 9914 } 9915 9916 // Create the type of the loaded slice according to its size. 
9917 EVT SliceType = getLoadedType(); 9918 9919 // Create the load for the slice. 9920 SDValue LastInst = DAG->getLoad( 9921 SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, 9922 Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(), 9923 Origin->isNonTemporal(), Origin->isInvariant(), getAlignment()); 9924 // If the final type is not the same as the loaded type, this means that 9925 // we have to pad with zero. Create a zero extend for that. 9926 EVT FinalType = Inst->getValueType(0); 9927 if (SliceType != FinalType) 9928 LastInst = 9929 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst); 9930 return LastInst; 9931 } 9932 9933 /// \brief Check if this slice can be merged with an expensive cross register 9934 /// bank copy. E.g., 9935 /// i = load i32 9936 /// f = bitcast i32 i to float 9937 bool canMergeExpensiveCrossRegisterBankCopy() const { 9938 if (!Inst || !Inst->hasOneUse()) 9939 return false; 9940 SDNode *Use = *Inst->use_begin(); 9941 if (Use->getOpcode() != ISD::BITCAST) 9942 return false; 9943 assert(DAG && "Missing context"); 9944 const TargetLowering &TLI = DAG->getTargetLoweringInfo(); 9945 EVT ResVT = Use->getValueType(0); 9946 const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); 9947 const TargetRegisterClass *ArgRC = 9948 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); 9949 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) 9950 return false; 9951 9952 // At this point, we know that we perform a cross-register-bank copy. 9953 // Check if it is expensive. 9954 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo(); 9955 // Assume bitcasts are cheap, unless both register classes do not 9956 // explicitly share a common sub class. 9957 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) 9958 return false; 9959 9960 // Check if it will be merged with the load. 9961 // 1. Check the alignment constraint. 
9962 unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment( 9963 ResVT.getTypeForEVT(*DAG->getContext())); 9964 9965 if (RequiredAlignment > getAlignment()) 9966 return false; 9967 9968 // 2. Check that the load is a legal operation for that type. 9969 if (!TLI.isOperationLegal(ISD::LOAD, ResVT)) 9970 return false; 9971 9972 // 3. Check that we do not have a zext in the way. 9973 if (Inst->getValueType(0) != getLoadedType()) 9974 return false; 9975 9976 return true; 9977 } 9978 }; 9979 } 9980 9981 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e., 9982 /// \p UsedBits looks like 0..0 1..1 0..0. 9983 static bool areUsedBitsDense(const APInt &UsedBits) { 9984 // If all the bits are one, this is dense! 9985 if (UsedBits.isAllOnesValue()) 9986 return true; 9987 9988 // Get rid of the unused bits on the right. 9989 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros()); 9990 // Get rid of the unused bits on the left. 9991 if (NarrowedUsedBits.countLeadingZeros()) 9992 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); 9993 // Check that the chunk of bits is completely used. 9994 return NarrowedUsedBits.isAllOnesValue(); 9995 } 9996 9997 /// \brief Check whether or not \p First and \p Second are next to each other 9998 /// in memory. This means that there is no hole between the bits loaded 9999 /// by \p First and the bits loaded by \p Second. 
10000 static bool areSlicesNextToEachOther(const LoadedSlice &First, 10001 const LoadedSlice &Second) { 10002 assert(First.Origin == Second.Origin && First.Origin && 10003 "Unable to match different memory origins."); 10004 APInt UsedBits = First.getUsedBits(); 10005 assert((UsedBits & Second.getUsedBits()) == 0 && 10006 "Slices are not supposed to overlap."); 10007 UsedBits |= Second.getUsedBits(); 10008 return areUsedBitsDense(UsedBits); 10009 } 10010 10011 /// \brief Adjust the \p GlobalLSCost according to the target 10012 /// paring capabilities and the layout of the slices. 10013 /// \pre \p GlobalLSCost should account for at least as many loads as 10014 /// there is in the slices in \p LoadedSlices. 10015 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, 10016 LoadedSlice::Cost &GlobalLSCost) { 10017 unsigned NumberOfSlices = LoadedSlices.size(); 10018 // If there is less than 2 elements, no pairing is possible. 10019 if (NumberOfSlices < 2) 10020 return; 10021 10022 // Sort the slices so that elements that are likely to be next to each 10023 // other in memory are next to each other in the list. 10024 std::sort(LoadedSlices.begin(), LoadedSlices.end(), 10025 [](const LoadedSlice &LHS, const LoadedSlice &RHS) { 10026 assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); 10027 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); 10028 }); 10029 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); 10030 // First (resp. Second) is the first (resp. Second) potentially candidate 10031 // to be placed in a paired load. 10032 const LoadedSlice *First = nullptr; 10033 const LoadedSlice *Second = nullptr; 10034 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, 10035 // Set the beginning of the pair. 10036 First = Second) { 10037 10038 Second = &LoadedSlices[CurrSlice]; 10039 10040 // If First is NULL, it means we start a new pair. 10041 // Get to the next slice. 
10042 if (!First) 10043 continue; 10044 10045 EVT LoadedType = First->getLoadedType(); 10046 10047 // If the types of the slices are different, we cannot pair them. 10048 if (LoadedType != Second->getLoadedType()) 10049 continue; 10050 10051 // Check if the target supplies paired loads for this type. 10052 unsigned RequiredAlignment = 0; 10053 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { 10054 // move to the next pair, this type is hopeless. 10055 Second = nullptr; 10056 continue; 10057 } 10058 // Check if we meet the alignment requirement. 10059 if (RequiredAlignment > First->getAlignment()) 10060 continue; 10061 10062 // Check that both loads are next to each other in memory. 10063 if (!areSlicesNextToEachOther(*First, *Second)) 10064 continue; 10065 10066 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); 10067 --GlobalLSCost.Loads; 10068 // Move to the next pair. 10069 Second = nullptr; 10070 } 10071 } 10072 10073 /// \brief Check the profitability of all involved LoadedSlice. 10074 /// Currently, it is considered profitable if there is exactly two 10075 /// involved slices (1) which are (2) next to each other in memory, and 10076 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). 10077 /// 10078 /// Note: The order of the elements in \p LoadedSlices may be modified, but not 10079 /// the elements themselves. 10080 /// 10081 /// FIXME: When the cost model will be mature enough, we can relax 10082 /// constraints (1) and (2). 10083 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, 10084 const APInt &UsedBits, bool ForCodeSize) { 10085 unsigned NumberOfSlices = LoadedSlices.size(); 10086 if (StressLoadSlicing) 10087 return NumberOfSlices > 1; 10088 10089 // Check (1). 10090 if (NumberOfSlices != 2) 10091 return false; 10092 10093 // Check (2). 10094 if (!areUsedBitsDense(UsedBits)) 10095 return false; 10096 10097 // Check (3). 
10098 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); 10099 // The original code has one big load. 10100 OrigCost.Loads = 1; 10101 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { 10102 const LoadedSlice &LS = LoadedSlices[CurrSlice]; 10103 // Accumulate the cost of all the slices. 10104 LoadedSlice::Cost SliceCost(LS, ForCodeSize); 10105 GlobalSlicingCost += SliceCost; 10106 10107 // Account as cost in the original configuration the gain obtained 10108 // with the current slices. 10109 OrigCost.addSliceGain(LS); 10110 } 10111 10112 // If the target supports paired load, adjust the cost accordingly. 10113 adjustCostForPairing(LoadedSlices, GlobalSlicingCost); 10114 return OrigCost > GlobalSlicingCost; 10115 } 10116 10117 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) 10118 /// operations, split it in the various pieces being extracted. 10119 /// 10120 /// This sort of thing is introduced by SROA. 10121 /// This slicing takes care not to insert overlapping loads. 10122 /// \pre LI is a simple load (i.e., not an atomic or volatile load). 10123 bool DAGCombiner::SliceUpLoad(SDNode *N) { 10124 if (Level < AfterLegalizeDAG) 10125 return false; 10126 10127 LoadSDNode *LD = cast<LoadSDNode>(N); 10128 if (LD->isVolatile() || !ISD::isNormalLoad(LD) || 10129 !LD->getValueType(0).isInteger()) 10130 return false; 10131 10132 // Keep track of already used bits to detect overlapping values. 10133 // In that case, we will just abort the transformation. 10134 APInt UsedBits(LD->getValueSizeInBits(0), 0); 10135 10136 SmallVector<LoadedSlice, 4> LoadedSlices; 10137 10138 // Check if this load is used as several smaller chunks of bits. 10139 // Basically, look for uses in trunc or trunc(lshr) and record a new chain 10140 // of computation for each trunc. 10141 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); 10142 UI != UIEnd; ++UI) { 10143 // Skip the uses of the chain. 
10144 if (UI.getUse().getResNo() != 0) 10145 continue; 10146 10147 SDNode *User = *UI; 10148 unsigned Shift = 0; 10149 10150 // Check if this is a trunc(lshr). 10151 if (User->getOpcode() == ISD::SRL && User->hasOneUse() && 10152 isa<ConstantSDNode>(User->getOperand(1))) { 10153 Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); 10154 User = *User->use_begin(); 10155 } 10156 10157 // At this point, User is a Truncate, iff we encountered, trunc or 10158 // trunc(lshr). 10159 if (User->getOpcode() != ISD::TRUNCATE) 10160 return false; 10161 10162 // The width of the type must be a power of 2 and greater than 8-bits. 10163 // Otherwise the load cannot be represented in LLVM IR. 10164 // Moreover, if we shifted with a non-8-bits multiple, the slice 10165 // will be across several bytes. We do not support that. 10166 unsigned Width = User->getValueSizeInBits(0); 10167 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) 10168 return 0; 10169 10170 // Build the slice for this chain of computations. 10171 LoadedSlice LS(User, LD, Shift, &DAG); 10172 APInt CurrentUsedBits = LS.getUsedBits(); 10173 10174 // Check if this slice overlaps with another. 10175 if ((CurrentUsedBits & UsedBits) != 0) 10176 return false; 10177 // Update the bits used globally. 10178 UsedBits |= CurrentUsedBits; 10179 10180 // Check if the new slice would be legal. 10181 if (!LS.isLegal()) 10182 return false; 10183 10184 // Record the slice. 10185 LoadedSlices.push_back(LS); 10186 } 10187 10188 // Abort slicing if it does not seem to be profitable. 10189 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) 10190 return false; 10191 10192 ++SlicedLoads; 10193 10194 // Rewrite each chain to use an independent load. 10195 // By construction, each chain can be represented by a unique load. 10196 10197 // Prepare the argument for the new token factor for all the slices. 
10198 SmallVector<SDValue, 8> ArgChains; 10199 for (SmallVectorImpl<LoadedSlice>::const_iterator 10200 LSIt = LoadedSlices.begin(), 10201 LSItEnd = LoadedSlices.end(); 10202 LSIt != LSItEnd; ++LSIt) { 10203 SDValue SliceInst = LSIt->loadSlice(); 10204 CombineTo(LSIt->Inst, SliceInst, true); 10205 if (SliceInst.getNode()->getOpcode() != ISD::LOAD) 10206 SliceInst = SliceInst.getOperand(0); 10207 assert(SliceInst->getOpcode() == ISD::LOAD && 10208 "It takes more than a zext to get to the loaded slice!!"); 10209 ArgChains.push_back(SliceInst.getValue(1)); 10210 } 10211 10212 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, 10213 ArgChains); 10214 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); 10215 return true; 10216 } 10217 10218 /// Check to see if V is (and load (ptr), imm), where the load is having 10219 /// specific bytes cleared out. If so, return the byte size being masked out 10220 /// and the shift amount. 10221 static std::pair<unsigned, unsigned> 10222 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { 10223 std::pair<unsigned, unsigned> Result(0, 0); 10224 10225 // Check for the structure we're looking for. 10226 if (V->getOpcode() != ISD::AND || 10227 !isa<ConstantSDNode>(V->getOperand(1)) || 10228 !ISD::isNormalLoad(V->getOperand(0).getNode())) 10229 return Result; 10230 10231 // Check the chain and pointer. 10232 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); 10233 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. 10234 10235 // The store should be chained directly to the load or be an operand of a 10236 // tokenfactor. 10237 if (LD == Chain.getNode()) 10238 ; // ok. 10239 else if (Chain->getOpcode() != ISD::TokenFactor) 10240 return Result; // Fail. 
10241 else { 10242 bool isOk = false; 10243 for (const SDValue &ChainOp : Chain->op_values()) 10244 if (ChainOp.getNode() == LD) { 10245 isOk = true; 10246 break; 10247 } 10248 if (!isOk) return Result; 10249 } 10250 10251 // This only handles simple types. 10252 if (V.getValueType() != MVT::i16 && 10253 V.getValueType() != MVT::i32 && 10254 V.getValueType() != MVT::i64) 10255 return Result; 10256 10257 // Check the constant mask. Invert it so that the bits being masked out are 10258 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits 10259 // follow the sign bit for uniformity. 10260 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); 10261 unsigned NotMaskLZ = countLeadingZeros(NotMask); 10262 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. 10263 unsigned NotMaskTZ = countTrailingZeros(NotMask); 10264 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. 10265 if (NotMaskLZ == 64) return Result; // All zero mask. 10266 10267 // See if we have a continuous run of bits. If so, we have 0*1+0* 10268 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64) 10269 return Result; 10270 10271 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. 10272 if (V.getValueType() != MVT::i64 && NotMaskLZ) 10273 NotMaskLZ -= 64-V.getValueSizeInBits(); 10274 10275 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; 10276 switch (MaskedBytes) { 10277 case 1: 10278 case 2: 10279 case 4: break; 10280 default: return Result; // All one mask, or 5-byte mask. 10281 } 10282 10283 // Verify that the first bit starts at a multiple of mask so that the access 10284 // is aligned the same as the access width. 
10285 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; 10286 10287 Result.first = MaskedBytes; 10288 Result.second = NotMaskTZ/8; 10289 return Result; 10290 } 10291 10292 10293 /// Check to see if IVal is something that provides a value as specified by 10294 /// MaskInfo. If so, replace the specified store with a narrower store of 10295 /// truncated IVal. 10296 static SDNode * 10297 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, 10298 SDValue IVal, StoreSDNode *St, 10299 DAGCombiner *DC) { 10300 unsigned NumBytes = MaskInfo.first; 10301 unsigned ByteShift = MaskInfo.second; 10302 SelectionDAG &DAG = DC->getDAG(); 10303 10304 // Check to see if IVal is all zeros in the part being masked in by the 'or' 10305 // that uses this. If not, this is not a replacement. 10306 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), 10307 ByteShift*8, (ByteShift+NumBytes)*8); 10308 if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr; 10309 10310 // Check that it is legal on the target to do this. It is legal if the new 10311 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type 10312 // legalization. 10313 MVT VT = MVT::getIntegerVT(NumBytes*8); 10314 if (!DC->isTypeLegal(VT)) 10315 return nullptr; 10316 10317 // Okay, we can do this! Replace the 'St' store with a store of IVal that is 10318 // shifted by ByteShift and truncated down to NumBytes. 10319 if (ByteShift) { 10320 SDLoc DL(IVal); 10321 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal, 10322 DAG.getConstant(ByteShift*8, DL, 10323 DC->getShiftAmountTy(IVal.getValueType()))); 10324 } 10325 10326 // Figure out the offset for the store and the alignment of the access. 
10327 unsigned StOffset; 10328 unsigned NewAlign = St->getAlignment(); 10329 10330 if (DAG.getDataLayout().isLittleEndian()) 10331 StOffset = ByteShift; 10332 else 10333 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; 10334 10335 SDValue Ptr = St->getBasePtr(); 10336 if (StOffset) { 10337 SDLoc DL(IVal); 10338 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), 10339 Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); 10340 NewAlign = MinAlign(NewAlign, StOffset); 10341 } 10342 10343 // Truncate down to the new size. 10344 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); 10345 10346 ++OpsNarrowed; 10347 return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr, 10348 St->getPointerInfo().getWithOffset(StOffset), 10349 false, false, NewAlign).getNode(); 10350 } 10351 10352 10353 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and 10354 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try 10355 /// narrowing the load and store if it would end up being a win for performance 10356 /// or code size. 10357 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { 10358 StoreSDNode *ST = cast<StoreSDNode>(N); 10359 if (ST->isVolatile()) 10360 return SDValue(); 10361 10362 SDValue Chain = ST->getChain(); 10363 SDValue Value = ST->getValue(); 10364 SDValue Ptr = ST->getBasePtr(); 10365 EVT VT = Value.getValueType(); 10366 10367 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) 10368 return SDValue(); 10369 10370 unsigned Opc = Value.getOpcode(); 10371 10372 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst 10373 // is a byte mask indicating a consecutive number of bytes, check to see if 10374 // Y is known to provide just those bytes. If so, we try to replace the 10375 // load + replace + store sequence with a single (narrower) store, which makes 10376 // the load dead. 
10377 if (Opc == ISD::OR) { 10378 std::pair<unsigned, unsigned> MaskedLoad; 10379 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); 10380 if (MaskedLoad.first) 10381 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, 10382 Value.getOperand(1), ST,this)) 10383 return SDValue(NewST, 0); 10384 10385 // Or is commutative, so try swapping X and Y. 10386 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); 10387 if (MaskedLoad.first) 10388 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, 10389 Value.getOperand(0), ST,this)) 10390 return SDValue(NewST, 0); 10391 } 10392 10393 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || 10394 Value.getOperand(1).getOpcode() != ISD::Constant) 10395 return SDValue(); 10396 10397 SDValue N0 = Value.getOperand(0); 10398 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 10399 Chain == SDValue(N0.getNode(), 1)) { 10400 LoadSDNode *LD = cast<LoadSDNode>(N0); 10401 if (LD->getBasePtr() != Ptr || 10402 LD->getPointerInfo().getAddrSpace() != 10403 ST->getPointerInfo().getAddrSpace()) 10404 return SDValue(); 10405 10406 // Find the type to narrow it the load / op / store to. 10407 SDValue N1 = Value.getOperand(1); 10408 unsigned BitWidth = N1.getValueSizeInBits(); 10409 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); 10410 if (Opc == ISD::AND) 10411 Imm ^= APInt::getAllOnesValue(BitWidth); 10412 if (Imm == 0 || Imm.isAllOnesValue()) 10413 return SDValue(); 10414 unsigned ShAmt = Imm.countTrailingZeros(); 10415 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; 10416 unsigned NewBW = NextPowerOf2(MSB - ShAmt); 10417 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); 10418 // The narrowing should be profitable, the load/store operation should be 10419 // legal (or custom) and the store size should be equal to the NewVT width. 
10420 while (NewBW < BitWidth && 10421 (NewVT.getStoreSizeInBits() != NewBW || 10422 !TLI.isOperationLegalOrCustom(Opc, NewVT) || 10423 !TLI.isNarrowingProfitable(VT, NewVT))) { 10424 NewBW = NextPowerOf2(NewBW); 10425 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); 10426 } 10427 if (NewBW >= BitWidth) 10428 return SDValue(); 10429 10430 // If the lsb changed does not start at the type bitwidth boundary, 10431 // start at the previous one. 10432 if (ShAmt % NewBW) 10433 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; 10434 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, 10435 std::min(BitWidth, ShAmt + NewBW)); 10436 if ((Imm & Mask) == Imm) { 10437 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); 10438 if (Opc == ISD::AND) 10439 NewImm ^= APInt::getAllOnesValue(NewBW); 10440 uint64_t PtrOff = ShAmt / 8; 10441 // For big endian targets, we need to adjust the offset to the pointer to 10442 // load the correct bytes. 10443 if (DAG.getDataLayout().isBigEndian()) 10444 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; 10445 10446 unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); 10447 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); 10448 if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) 10449 return SDValue(); 10450 10451 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), 10452 Ptr.getValueType(), Ptr, 10453 DAG.getConstant(PtrOff, SDLoc(LD), 10454 Ptr.getValueType())); 10455 SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), 10456 LD->getChain(), NewPtr, 10457 LD->getPointerInfo().getWithOffset(PtrOff), 10458 LD->isVolatile(), LD->isNonTemporal(), 10459 LD->isInvariant(), NewAlign, 10460 LD->getAAInfo()); 10461 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, 10462 DAG.getConstant(NewImm, SDLoc(Value), 10463 NewVT)); 10464 SDValue NewST = DAG.getStore(Chain, SDLoc(N), 10465 NewVal, NewPtr, 10466 ST->getPointerInfo().getWithOffset(PtrOff), 10467 false, false, NewAlign); 10468 10469 AddToWorklist(NewPtr.getNode()); 
10470 AddToWorklist(NewLD.getNode()); 10471 AddToWorklist(NewVal.getNode()); 10472 WorklistRemover DeadNodes(*this); 10473 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); 10474 ++OpsNarrowed; 10475 return NewST; 10476 } 10477 } 10478 10479 return SDValue(); 10480 } 10481 10482 /// For a given floating point load / store pair, if the load value isn't used 10483 /// by any other operations, then consider transforming the pair to integer 10484 /// load / store operations if the target deems the transformation profitable. 10485 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { 10486 StoreSDNode *ST = cast<StoreSDNode>(N); 10487 SDValue Chain = ST->getChain(); 10488 SDValue Value = ST->getValue(); 10489 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && 10490 Value.hasOneUse() && 10491 Chain == SDValue(Value.getNode(), 1)) { 10492 LoadSDNode *LD = cast<LoadSDNode>(Value); 10493 EVT VT = LD->getMemoryVT(); 10494 if (!VT.isFloatingPoint() || 10495 VT != ST->getMemoryVT() || 10496 LD->isNonTemporal() || 10497 ST->isNonTemporal() || 10498 LD->getPointerInfo().getAddrSpace() != 0 || 10499 ST->getPointerInfo().getAddrSpace() != 0) 10500 return SDValue(); 10501 10502 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); 10503 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || 10504 !TLI.isOperationLegal(ISD::STORE, IntVT) || 10505 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || 10506 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) 10507 return SDValue(); 10508 10509 unsigned LDAlign = LD->getAlignment(); 10510 unsigned STAlign = ST->getAlignment(); 10511 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); 10512 unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy); 10513 if (LDAlign < ABIAlign || STAlign < ABIAlign) 10514 return SDValue(); 10515 10516 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), 10517 LD->getChain(), LD->getBasePtr(), 10518 LD->getPointerInfo(), 10519 false, false, 
false, LDAlign); 10520 10521 SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), 10522 NewLD, ST->getBasePtr(), 10523 ST->getPointerInfo(), 10524 false, false, STAlign); 10525 10526 AddToWorklist(NewLD.getNode()); 10527 AddToWorklist(NewST.getNode()); 10528 WorklistRemover DeadNodes(*this); 10529 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); 10530 ++LdStFP2Int; 10531 return NewST; 10532 } 10533 10534 return SDValue(); 10535 } 10536 10537 namespace { 10538 /// Helper struct to parse and store a memory address as base + index + offset. 10539 /// We ignore sign extensions when it is safe to do so. 10540 /// The following two expressions are not equivalent. To differentiate we need 10541 /// to store whether there was a sign extension involved in the index 10542 /// computation. 10543 /// (load (i64 add (i64 copyfromreg %c) 10544 /// (i64 signextend (add (i8 load %index) 10545 /// (i8 1)))) 10546 /// vs 10547 /// 10548 /// (load (i64 add (i64 copyfromreg %c) 10549 /// (i64 signextend (i32 add (i32 signextend (i8 load %index)) 10550 /// (i32 1))))) 10551 struct BaseIndexOffset { 10552 SDValue Base; 10553 SDValue Index; 10554 int64_t Offset; 10555 bool IsIndexSignExt; 10556 10557 BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} 10558 10559 BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, 10560 bool IsIndexSignExt) : 10561 Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} 10562 10563 bool equalBaseIndex(const BaseIndexOffset &Other) { 10564 return Other.Base == Base && Other.Index == Index && 10565 Other.IsIndexSignExt == IsIndexSignExt; 10566 } 10567 10568 /// Parses tree in Ptr for base, index, offset addresses. 10569 static BaseIndexOffset match(SDValue Ptr) { 10570 bool IsIndexSignExt = false; 10571 10572 // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD 10573 // instruction, then it could be just the BASE or everything else we don't 10574 // know how to handle. 
Just use Ptr as BASE and give up. 10575 if (Ptr->getOpcode() != ISD::ADD) 10576 return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); 10577 10578 // We know that we have at least an ADD instruction. Try to pattern match 10579 // the simple case of BASE + OFFSET. 10580 if (isa<ConstantSDNode>(Ptr->getOperand(1))) { 10581 int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); 10582 return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, 10583 IsIndexSignExt); 10584 } 10585 10586 // Inside a loop the current BASE pointer is calculated using an ADD and a 10587 // MUL instruction. In this case Ptr is the actual BASE pointer. 10588 // (i64 add (i64 %array_ptr) 10589 // (i64 mul (i64 %induction_var) 10590 // (i64 %element_size))) 10591 if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) 10592 return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); 10593 10594 // Look at Base + Index + Offset cases. 10595 SDValue Base = Ptr->getOperand(0); 10596 SDValue IndexOffset = Ptr->getOperand(1); 10597 10598 // Skip signextends. 10599 if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { 10600 IndexOffset = IndexOffset->getOperand(0); 10601 IsIndexSignExt = true; 10602 } 10603 10604 // Either the case of Base + Index (no offset) or something else. 10605 if (IndexOffset->getOpcode() != ISD::ADD) 10606 return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); 10607 10608 // Now we have the case of Base + Index + offset. 10609 SDValue Index = IndexOffset->getOperand(0); 10610 SDValue Offset = IndexOffset->getOperand(1); 10611 10612 if (!isa<ConstantSDNode>(Offset)) 10613 return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); 10614 10615 // Ignore signextends. 
10616 if (Index->getOpcode() == ISD::SIGN_EXTEND) { 10617 Index = Index->getOperand(0); 10618 IsIndexSignExt = true; 10619 } else IsIndexSignExt = false; 10620 10621 int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); 10622 return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); 10623 } 10624 }; 10625 } // namespace 10626 10627 SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, 10628 SDLoc SL, 10629 ArrayRef<MemOpLink> Stores, 10630 EVT Ty) const { 10631 SmallVector<SDValue, 8> BuildVector; 10632 10633 for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) 10634 BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue()); 10635 10636 return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); 10637 } 10638 10639 bool DAGCombiner::MergeStoresOfConstantsOrVecElts( 10640 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, 10641 unsigned NumElem, bool IsConstantSrc, bool UseVector) { 10642 // Make sure we have something to merge. 10643 if (NumElem < 2) 10644 return false; 10645 10646 int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; 10647 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 10648 unsigned LatestNodeUsed = 0; 10649 10650 for (unsigned i=0; i < NumElem; ++i) { 10651 // Find a chain for the new wide-store operand. Notice that some 10652 // of the store nodes that we found may not be selected for inclusion 10653 // in the wide store. The chain we use needs to be the chain of the 10654 // latest store node which is *used* and replaced by the wide store. 10655 if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) 10656 LatestNodeUsed = i; 10657 } 10658 10659 // The latest Node in the DAG. 10660 LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; 10661 SDLoc DL(StoreNodes[0].MemNode); 10662 10663 SDValue StoredVal; 10664 if (UseVector) { 10665 // Find a legal type for the vector store. 
10666 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 10667 assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); 10668 if (IsConstantSrc) { 10669 StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty); 10670 } else { 10671 SmallVector<SDValue, 8> Ops; 10672 for (unsigned i = 0; i < NumElem ; ++i) { 10673 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 10674 SDValue Val = St->getValue(); 10675 // All of the operands of a BUILD_VECTOR must have the same type. 10676 if (Val.getValueType() != MemVT) 10677 return false; 10678 Ops.push_back(Val); 10679 } 10680 10681 // Build the extracted vector elements back into a vector. 10682 StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops); 10683 } 10684 } else { 10685 // We should always use a vector store when merging extracted vector 10686 // elements, so this path implies a store of constants. 10687 assert(IsConstantSrc && "Merged vector elements should use vector store"); 10688 10689 unsigned SizeInBits = NumElem * ElementSizeBytes * 8; 10690 APInt StoreInt(SizeInBits, 0); 10691 10692 // Construct a single integer constant which is made of the smaller 10693 // constant inputs. 10694 bool IsLE = DAG.getDataLayout().isLittleEndian(); 10695 for (unsigned i = 0; i < NumElem ; ++i) { 10696 unsigned Idx = IsLE ? (NumElem - 1 - i) : i; 10697 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); 10698 SDValue Val = St->getValue(); 10699 StoreInt <<= ElementSizeBytes * 8; 10700 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { 10701 StoreInt |= C->getAPIntValue().zext(SizeInBits); 10702 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { 10703 StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits); 10704 } else { 10705 llvm_unreachable("Invalid constant element type"); 10706 } 10707 } 10708 10709 // Create the new Load and Store operations. 
10710 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); 10711 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); 10712 } 10713 10714 SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, 10715 FirstInChain->getBasePtr(), 10716 FirstInChain->getPointerInfo(), 10717 false, false, 10718 FirstInChain->getAlignment()); 10719 10720 // Replace the last store with the new store 10721 CombineTo(LatestOp, NewStore); 10722 // Erase all other stores. 10723 for (unsigned i = 0; i < NumElem ; ++i) { 10724 if (StoreNodes[i].MemNode == LatestOp) 10725 continue; 10726 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 10727 // ReplaceAllUsesWith will replace all uses that existed when it was 10728 // called, but graph optimizations may cause new ones to appear. For 10729 // example, the case in pr14333 looks like 10730 // 10731 // St's chain -> St -> another store -> X 10732 // 10733 // And the only difference from St to the other store is the chain. 10734 // When we change it's chain to be St's chain they become identical, 10735 // get CSEed and the net result is that X is now a use of St. 10736 // Since we know that St is redundant, just iterate. 
10737 while (!St->use_empty()) 10738 DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); 10739 deleteAndRecombine(St); 10740 } 10741 10742 return true; 10743 } 10744 10745 static bool allowableAlignment(const SelectionDAG &DAG, 10746 const TargetLowering &TLI, EVT EVTTy, 10747 unsigned AS, unsigned Align) { 10748 if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align)) 10749 return true; 10750 10751 Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext()); 10752 unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty); 10753 return (Align >= ABIAlignment); 10754 } 10755 10756 void DAGCombiner::getStoreMergeAndAliasCandidates( 10757 StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, 10758 SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) { 10759 // This holds the base pointer, index, and the offset in bytes from the base 10760 // pointer. 10761 BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); 10762 10763 // We must have a base and an offset. 10764 if (!BasePtr.Base.getNode()) 10765 return; 10766 10767 // Do not handle stores to undef base pointers. 10768 if (BasePtr.Base.getOpcode() == ISD::UNDEF) 10769 return; 10770 10771 // Walk up the chain and look for nodes with offsets from the same 10772 // base pointer. Stop when reaching an instruction with a different kind 10773 // or instruction which has a different base pointer. 10774 EVT MemVT = St->getMemoryVT(); 10775 unsigned Seq = 0; 10776 StoreSDNode *Index = St; 10777 while (Index) { 10778 // If the chain has more than one use, then we can't reorder the mem ops. 10779 if (Index != St && !SDValue(Index, 0)->hasOneUse()) 10780 break; 10781 10782 // Find the base pointer and offset for this memory node. 10783 BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); 10784 10785 // Check that the base pointer is the same as the original one. 10786 if (!Ptr.equalBaseIndex(BasePtr)) 10787 break; 10788 10789 // The memory operands must not be volatile. 
10790 if (Index->isVolatile() || Index->isIndexed()) 10791 break; 10792 10793 // No truncation. 10794 if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) 10795 if (St->isTruncatingStore()) 10796 break; 10797 10798 // The stored memory type must be the same. 10799 if (Index->getMemoryVT() != MemVT) 10800 break; 10801 10802 // We found a potential memory operand to merge. 10803 StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); 10804 10805 // Find the next memory operand in the chain. If the next operand in the 10806 // chain is a store then move up and continue the scan with the next 10807 // memory operand. If the next operand is a load save it and use alias 10808 // information to check if it interferes with anything. 10809 SDNode *NextInChain = Index->getChain().getNode(); 10810 while (1) { 10811 if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { 10812 // We found a store node. Use it for the next iteration. 10813 Index = STn; 10814 break; 10815 } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { 10816 if (Ldn->isVolatile()) { 10817 Index = nullptr; 10818 break; 10819 } 10820 10821 // Save the load node for later. Continue the scan. 10822 AliasLoadNodes.push_back(Ldn); 10823 NextInChain = Ldn->getChain().getNode(); 10824 continue; 10825 } else { 10826 Index = nullptr; 10827 break; 10828 } 10829 } 10830 } 10831 } 10832 10833 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { 10834 if (OptLevel == CodeGenOpt::None) 10835 return false; 10836 10837 EVT MemVT = St->getMemoryVT(); 10838 int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; 10839 bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( 10840 Attribute::NoImplicitFloat); 10841 10842 // This function cannot currently deal with non-byte-sized memory sizes. 10843 if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) 10844 return false; 10845 10846 // Don't merge vectors into wider inputs. 
10847 if (MemVT.isVector() || !MemVT.isSimple()) 10848 return false; 10849 10850 // Perform an early exit check. Do not bother looking at stored values that 10851 // are not constants, loads, or extracted vector elements. 10852 SDValue StoredVal = St->getValue(); 10853 bool IsLoadSrc = isa<LoadSDNode>(StoredVal); 10854 bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || 10855 isa<ConstantFPSDNode>(StoredVal); 10856 bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); 10857 10858 if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) 10859 return false; 10860 10861 // Only look at ends of store sequences. 10862 SDValue Chain = SDValue(St, 0); 10863 if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) 10864 return false; 10865 10866 // Save the LoadSDNodes that we find in the chain. 10867 // We need to make sure that these nodes do not interfere with 10868 // any of the store nodes. 10869 SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; 10870 10871 // Save the StoreSDNodes that we find in the chain. 10872 SmallVector<MemOpLink, 8> StoreNodes; 10873 10874 getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); 10875 10876 // Check if there is anything to merge. 10877 if (StoreNodes.size() < 2) 10878 return false; 10879 10880 // Sort the memory operands according to their distance from the base pointer. 10881 std::sort(StoreNodes.begin(), StoreNodes.end(), 10882 [](MemOpLink LHS, MemOpLink RHS) { 10883 return LHS.OffsetFromBase < RHS.OffsetFromBase || 10884 (LHS.OffsetFromBase == RHS.OffsetFromBase && 10885 LHS.SequenceNum > RHS.SequenceNum); 10886 }); 10887 10888 // Scan the memory operations on the chain and find the first non-consecutive 10889 // store memory address. 
10890 unsigned LastConsecutiveStore = 0; 10891 int64_t StartAddress = StoreNodes[0].OffsetFromBase; 10892 for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) { 10893 10894 // Check that the addresses are consecutive starting from the second 10895 // element in the list of stores. 10896 if (i > 0) { 10897 int64_t CurrAddress = StoreNodes[i].OffsetFromBase; 10898 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 10899 break; 10900 } 10901 10902 bool Alias = false; 10903 // Check if this store interferes with any of the loads that we found. 10904 for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld) 10905 if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) { 10906 Alias = true; 10907 break; 10908 } 10909 // We found a load that alias with this store. Stop the sequence. 10910 if (Alias) 10911 break; 10912 10913 // Mark this node as useful. 10914 LastConsecutiveStore = i; 10915 } 10916 10917 // The node with the lowest store address. 10918 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 10919 unsigned FirstStoreAS = FirstInChain->getAddressSpace(); 10920 unsigned FirstStoreAlign = FirstInChain->getAlignment(); 10921 10922 // Store the constants into memory as one consecutive store. 10923 if (IsConstantSrc) { 10924 unsigned LastLegalType = 0; 10925 unsigned LastLegalVectorType = 0; 10926 bool NonZero = false; 10927 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 10928 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 10929 SDValue StoredVal = St->getValue(); 10930 10931 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { 10932 NonZero |= !C->isNullValue(); 10933 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { 10934 NonZero |= !C->getConstantFPValue()->isNullValue(); 10935 } else { 10936 // Non-constant. 10937 break; 10938 } 10939 10940 // Find a legal type for the constant store. 
10941 unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; 10942 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); 10943 if (TLI.isTypeLegal(StoreTy) && 10944 allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, 10945 FirstStoreAlign)) { 10946 LastLegalType = i+1; 10947 // Or check whether a truncstore is legal. 10948 } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == 10949 TargetLowering::TypePromoteInteger) { 10950 EVT LegalizedStoredValueTy = 10951 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); 10952 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && 10953 allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, 10954 FirstStoreAlign)) { 10955 LastLegalType = i + 1; 10956 } 10957 } 10958 10959 // Find a legal type for the vector store. 10960 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 10961 if (TLI.isTypeLegal(Ty) && 10962 allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) { 10963 LastLegalVectorType = i + 1; 10964 } 10965 } 10966 10967 10968 // We only use vectors if the constant is known to be zero or the target 10969 // allows it and the function is not marked with the noimplicitfloat 10970 // attribute. 10971 if (NoVectors) { 10972 LastLegalVectorType = 0; 10973 } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT, 10974 LastLegalVectorType, 10975 FirstStoreAS)) { 10976 LastLegalVectorType = 0; 10977 } 10978 10979 // Check if we found a legal integer type to store. 10980 if (LastLegalType == 0 && LastLegalVectorType == 0) 10981 return false; 10982 10983 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; 10984 unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; 10985 10986 return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, 10987 true, UseVector); 10988 } 10989 10990 // When extracting multiple vector elements, try to store them 10991 // in one vector store rather than a sequence of scalar stores. 
10992 if (IsExtractVecEltSrc) { 10993 unsigned NumElem = 0; 10994 for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { 10995 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 10996 SDValue StoredVal = St->getValue(); 10997 // This restriction could be loosened. 10998 // Bail out if any stored values are not elements extracted from a vector. 10999 // It should be possible to handle mixed sources, but load sources need 11000 // more careful handling (see the block of code below that handles 11001 // consecutive loads). 11002 if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) 11003 return false; 11004 11005 // Find a legal type for the vector store. 11006 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 11007 if (TLI.isTypeLegal(Ty) && 11008 allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) 11009 NumElem = i + 1; 11010 } 11011 11012 return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, 11013 false, true); 11014 } 11015 11016 // Below we handle the case of multiple consecutive stores that 11017 // come from multiple consecutive loads. We merge them into a single 11018 // wide load and a single wide store. 11019 11020 // Look for load nodes which are used by the stored values. 11021 SmallVector<MemOpLink, 8> LoadNodes; 11022 11023 // Find acceptable loads. Loads need to have the same chain (token factor), 11024 // must not be zext, volatile, indexed, and they must be consecutive. 11025 BaseIndexOffset LdBasePtr; 11026 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 11027 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 11028 LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); 11029 if (!Ld) break; 11030 11031 // Loads must only have one use. 11032 if (!Ld->hasNUsesOfValue(1, 0)) 11033 break; 11034 11035 // The memory operands must not be volatile. 11036 if (Ld->isVolatile() || Ld->isIndexed()) 11037 break; 11038 11039 // We do not accept ext loads. 
11040 if (Ld->getExtensionType() != ISD::NON_EXTLOAD) 11041 break; 11042 11043 // The stored memory type must be the same. 11044 if (Ld->getMemoryVT() != MemVT) 11045 break; 11046 11047 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); 11048 // If this is not the first ptr that we check. 11049 if (LdBasePtr.Base.getNode()) { 11050 // The base ptr must be the same. 11051 if (!LdPtr.equalBaseIndex(LdBasePtr)) 11052 break; 11053 } else { 11054 // Check that all other base pointers are the same as this one. 11055 LdBasePtr = LdPtr; 11056 } 11057 11058 // We found a potential memory operand to merge. 11059 LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); 11060 } 11061 11062 if (LoadNodes.size() < 2) 11063 return false; 11064 11065 // If we have load/store pair instructions and we only have two values, 11066 // don't bother. 11067 unsigned RequiredAlignment; 11068 if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && 11069 St->getAlignment() >= RequiredAlignment) 11070 return false; 11071 11072 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); 11073 unsigned FirstLoadAS = FirstLoad->getAddressSpace(); 11074 unsigned FirstLoadAlign = FirstLoad->getAlignment(); 11075 11076 // Scan the memory operations on the chain and find the first non-consecutive 11077 // load memory address. These variables hold the index in the store node 11078 // array. 11079 unsigned LastConsecutiveLoad = 0; 11080 // This variable refers to the size and not index in the array. 11081 unsigned LastLegalVectorType = 0; 11082 unsigned LastLegalIntegerType = 0; 11083 StartAddress = LoadNodes[0].OffsetFromBase; 11084 SDValue FirstChain = FirstLoad->getChain(); 11085 for (unsigned i = 1; i < LoadNodes.size(); ++i) { 11086 // All loads much share the same chain. 
11087 if (LoadNodes[i].MemNode->getChain() != FirstChain) 11088 break; 11089 11090 int64_t CurrAddress = LoadNodes[i].OffsetFromBase; 11091 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 11092 break; 11093 LastConsecutiveLoad = i; 11094 11095 // Find a legal type for the vector store. 11096 EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 11097 if (TLI.isTypeLegal(StoreTy) && 11098 allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && 11099 allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) { 11100 LastLegalVectorType = i + 1; 11101 } 11102 11103 // Find a legal type for the integer store. 11104 unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; 11105 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); 11106 if (TLI.isTypeLegal(StoreTy) && 11107 allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && 11108 allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) 11109 LastLegalIntegerType = i + 1; 11110 // Or check whether a truncstore and extload is legal. 11111 else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == 11112 TargetLowering::TypePromoteInteger) { 11113 EVT LegalizedStoredValueTy = 11114 TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); 11115 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && 11116 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && 11117 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && 11118 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && 11119 allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, 11120 FirstStoreAlign) && 11121 allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS, 11122 FirstLoadAlign)) 11123 LastLegalIntegerType = i+1; 11124 } 11125 } 11126 11127 // Only use vector types if the vector type is larger than the integer type. 11128 // If they are the same, use integers. 
11129 bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; 11130 unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); 11131 11132 // We add +1 here because the LastXXX variables refer to location while 11133 // the NumElem refers to array/index size. 11134 unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; 11135 NumElem = std::min(LastLegalType, NumElem); 11136 11137 if (NumElem < 2) 11138 return false; 11139 11140 // The latest Node in the DAG. 11141 unsigned LatestNodeUsed = 0; 11142 for (unsigned i=1; i<NumElem; ++i) { 11143 // Find a chain for the new wide-store operand. Notice that some 11144 // of the store nodes that we found may not be selected for inclusion 11145 // in the wide store. The chain we use needs to be the chain of the 11146 // latest store node which is *used* and replaced by the wide store. 11147 if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) 11148 LatestNodeUsed = i; 11149 } 11150 11151 LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; 11152 11153 // Find if it is better to use vectors or integers to load and store 11154 // to memory. 11155 EVT JointMemOpVT; 11156 if (UseVectorTy) { 11157 JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 11158 } else { 11159 unsigned SizeInBits = NumElem * ElementSizeBytes * 8; 11160 JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); 11161 } 11162 11163 SDLoc LoadDL(LoadNodes[0].MemNode); 11164 SDLoc StoreDL(StoreNodes[0].MemNode); 11165 11166 SDValue NewLoad = DAG.getLoad( 11167 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), 11168 FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); 11169 11170 SDValue NewStore = DAG.getStore( 11171 LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), 11172 FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); 11173 11174 // Replace one of the loads with the new load. 
11175 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); 11176 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), 11177 SDValue(NewLoad.getNode(), 1)); 11178 11179 // Remove the rest of the load chains. 11180 for (unsigned i = 1; i < NumElem ; ++i) { 11181 // Replace all chain users of the old load nodes with the chain of the new 11182 // load node. 11183 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); 11184 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); 11185 } 11186 11187 // Replace the last store with the new store. 11188 CombineTo(LatestOp, NewStore); 11189 // Erase all other stores. 11190 for (unsigned i = 0; i < NumElem ; ++i) { 11191 // Remove all Store nodes. 11192 if (StoreNodes[i].MemNode == LatestOp) 11193 continue; 11194 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 11195 DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); 11196 deleteAndRecombine(St); 11197 } 11198 11199 return true; 11200 } 11201 11202 SDValue DAGCombiner::visitSTORE(SDNode *N) { 11203 StoreSDNode *ST = cast<StoreSDNode>(N); 11204 SDValue Chain = ST->getChain(); 11205 SDValue Value = ST->getValue(); 11206 SDValue Ptr = ST->getBasePtr(); 11207 11208 // If this is a store of a bit convert, store the input value if the 11209 // resultant store does not need a higher alignment than the original. 
11210 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && 11211 ST->isUnindexed()) { 11212 unsigned OrigAlign = ST->getAlignment(); 11213 EVT SVT = Value.getOperand(0).getValueType(); 11214 unsigned Align = DAG.getDataLayout().getABITypeAlignment( 11215 SVT.getTypeForEVT(*DAG.getContext())); 11216 if (Align <= OrigAlign && 11217 ((!LegalOperations && !ST->isVolatile()) || 11218 TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) 11219 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), 11220 Ptr, ST->getPointerInfo(), ST->isVolatile(), 11221 ST->isNonTemporal(), OrigAlign, 11222 ST->getAAInfo()); 11223 } 11224 11225 // Turn 'store undef, Ptr' -> nothing. 11226 if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) 11227 return Chain; 11228 11229 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' 11230 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { 11231 // NOTE: If the original store is volatile, this transform must not increase 11232 // the number of stores. For example, on x86-32 an f64 can be stored in one 11233 // processor operation but an i64 (which is not legal) requires two. So the 11234 // transform should not be done in this case. 11235 if (Value.getOpcode() != ISD::TargetConstantFP) { 11236 SDValue Tmp; 11237 switch (CFP->getSimpleValueType(0).SimpleTy) { 11238 default: llvm_unreachable("Unknown FP type"); 11239 case MVT::f16: // We don't do this for these yet. 11240 case MVT::f80: 11241 case MVT::f128: 11242 case MVT::ppcf128: 11243 break; 11244 case MVT::f32: 11245 if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || 11246 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 11247 ; 11248 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 
11249 bitcastToAPInt().getZExtValue(), SDLoc(CFP), 11250 MVT::i32); 11251 return DAG.getStore(Chain, SDLoc(N), Tmp, 11252 Ptr, ST->getMemOperand()); 11253 } 11254 break; 11255 case MVT::f64: 11256 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && 11257 !ST->isVolatile()) || 11258 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { 11259 ; 11260 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 11261 getZExtValue(), SDLoc(CFP), MVT::i64); 11262 return DAG.getStore(Chain, SDLoc(N), Tmp, 11263 Ptr, ST->getMemOperand()); 11264 } 11265 11266 if (!ST->isVolatile() && 11267 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 11268 // Many FP stores are not made apparent until after legalize, e.g. for 11269 // argument passing. Since this is so common, custom legalize the 11270 // 64-bit integer store into two 32-bit stores. 11271 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); 11272 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); 11273 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); 11274 if (DAG.getDataLayout().isBigEndian()) 11275 std::swap(Lo, Hi); 11276 11277 unsigned Alignment = ST->getAlignment(); 11278 bool isVolatile = ST->isVolatile(); 11279 bool isNonTemporal = ST->isNonTemporal(); 11280 AAMDNodes AAInfo = ST->getAAInfo(); 11281 11282 SDLoc DL(N); 11283 11284 SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, 11285 Ptr, ST->getPointerInfo(), 11286 isVolatile, isNonTemporal, 11287 ST->getAlignment(), AAInfo); 11288 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 11289 DAG.getConstant(4, DL, Ptr.getValueType())); 11290 Alignment = MinAlign(Alignment, 4U); 11291 SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, 11292 Ptr, ST->getPointerInfo().getWithOffset(4), 11293 isVolatile, isNonTemporal, 11294 Alignment, AAInfo); 11295 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 11296 St0, St1); 11297 } 11298 11299 break; 11300 } 11301 } 11302 } 11303 11304 // Try to infer better alignment 
information than the store already has. 11305 if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { 11306 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { 11307 if (Align > ST->getAlignment()) { 11308 SDValue NewStore = 11309 DAG.getTruncStore(Chain, SDLoc(N), Value, 11310 Ptr, ST->getPointerInfo(), ST->getMemoryVT(), 11311 ST->isVolatile(), ST->isNonTemporal(), Align, 11312 ST->getAAInfo()); 11313 if (NewStore.getNode() != N) 11314 return CombineTo(ST, NewStore, true); 11315 } 11316 } 11317 } 11318 11319 // Try transforming a pair floating point load / store ops to integer 11320 // load / store ops. 11321 SDValue NewST = TransformFPLoadStorePair(N); 11322 if (NewST.getNode()) 11323 return NewST; 11324 11325 bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA 11326 : DAG.getSubtarget().useAA(); 11327 #ifndef NDEBUG 11328 if (CombinerAAOnlyFunc.getNumOccurrences() && 11329 CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) 11330 UseAA = false; 11331 #endif 11332 if (UseAA && ST->isUnindexed()) { 11333 // Walk up chain skipping non-aliasing memory nodes. 11334 SDValue BetterChain = FindBetterChain(N, Chain); 11335 11336 // If there is a better chain. 11337 if (Chain != BetterChain) { 11338 SDValue ReplStore; 11339 11340 // Replace the chain to avoid dependency. 11341 if (ST->isTruncatingStore()) { 11342 ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr, 11343 ST->getMemoryVT(), ST->getMemOperand()); 11344 } else { 11345 ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr, 11346 ST->getMemOperand()); 11347 } 11348 11349 // Create token to keep both nodes around. 11350 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), 11351 MVT::Other, Chain, ReplStore); 11352 11353 // Make sure the new and old chains are cleaned up. 11354 AddToWorklist(Token.getNode()); 11355 11356 // Don't add users to work list. 11357 return CombineTo(N, Token, false); 11358 } 11359 } 11360 11361 // Try transforming N to an indexed store. 
11362 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) 11363 return SDValue(N, 0); 11364 11365 // FIXME: is there such a thing as a truncating indexed store? 11366 if (ST->isTruncatingStore() && ST->isUnindexed() && 11367 Value.getValueType().isInteger()) { 11368 // See if we can simplify the input to this truncstore with knowledge that 11369 // only the low bits are being used. For example: 11370 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" 11371 SDValue Shorter = 11372 GetDemandedBits(Value, 11373 APInt::getLowBitsSet( 11374 Value.getValueType().getScalarType().getSizeInBits(), 11375 ST->getMemoryVT().getScalarType().getSizeInBits())); 11376 AddToWorklist(Value.getNode()); 11377 if (Shorter.getNode()) 11378 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, 11379 Ptr, ST->getMemoryVT(), ST->getMemOperand()); 11380 11381 // Otherwise, see if we can simplify the operation with 11382 // SimplifyDemandedBits, which only works if the value has a single use. 11383 if (SimplifyDemandedBits(Value, 11384 APInt::getLowBitsSet( 11385 Value.getValueType().getScalarType().getSizeInBits(), 11386 ST->getMemoryVT().getScalarType().getSizeInBits()))) 11387 return SDValue(N, 0); 11388 } 11389 11390 // If this is a load followed by a store to the same location, then the store 11391 // is dead/noop. 11392 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { 11393 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && 11394 ST->isUnindexed() && !ST->isVolatile() && 11395 // There can't be any side effects between the load and store, such as 11396 // a call or store. 11397 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { 11398 // The store is dead, remove it. 11399 return Chain; 11400 } 11401 } 11402 11403 // If this is a store followed by a store with the same value to the same 11404 // location, then the store is dead/noop. 
11405 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) { 11406 if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() && 11407 ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() && 11408 ST1->isUnindexed() && !ST1->isVolatile()) { 11409 // The store is dead, remove it. 11410 return Chain; 11411 } 11412 } 11413 11414 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a 11415 // truncating store. We can do this even if this is already a truncstore. 11416 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) 11417 && Value.getNode()->hasOneUse() && ST->isUnindexed() && 11418 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), 11419 ST->getMemoryVT())) { 11420 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), 11421 Ptr, ST->getMemoryVT(), ST->getMemOperand()); 11422 } 11423 11424 // Only perform this optimization before the types are legal, because we 11425 // don't want to perform this optimization on every DAGCombine invocation. 11426 if (!LegalTypes) { 11427 bool EverChanged = false; 11428 11429 do { 11430 // There can be multiple store sequences on the same chain. 11431 // Keep trying to merge store sequences until we are unable to do so 11432 // or until we merge the last store on the chain. 11433 bool Changed = MergeConsecutiveStores(ST); 11434 EverChanged |= Changed; 11435 if (!Changed) break; 11436 } while (ST->getOpcode() != ISD::DELETED_NODE); 11437 11438 if (EverChanged) 11439 return SDValue(N, 0); 11440 } 11441 11442 return ReduceLoadOpStoreWidth(N); 11443 } 11444 11445 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { 11446 SDValue InVec = N->getOperand(0); 11447 SDValue InVal = N->getOperand(1); 11448 SDValue EltNo = N->getOperand(2); 11449 SDLoc dl(N); 11450 11451 // If the inserted element is an UNDEF, just use the input vector. 
11452 if (InVal.getOpcode() == ISD::UNDEF) 11453 return InVec; 11454 11455 EVT VT = InVec.getValueType(); 11456 11457 // If we can't generate a legal BUILD_VECTOR, exit 11458 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) 11459 return SDValue(); 11460 11461 // Check that we know which element is being inserted 11462 if (!isa<ConstantSDNode>(EltNo)) 11463 return SDValue(); 11464 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 11465 11466 // Canonicalize insert_vector_elt dag nodes. 11467 // Example: 11468 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1) 11469 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0) 11470 // 11471 // Do this only if the child insert_vector node has one use; also 11472 // do this only if indices are both constants and Idx1 < Idx0. 11473 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse() 11474 && isa<ConstantSDNode>(InVec.getOperand(2))) { 11475 unsigned OtherElt = 11476 cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue(); 11477 if (Elt < OtherElt) { 11478 // Swap nodes. 11479 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT, 11480 InVec.getOperand(0), InVal, EltNo); 11481 AddToWorklist(NewOp.getNode()); 11482 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), 11483 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2)); 11484 } 11485 } 11486 11487 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially 11488 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the 11489 // vector elements. 11490 SmallVector<SDValue, 8> Ops; 11491 // Do not combine these two vectors if the output vector will not replace 11492 // the input vector. 
11493 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { 11494 Ops.append(InVec.getNode()->op_begin(), 11495 InVec.getNode()->op_end()); 11496 } else if (InVec.getOpcode() == ISD::UNDEF) { 11497 unsigned NElts = VT.getVectorNumElements(); 11498 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); 11499 } else { 11500 return SDValue(); 11501 } 11502 11503 // Insert the element 11504 if (Elt < Ops.size()) { 11505 // All the operands of BUILD_VECTOR must have the same type; 11506 // we enforce that here. 11507 EVT OpVT = Ops[0].getValueType(); 11508 if (InVal.getValueType() != OpVT) 11509 InVal = OpVT.bitsGT(InVal.getValueType()) ? 11510 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) : 11511 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal); 11512 Ops[Elt] = InVal; 11513 } 11514 11515 // Return the new vector 11516 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); 11517 } 11518 11519 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( 11520 SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { 11521 EVT ResultVT = EVE->getValueType(0); 11522 EVT VecEltVT = InVecVT.getVectorElementType(); 11523 unsigned Align = OriginalLoad->getAlignment(); 11524 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( 11525 VecEltVT.getTypeForEVT(*DAG.getContext())); 11526 11527 if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) 11528 return SDValue(); 11529 11530 Align = NewAlign; 11531 11532 SDValue NewPtr = OriginalLoad->getBasePtr(); 11533 SDValue Offset; 11534 EVT PtrType = NewPtr.getValueType(); 11535 MachinePointerInfo MPI; 11536 SDLoc DL(EVE); 11537 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { 11538 int Elt = ConstEltNo->getZExtValue(); 11539 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; 11540 Offset = DAG.getConstant(PtrOff, DL, PtrType); 11541 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); 11542 } else { 11543 Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType); 11544 
Offset = DAG.getNode( 11545 ISD::MUL, DL, PtrType, Offset, 11546 DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType)); 11547 MPI = OriginalLoad->getPointerInfo(); 11548 } 11549 NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); 11550 11551 // The replacement we need to do here is a little tricky: we need to 11552 // replace an extractelement of a load with a load. 11553 // Use ReplaceAllUsesOfValuesWith to do the replacement. 11554 // Note that this replacement assumes that the extractvalue is the only 11555 // use of the load; that's okay because we don't want to perform this 11556 // transformation in other cases anyway. 11557 SDValue Load; 11558 SDValue Chain; 11559 if (ResultVT.bitsGT(VecEltVT)) { 11560 // If the result type of vextract is wider than the load, then issue an 11561 // extending load instead. 11562 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, 11563 VecEltVT) 11564 ? ISD::ZEXTLOAD 11565 : ISD::EXTLOAD; 11566 Load = DAG.getExtLoad( 11567 ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI, 11568 VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), 11569 OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); 11570 Chain = Load.getValue(1); 11571 } else { 11572 Load = DAG.getLoad( 11573 VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, 11574 OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), 11575 OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); 11576 Chain = Load.getValue(1); 11577 if (ResultVT.bitsLT(VecEltVT)) 11578 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); 11579 else 11580 Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load); 11581 } 11582 WorklistRemover DeadNodes(*this); 11583 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; 11584 SDValue To[] = { Load, Chain }; 11585 DAG.ReplaceAllUsesOfValuesWith(From, To, 2); 11586 // Since we're explicitly calling ReplaceAllUses, add the new node to the 
11587 // worklist explicitly as well. 11588 AddToWorklist(Load.getNode()); 11589 AddUsersToWorklist(Load.getNode()); // Add users too 11590 // Make sure to revisit this node to clean it up; it will usually be dead. 11591 AddToWorklist(EVE); 11592 ++OpsNarrowed; 11593 return SDValue(EVE, 0); 11594 } 11595 11596 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { 11597 // (vextract (scalar_to_vector val, 0) -> val 11598 SDValue InVec = N->getOperand(0); 11599 EVT VT = InVec.getValueType(); 11600 EVT NVT = N->getValueType(0); 11601 11602 if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { 11603 // Check if the result type doesn't match the inserted element type. A 11604 // SCALAR_TO_VECTOR may truncate the inserted element and the 11605 // EXTRACT_VECTOR_ELT may widen the extracted vector. 11606 SDValue InOp = InVec.getOperand(0); 11607 if (InOp.getValueType() != NVT) { 11608 assert(InOp.getValueType().isInteger() && NVT.isInteger()); 11609 return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT); 11610 } 11611 return InOp; 11612 } 11613 11614 SDValue EltNo = N->getOperand(1); 11615 bool ConstEltNo = isa<ConstantSDNode>(EltNo); 11616 11617 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. 11618 // We only perform this optimization before the op legalization phase because 11619 // we may introduce new vector instructions which are not backed by TD 11620 // patterns. For example on AVX, extracting elements from a wide vector 11621 // without using extract_subvector. However, if we can find an underlying 11622 // scalar value, then we can always use that. 11623 if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE 11624 && ConstEltNo) { 11625 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 11626 int NumElem = VT.getVectorNumElements(); 11627 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); 11628 // Find the new index to extract from. 11629 int OrigElt = SVOp->getMaskElt(Elt); 11630 11631 // Extracting an undef index is undef. 
11632 if (OrigElt == -1) 11633 return DAG.getUNDEF(NVT); 11634 11635 // Select the right vector half to extract from. 11636 SDValue SVInVec; 11637 if (OrigElt < NumElem) { 11638 SVInVec = InVec->getOperand(0); 11639 } else { 11640 SVInVec = InVec->getOperand(1); 11641 OrigElt -= NumElem; 11642 } 11643 11644 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { 11645 SDValue InOp = SVInVec.getOperand(OrigElt); 11646 if (InOp.getValueType() != NVT) { 11647 assert(InOp.getValueType().isInteger() && NVT.isInteger()); 11648 InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); 11649 } 11650 11651 return InOp; 11652 } 11653 11654 // FIXME: We should handle recursing on other vector shuffles and 11655 // scalar_to_vector here as well. 11656 11657 if (!LegalOperations) { 11658 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 11659 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, 11660 DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); 11661 } 11662 } 11663 11664 bool BCNumEltsChanged = false; 11665 EVT ExtVT = VT.getVectorElementType(); 11666 EVT LVT = ExtVT; 11667 11668 // If the result of load has to be truncated, then it's not necessarily 11669 // profitable. 11670 if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) 11671 return SDValue(); 11672 11673 if (InVec.getOpcode() == ISD::BITCAST) { 11674 // Don't duplicate a load with other uses. 
11675 if (!InVec.hasOneUse()) 11676 return SDValue(); 11677 11678 EVT BCVT = InVec.getOperand(0).getValueType(); 11679 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) 11680 return SDValue(); 11681 if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) 11682 BCNumEltsChanged = true; 11683 InVec = InVec.getOperand(0); 11684 ExtVT = BCVT.getVectorElementType(); 11685 } 11686 11687 // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) 11688 if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && 11689 ISD::isNormalLoad(InVec.getNode()) && 11690 !N->getOperand(1)->hasPredecessor(InVec.getNode())) { 11691 SDValue Index = N->getOperand(1); 11692 if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) 11693 return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, 11694 OrigLoad); 11695 } 11696 11697 // Perform only after legalization to ensure build_vector / vector_shuffle 11698 // optimizations have already been done. 11699 if (!LegalOperations) return SDValue(); 11700 11701 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) 11702 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) 11703 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) 11704 11705 if (ConstEltNo) { 11706 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 11707 11708 LoadSDNode *LN0 = nullptr; 11709 const ShuffleVectorSDNode *SVN = nullptr; 11710 if (ISD::isNormalLoad(InVec.getNode())) { 11711 LN0 = cast<LoadSDNode>(InVec); 11712 } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && 11713 InVec.getOperand(0).getValueType() == ExtVT && 11714 ISD::isNormalLoad(InVec.getOperand(0).getNode())) { 11715 // Don't duplicate a load with other uses. 
11716 if (!InVec.hasOneUse()) 11717 return SDValue(); 11718 11719 LN0 = cast<LoadSDNode>(InVec.getOperand(0)); 11720 } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { 11721 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) 11722 // => 11723 // (load $addr+1*size) 11724 11725 // Don't duplicate a load with other uses. 11726 if (!InVec.hasOneUse()) 11727 return SDValue(); 11728 11729 // If the bit convert changed the number of elements, it is unsafe 11730 // to examine the mask. 11731 if (BCNumEltsChanged) 11732 return SDValue(); 11733 11734 // Select the input vector, guarding against out of range extract vector. 11735 unsigned NumElems = VT.getVectorNumElements(); 11736 int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); 11737 InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); 11738 11739 if (InVec.getOpcode() == ISD::BITCAST) { 11740 // Don't duplicate a load with other uses. 11741 if (!InVec.hasOneUse()) 11742 return SDValue(); 11743 11744 InVec = InVec.getOperand(0); 11745 } 11746 if (ISD::isNormalLoad(InVec.getNode())) { 11747 LN0 = cast<LoadSDNode>(InVec); 11748 Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; 11749 EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType()); 11750 } 11751 } 11752 11753 // Make sure we found a non-volatile load and the extractelement is 11754 // the only use. 11755 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) 11756 return SDValue(); 11757 11758 // If Idx was -1 above, Elt is going to be -1, so just return undef. 
11759 if (Elt == -1) 11760 return DAG.getUNDEF(LVT); 11761 11762 return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0); 11763 } 11764 11765 return SDValue(); 11766 } 11767 11768 // Simplify (build_vec (ext )) to (bitcast (build_vec )) 11769 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { 11770 // We perform this optimization post type-legalization because 11771 // the type-legalizer often scalarizes integer-promoted vectors. 11772 // Performing this optimization before may create bit-casts which 11773 // will be type-legalized to complex code sequences. 11774 // We perform this optimization only before the operation legalizer because we 11775 // may introduce illegal operations. 11776 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes) 11777 return SDValue(); 11778 11779 unsigned NumInScalars = N->getNumOperands(); 11780 SDLoc dl(N); 11781 EVT VT = N->getValueType(0); 11782 11783 // Check to see if this is a BUILD_VECTOR of a bunch of values 11784 // which come from any_extend or zero_extend nodes. If so, we can create 11785 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR 11786 // optimizations. We do not handle sign-extend because we can't fill the sign 11787 // using shuffles. 11788 EVT SourceType = MVT::Other; 11789 bool AllAnyExt = true; 11790 11791 for (unsigned i = 0; i != NumInScalars; ++i) { 11792 SDValue In = N->getOperand(i); 11793 // Ignore undef inputs. 11794 if (In.getOpcode() == ISD::UNDEF) continue; 11795 11796 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; 11797 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; 11798 11799 // Abort if the element is not an extension. 11800 if (!ZeroExt && !AnyExt) { 11801 SourceType = MVT::Other; 11802 break; 11803 } 11804 11805 // The input is a ZeroExt or AnyExt. Check the original type. 11806 EVT InTy = In.getOperand(0).getValueType(); 11807 11808 // Check that all of the widened source types are the same. 
11809 if (SourceType == MVT::Other) 11810 // First time. 11811 SourceType = InTy; 11812 else if (InTy != SourceType) { 11813 // Multiple income types. Abort. 11814 SourceType = MVT::Other; 11815 break; 11816 } 11817 11818 // Check if all of the extends are ANY_EXTENDs. 11819 AllAnyExt &= AnyExt; 11820 } 11821 11822 // In order to have valid types, all of the inputs must be extended from the 11823 // same source type and all of the inputs must be any or zero extend. 11824 // Scalar sizes must be a power of two. 11825 EVT OutScalarTy = VT.getScalarType(); 11826 bool ValidTypes = SourceType != MVT::Other && 11827 isPowerOf2_32(OutScalarTy.getSizeInBits()) && 11828 isPowerOf2_32(SourceType.getSizeInBits()); 11829 11830 // Create a new simpler BUILD_VECTOR sequence which other optimizations can 11831 // turn into a single shuffle instruction. 11832 if (!ValidTypes) 11833 return SDValue(); 11834 11835 bool isLE = DAG.getDataLayout().isLittleEndian(); 11836 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); 11837 assert(ElemRatio > 1 && "Invalid element size ratio"); 11838 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): 11839 DAG.getConstant(0, SDLoc(N), SourceType); 11840 11841 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); 11842 SmallVector<SDValue, 8> Ops(NewBVElems, Filler); 11843 11844 // Populate the new build_vector 11845 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 11846 SDValue Cast = N->getOperand(i); 11847 assert((Cast.getOpcode() == ISD::ANY_EXTEND || 11848 Cast.getOpcode() == ISD::ZERO_EXTEND || 11849 Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); 11850 SDValue In; 11851 if (Cast.getOpcode() == ISD::UNDEF) 11852 In = DAG.getUNDEF(SourceType); 11853 else 11854 In = Cast->getOperand(0); 11855 unsigned Index = isLE ? 
(i * ElemRatio) : 11856 (i * ElemRatio + (ElemRatio - 1)); 11857 11858 assert(Index < Ops.size() && "Invalid index"); 11859 Ops[Index] = In; 11860 } 11861 11862 // The type of the new BUILD_VECTOR node. 11863 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); 11864 assert(VecVT.getSizeInBits() == VT.getSizeInBits() && 11865 "Invalid vector size"); 11866 // Check if the new vector type is legal. 11867 if (!isTypeLegal(VecVT)) return SDValue(); 11868 11869 // Make the new BUILD_VECTOR. 11870 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); 11871 11872 // The new BUILD_VECTOR node has the potential to be further optimized. 11873 AddToWorklist(BV.getNode()); 11874 // Bitcast to the desired type. 11875 return DAG.getNode(ISD::BITCAST, dl, VT, BV); 11876 } 11877 11878 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { 11879 EVT VT = N->getValueType(0); 11880 11881 unsigned NumInScalars = N->getNumOperands(); 11882 SDLoc dl(N); 11883 11884 EVT SrcVT = MVT::Other; 11885 unsigned Opcode = ISD::DELETED_NODE; 11886 unsigned NumDefs = 0; 11887 11888 for (unsigned i = 0; i != NumInScalars; ++i) { 11889 SDValue In = N->getOperand(i); 11890 unsigned Opc = In.getOpcode(); 11891 11892 if (Opc == ISD::UNDEF) 11893 continue; 11894 11895 // If all scalar values are floats and converted from integers. 11896 if (Opcode == ISD::DELETED_NODE && 11897 (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) { 11898 Opcode = Opc; 11899 } 11900 11901 if (Opc != Opcode) 11902 return SDValue(); 11903 11904 EVT InVT = In.getOperand(0).getValueType(); 11905 11906 // If all scalar values are typed differently, bail out. It's chosen to 11907 // simplify BUILD_VECTOR of integer types. 11908 if (SrcVT == MVT::Other) 11909 SrcVT = InVT; 11910 if (SrcVT != InVT) 11911 return SDValue(); 11912 NumDefs++; 11913 } 11914 11915 // If the vector has just one element defined, it's not worth to fold it into 11916 // a vectorized one. 
11917 if (NumDefs < 2) 11918 return SDValue(); 11919 11920 assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP) 11921 && "Should only handle conversion from integer to float."); 11922 assert(SrcVT != MVT::Other && "Cannot determine source type!"); 11923 11924 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars); 11925 11926 if (!TLI.isOperationLegalOrCustom(Opcode, NVT)) 11927 return SDValue(); 11928 11929 // Just because the floating-point vector type is legal does not necessarily 11930 // mean that the corresponding integer vector type is. 11931 if (!isTypeLegal(NVT)) 11932 return SDValue(); 11933 11934 SmallVector<SDValue, 8> Opnds; 11935 for (unsigned i = 0; i != NumInScalars; ++i) { 11936 SDValue In = N->getOperand(i); 11937 11938 if (In.getOpcode() == ISD::UNDEF) 11939 Opnds.push_back(DAG.getUNDEF(SrcVT)); 11940 else 11941 Opnds.push_back(In.getOperand(0)); 11942 } 11943 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds); 11944 AddToWorklist(BV.getNode()); 11945 11946 return DAG.getNode(Opcode, dl, VT, BV); 11947 } 11948 11949 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { 11950 unsigned NumInScalars = N->getNumOperands(); 11951 SDLoc dl(N); 11952 EVT VT = N->getValueType(0); 11953 11954 // A vector built entirely of undefs is undef. 11955 if (ISD::allOperandsUndef(N)) 11956 return DAG.getUNDEF(VT); 11957 11958 if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) 11959 return V; 11960 11961 if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) 11962 return V; 11963 11964 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT 11965 // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from 11966 // at most two distinct vectors, turn this into a shuffle node. 11967 11968 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. 11969 if (!isTypeLegal(VT)) 11970 return SDValue(); 11971 11972 // May only combine to shuffle after legalize if shuffle is legal. 
11973 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) 11974 return SDValue(); 11975 11976 SDValue VecIn1, VecIn2; 11977 bool UsesZeroVector = false; 11978 for (unsigned i = 0; i != NumInScalars; ++i) { 11979 SDValue Op = N->getOperand(i); 11980 // Ignore undef inputs. 11981 if (Op.getOpcode() == ISD::UNDEF) continue; 11982 11983 // See if we can combine this build_vector into a blend with a zero vector. 11984 if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) { 11985 UsesZeroVector = true; 11986 continue; 11987 } 11988 11989 // If this input is something other than a EXTRACT_VECTOR_ELT with a 11990 // constant index, bail out. 11991 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || 11992 !isa<ConstantSDNode>(Op.getOperand(1))) { 11993 VecIn1 = VecIn2 = SDValue(nullptr, 0); 11994 break; 11995 } 11996 11997 // We allow up to two distinct input vectors. 11998 SDValue ExtractedFromVec = Op.getOperand(0); 11999 if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) 12000 continue; 12001 12002 if (!VecIn1.getNode()) { 12003 VecIn1 = ExtractedFromVec; 12004 } else if (!VecIn2.getNode() && !UsesZeroVector) { 12005 VecIn2 = ExtractedFromVec; 12006 } else { 12007 // Too many inputs. 12008 VecIn1 = VecIn2 = SDValue(nullptr, 0); 12009 break; 12010 } 12011 } 12012 12013 // If everything is good, we can make a shuffle operation. 12014 if (VecIn1.getNode()) { 12015 unsigned InNumElements = VecIn1.getValueType().getVectorNumElements(); 12016 SmallVector<int, 8> Mask; 12017 for (unsigned i = 0; i != NumInScalars; ++i) { 12018 unsigned Opcode = N->getOperand(i).getOpcode(); 12019 if (Opcode == ISD::UNDEF) { 12020 Mask.push_back(-1); 12021 continue; 12022 } 12023 12024 // Operands can also be zero. 
12025 if (Opcode != ISD::EXTRACT_VECTOR_ELT) { 12026 assert(UsesZeroVector && 12027 (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) && 12028 "Unexpected node found!"); 12029 Mask.push_back(NumInScalars+i); 12030 continue; 12031 } 12032 12033 // If extracting from the first vector, just use the index directly. 12034 SDValue Extract = N->getOperand(i); 12035 SDValue ExtVal = Extract.getOperand(1); 12036 unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 12037 if (Extract.getOperand(0) == VecIn1) { 12038 Mask.push_back(ExtIndex); 12039 continue; 12040 } 12041 12042 // Otherwise, use InIdx + InputVecSize 12043 Mask.push_back(InNumElements + ExtIndex); 12044 } 12045 12046 // Avoid introducing illegal shuffles with zero. 12047 if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT)) 12048 return SDValue(); 12049 12050 // We can't generate a shuffle node with mismatched input and output types. 12051 // Attempt to transform a single input vector to the correct type. 12052 if ((VT != VecIn1.getValueType())) { 12053 // If the input vector type has a different base type to the output 12054 // vector type, bail out. 12055 EVT VTElemType = VT.getVectorElementType(); 12056 if ((VecIn1.getValueType().getVectorElementType() != VTElemType) || 12057 (VecIn2.getNode() && 12058 (VecIn2.getValueType().getVectorElementType() != VTElemType))) 12059 return SDValue(); 12060 12061 // If the input vector is too small, widen it. 12062 // We only support widening of vectors which are half the size of the 12063 // output registers. For example XMM->YMM widening on X86 with AVX. 12064 EVT VecInT = VecIn1.getValueType(); 12065 if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { 12066 // If we only have one small input, widen it by adding undef values. 
12067 if (!VecIn2.getNode()) 12068 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, 12069 DAG.getUNDEF(VecIn1.getValueType())); 12070 else if (VecIn1.getValueType() == VecIn2.getValueType()) { 12071 // If we have two small inputs of the same type, try to concat them. 12072 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2); 12073 VecIn2 = SDValue(nullptr, 0); 12074 } else 12075 return SDValue(); 12076 } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { 12077 // If the input vector is too large, try to split it. 12078 // We don't support having two input vectors that are too large. 12079 // If the zero vector was used, we can not split the vector, 12080 // since we'd need 3 inputs. 12081 if (UsesZeroVector || VecIn2.getNode()) 12082 return SDValue(); 12083 12084 if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) 12085 return SDValue(); 12086 12087 // Try to replace VecIn1 with two extract_subvectors 12088 // No need to update the masks, they should still be correct. 12089 VecIn2 = DAG.getNode( 12090 ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, 12091 DAG.getConstant(VT.getVectorNumElements(), dl, 12092 TLI.getVectorIdxTy(DAG.getDataLayout()))); 12093 VecIn1 = DAG.getNode( 12094 ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, 12095 DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); 12096 } else 12097 return SDValue(); 12098 } 12099 12100 if (UsesZeroVector) 12101 VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) : 12102 DAG.getConstantFP(0.0, dl, VT); 12103 else 12104 // If VecIn2 is unused then change it to undef. 12105 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); 12106 12107 // Check that we were able to transform all incoming values to the same 12108 // type. 12109 if (VecIn2.getValueType() != VecIn1.getValueType() || 12110 VecIn1.getValueType() != VT) 12111 return SDValue(); 12112 12113 // Return the new VECTOR_SHUFFLE node. 
12114 SDValue Ops[2]; 12115 Ops[0] = VecIn1; 12116 Ops[1] = VecIn2; 12117 return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]); 12118 } 12119 12120 return SDValue(); 12121 } 12122 12123 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { 12124 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 12125 EVT OpVT = N->getOperand(0).getValueType(); 12126 12127 // If the operands are legal vectors, leave them alone. 12128 if (TLI.isTypeLegal(OpVT)) 12129 return SDValue(); 12130 12131 SDLoc DL(N); 12132 EVT VT = N->getValueType(0); 12133 SmallVector<SDValue, 8> Ops; 12134 12135 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); 12136 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); 12137 12138 // Keep track of what we encounter. 12139 bool AnyInteger = false; 12140 bool AnyFP = false; 12141 for (const SDValue &Op : N->ops()) { 12142 if (ISD::BITCAST == Op.getOpcode() && 12143 !Op.getOperand(0).getValueType().isVector()) 12144 Ops.push_back(Op.getOperand(0)); 12145 else if (ISD::UNDEF == Op.getOpcode()) 12146 Ops.push_back(ScalarUndef); 12147 else 12148 return SDValue(); 12149 12150 // Note whether we encounter an integer or floating point scalar. 12151 // If it's neither, bail out, it could be something weird like x86mmx. 12152 EVT LastOpVT = Ops.back().getValueType(); 12153 if (LastOpVT.isFloatingPoint()) 12154 AnyFP = true; 12155 else if (LastOpVT.isInteger()) 12156 AnyInteger = true; 12157 else 12158 return SDValue(); 12159 } 12160 12161 // If any of the operands is a floating point scalar bitcast to a vector, 12162 // use floating point types throughout, and bitcast everything. 12163 // Replace UNDEFs by another scalar UNDEF node, of the final desired type. 
12164 if (AnyFP) { 12165 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); 12166 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); 12167 if (AnyInteger) { 12168 for (SDValue &Op : Ops) { 12169 if (Op.getValueType() == SVT) 12170 continue; 12171 if (Op.getOpcode() == ISD::UNDEF) 12172 Op = ScalarUndef; 12173 else 12174 Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op); 12175 } 12176 } 12177 } 12178 12179 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, 12180 VT.getSizeInBits() / SVT.getSizeInBits()); 12181 return DAG.getNode(ISD::BITCAST, DL, VT, 12182 DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); 12183 } 12184 12185 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { 12186 // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of 12187 // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector 12188 // inputs come from at most two distinct vectors, turn this into a shuffle 12189 // node. 12190 12191 // If we only have one input vector, we don't need to do any concatenation. 12192 if (N->getNumOperands() == 1) 12193 return N->getOperand(0); 12194 12195 // Check if all of the operands are undefs. 12196 EVT VT = N->getValueType(0); 12197 if (ISD::allOperandsUndef(N)) 12198 return DAG.getUNDEF(VT); 12199 12200 // Optimize concat_vectors where all but the first of the vectors are undef. 12201 if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { 12202 return Op.getOpcode() == ISD::UNDEF; 12203 })) { 12204 SDValue In = N->getOperand(0); 12205 assert(In.getValueType().isVector() && "Must concat vectors"); 12206 12207 // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). 
12208 if (In->getOpcode() == ISD::BITCAST && 12209 !In->getOperand(0)->getValueType(0).isVector()) { 12210 SDValue Scalar = In->getOperand(0); 12211 12212 // If the bitcast type isn't legal, it might be a trunc of a legal type; 12213 // look through the trunc so we can still do the transform: 12214 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) 12215 if (Scalar->getOpcode() == ISD::TRUNCATE && 12216 !TLI.isTypeLegal(Scalar.getValueType()) && 12217 TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) 12218 Scalar = Scalar->getOperand(0); 12219 12220 EVT SclTy = Scalar->getValueType(0); 12221 12222 if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) 12223 return SDValue(); 12224 12225 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, 12226 VT.getSizeInBits() / SclTy.getSizeInBits()); 12227 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) 12228 return SDValue(); 12229 12230 SDLoc dl = SDLoc(N); 12231 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); 12232 return DAG.getNode(ISD::BITCAST, dl, VT, Res); 12233 } 12234 } 12235 12236 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. 12237 // We have already tested above for an UNDEF only concatenation. 12238 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) 12239 // -> (BUILD_VECTOR A, B, ..., C, D, ...) 12240 auto IsBuildVectorOrUndef = [](const SDValue &Op) { 12241 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); 12242 }; 12243 bool AllBuildVectorsOrUndefs = 12244 std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef); 12245 if (AllBuildVectorsOrUndefs) { 12246 SmallVector<SDValue, 8> Opnds; 12247 EVT SVT = VT.getScalarType(); 12248 12249 EVT MinVT = SVT; 12250 if (!SVT.isFloatingPoint()) { 12251 // If BUILD_VECTOR are from built from integer, they may have different 12252 // operand types. Get the smallest type and truncate all operands to it. 
12253 bool FoundMinVT = false; 12254 for (const SDValue &Op : N->ops()) 12255 if (ISD::BUILD_VECTOR == Op.getOpcode()) { 12256 EVT OpSVT = Op.getOperand(0)->getValueType(0); 12257 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT; 12258 FoundMinVT = true; 12259 } 12260 assert(FoundMinVT && "Concat vector type mismatch"); 12261 } 12262 12263 for (const SDValue &Op : N->ops()) { 12264 EVT OpVT = Op.getValueType(); 12265 unsigned NumElts = OpVT.getVectorNumElements(); 12266 12267 if (ISD::UNDEF == Op.getOpcode()) 12268 Opnds.append(NumElts, DAG.getUNDEF(MinVT)); 12269 12270 if (ISD::BUILD_VECTOR == Op.getOpcode()) { 12271 if (SVT.isFloatingPoint()) { 12272 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); 12273 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); 12274 } else { 12275 for (unsigned i = 0; i != NumElts; ++i) 12276 Opnds.push_back( 12277 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); 12278 } 12279 } 12280 } 12281 12282 assert(VT.getVectorNumElements() == Opnds.size() && 12283 "Concat vector type mismatch"); 12284 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); 12285 } 12286 12287 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. 12288 if (SDValue V = combineConcatVectorOfScalars(N, DAG)) 12289 return V; 12290 12291 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR 12292 // nodes often generate nop CONCAT_VECTOR nodes. 12293 // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that 12294 // place the incoming vectors at the exact same location. 
12295 SDValue SingleSource = SDValue(); 12296 unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); 12297 12298 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 12299 SDValue Op = N->getOperand(i); 12300 12301 if (Op.getOpcode() == ISD::UNDEF) 12302 continue; 12303 12304 // Check if this is the identity extract: 12305 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) 12306 return SDValue(); 12307 12308 // Find the single incoming vector for the extract_subvector. 12309 if (SingleSource.getNode()) { 12310 if (Op.getOperand(0) != SingleSource) 12311 return SDValue(); 12312 } else { 12313 SingleSource = Op.getOperand(0); 12314 12315 // Check the source type is the same as the type of the result. 12316 // If not, this concat may extend the vector, so we can not 12317 // optimize it away. 12318 if (SingleSource.getValueType() != N->getValueType(0)) 12319 return SDValue(); 12320 } 12321 12322 unsigned IdentityIndex = i * PartNumElem; 12323 ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 12324 // The extract index must be constant. 12325 if (!CS) 12326 return SDValue(); 12327 12328 // Check that we are reading from the identity index. 12329 if (CS->getZExtValue() != IdentityIndex) 12330 return SDValue(); 12331 } 12332 12333 if (SingleSource.getNode()) 12334 return SingleSource; 12335 12336 return SDValue(); 12337 } 12338 12339 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { 12340 EVT NVT = N->getValueType(0); 12341 SDValue V = N->getOperand(0); 12342 12343 if (V->getOpcode() == ISD::CONCAT_VECTORS) { 12344 // Combine: 12345 // (extract_subvec (concat V1, V2, ...), i) 12346 // Into: 12347 // Vi if possible 12348 // Only operand 0 is checked as 'concat' assumes all inputs of the same 12349 // type. 
12350 if (V->getOperand(0).getValueType() != NVT) 12351 return SDValue(); 12352 unsigned Idx = N->getConstantOperandVal(1); 12353 unsigned NumElems = NVT.getVectorNumElements(); 12354 assert((Idx % NumElems) == 0 && 12355 "IDX in concat is not a multiple of the result vector length."); 12356 return V->getOperand(Idx / NumElems); 12357 } 12358 12359 // Skip bitcasting 12360 if (V->getOpcode() == ISD::BITCAST) 12361 V = V.getOperand(0); 12362 12363 if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { 12364 SDLoc dl(N); 12365 // Handle only simple case where vector being inserted and vector 12366 // being extracted are of same type, and are half size of larger vectors. 12367 EVT BigVT = V->getOperand(0).getValueType(); 12368 EVT SmallVT = V->getOperand(1).getValueType(); 12369 if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) 12370 return SDValue(); 12371 12372 // Only handle cases where both indexes are constants with the same type. 12373 ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); 12374 ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); 12375 12376 if (InsIdx && ExtIdx && 12377 InsIdx->getValueType(0).getSizeInBits() <= 64 && 12378 ExtIdx->getValueType(0).getSizeInBits() <= 64) { 12379 // Combine: 12380 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) 12381 // Into: 12382 // indices are equal or bit offsets are equal => V1 12383 // otherwise => (extract_subvec V1, ExtIdx) 12384 if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == 12385 ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) 12386 return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1)); 12387 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, 12388 DAG.getNode(ISD::BITCAST, dl, 12389 N->getOperand(0).getValueType(), 12390 V->getOperand(0)), N->getOperand(1)); 12391 } 12392 } 12393 12394 return SDValue(); 12395 } 12396 12397 static SDValue simplifyShuffleOperandRecursively(SmallBitVector 
&UsedElements, 12398 SDValue V, SelectionDAG &DAG) { 12399 SDLoc DL(V); 12400 EVT VT = V.getValueType(); 12401 12402 switch (V.getOpcode()) { 12403 default: 12404 return V; 12405 12406 case ISD::CONCAT_VECTORS: { 12407 EVT OpVT = V->getOperand(0).getValueType(); 12408 int OpSize = OpVT.getVectorNumElements(); 12409 SmallBitVector OpUsedElements(OpSize, false); 12410 bool FoundSimplification = false; 12411 SmallVector<SDValue, 4> NewOps; 12412 NewOps.reserve(V->getNumOperands()); 12413 for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) { 12414 SDValue Op = V->getOperand(i); 12415 bool OpUsed = false; 12416 for (int j = 0; j < OpSize; ++j) 12417 if (UsedElements[i * OpSize + j]) { 12418 OpUsedElements[j] = true; 12419 OpUsed = true; 12420 } 12421 NewOps.push_back( 12422 OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG) 12423 : DAG.getUNDEF(OpVT)); 12424 FoundSimplification |= Op == NewOps.back(); 12425 OpUsedElements.reset(); 12426 } 12427 if (FoundSimplification) 12428 V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps); 12429 return V; 12430 } 12431 12432 case ISD::INSERT_SUBVECTOR: { 12433 SDValue BaseV = V->getOperand(0); 12434 SDValue SubV = V->getOperand(1); 12435 auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2)); 12436 if (!IdxN) 12437 return V; 12438 12439 int SubSize = SubV.getValueType().getVectorNumElements(); 12440 int Idx = IdxN->getZExtValue(); 12441 bool SubVectorUsed = false; 12442 SmallBitVector SubUsedElements(SubSize, false); 12443 for (int i = 0; i < SubSize; ++i) 12444 if (UsedElements[i + Idx]) { 12445 SubVectorUsed = true; 12446 SubUsedElements[i] = true; 12447 UsedElements[i + Idx] = false; 12448 } 12449 12450 // Now recurse on both the base and sub vectors. 12451 SDValue SimplifiedSubV = 12452 SubVectorUsed 12453 ? 
simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG) 12454 : DAG.getUNDEF(SubV.getValueType()); 12455 SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG); 12456 if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV) 12457 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, 12458 SimplifiedBaseV, SimplifiedSubV, V->getOperand(2)); 12459 return V; 12460 } 12461 } 12462 } 12463 12464 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, 12465 SDValue N1, SelectionDAG &DAG) { 12466 EVT VT = SVN->getValueType(0); 12467 int NumElts = VT.getVectorNumElements(); 12468 SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false); 12469 for (int M : SVN->getMask()) 12470 if (M >= 0 && M < NumElts) 12471 N0UsedElements[M] = true; 12472 else if (M >= NumElts) 12473 N1UsedElements[M - NumElts] = true; 12474 12475 SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG); 12476 SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG); 12477 if (S0 == N0 && S1 == N1) 12478 return SDValue(); 12479 12480 return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); 12481 } 12482 12483 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, 12484 // or turn a shuffle of a single concat into simpler shuffle then concat. 
12485 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { 12486 EVT VT = N->getValueType(0); 12487 unsigned NumElts = VT.getVectorNumElements(); 12488 12489 SDValue N0 = N->getOperand(0); 12490 SDValue N1 = N->getOperand(1); 12491 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 12492 12493 SmallVector<SDValue, 4> Ops; 12494 EVT ConcatVT = N0.getOperand(0).getValueType(); 12495 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); 12496 unsigned NumConcats = NumElts / NumElemsPerConcat; 12497 12498 // Special case: shuffle(concat(A,B)) can be more efficiently represented 12499 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high 12500 // half vector elements. 12501 if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF && 12502 std::all_of(SVN->getMask().begin() + NumElemsPerConcat, 12503 SVN->getMask().end(), [](int i) { return i == -1; })) { 12504 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), 12505 ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat)); 12506 N1 = DAG.getUNDEF(ConcatVT); 12507 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); 12508 } 12509 12510 // Look at every vector that's inserted. We're looking for exact 12511 // subvector-sized copies from a concatenated vector 12512 for (unsigned I = 0; I != NumConcats; ++I) { 12513 // Make sure we're dealing with a copy. 
12514 unsigned Begin = I * NumElemsPerConcat; 12515 bool AllUndef = true, NoUndef = true; 12516 for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) { 12517 if (SVN->getMaskElt(J) >= 0) 12518 AllUndef = false; 12519 else 12520 NoUndef = false; 12521 } 12522 12523 if (NoUndef) { 12524 if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) 12525 return SDValue(); 12526 12527 for (unsigned J = 1; J != NumElemsPerConcat; ++J) 12528 if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) 12529 return SDValue(); 12530 12531 unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; 12532 if (FirstElt < N0.getNumOperands()) 12533 Ops.push_back(N0.getOperand(FirstElt)); 12534 else 12535 Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); 12536 12537 } else if (AllUndef) { 12538 Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType())); 12539 } else { // Mixed with general masks and undefs, can't do optimization. 12540 return SDValue(); 12541 } 12542 } 12543 12544 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); 12545 } 12546 12547 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { 12548 EVT VT = N->getValueType(0); 12549 unsigned NumElts = VT.getVectorNumElements(); 12550 12551 SDValue N0 = N->getOperand(0); 12552 SDValue N1 = N->getOperand(1); 12553 12554 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); 12555 12556 // Canonicalize shuffle undef, undef -> undef 12557 if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) 12558 return DAG.getUNDEF(VT); 12559 12560 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 12561 12562 // Canonicalize shuffle v, v -> v, undef 12563 if (N0 == N1) { 12564 SmallVector<int, 8> NewMask; 12565 for (unsigned i = 0; i != NumElts; ++i) { 12566 int Idx = SVN->getMaskElt(i); 12567 if (Idx >= (int)NumElts) Idx -= NumElts; 12568 NewMask.push_back(Idx); 12569 } 12570 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), 12571 &NewMask[0]); 
12572 } 12573 12574 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. 12575 if (N0.getOpcode() == ISD::UNDEF) { 12576 SmallVector<int, 8> NewMask; 12577 for (unsigned i = 0; i != NumElts; ++i) { 12578 int Idx = SVN->getMaskElt(i); 12579 if (Idx >= 0) { 12580 if (Idx >= (int)NumElts) 12581 Idx -= NumElts; 12582 else 12583 Idx = -1; // remove reference to lhs 12584 } 12585 NewMask.push_back(Idx); 12586 } 12587 return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT), 12588 &NewMask[0]); 12589 } 12590 12591 // Remove references to rhs if it is undef 12592 if (N1.getOpcode() == ISD::UNDEF) { 12593 bool Changed = false; 12594 SmallVector<int, 8> NewMask; 12595 for (unsigned i = 0; i != NumElts; ++i) { 12596 int Idx = SVN->getMaskElt(i); 12597 if (Idx >= (int)NumElts) { 12598 Idx = -1; 12599 Changed = true; 12600 } 12601 NewMask.push_back(Idx); 12602 } 12603 if (Changed) 12604 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]); 12605 } 12606 12607 // If it is a splat, check if the argument vector is another splat or a 12608 // build_vector. 12609 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { 12610 SDNode *V = N0.getNode(); 12611 12612 // If this is a bit convert that changes the element type of the vector but 12613 // not the number of vector elements, look through it. Be careful not to 12614 // look though conversions that change things like v4f32 to v2f64. 
12615 if (V->getOpcode() == ISD::BITCAST) { 12616 SDValue ConvInput = V->getOperand(0); 12617 if (ConvInput.getValueType().isVector() && 12618 ConvInput.getValueType().getVectorNumElements() == NumElts) 12619 V = ConvInput.getNode(); 12620 } 12621 12622 if (V->getOpcode() == ISD::BUILD_VECTOR) { 12623 assert(V->getNumOperands() == NumElts && 12624 "BUILD_VECTOR has wrong number of operands"); 12625 SDValue Base; 12626 bool AllSame = true; 12627 for (unsigned i = 0; i != NumElts; ++i) { 12628 if (V->getOperand(i).getOpcode() != ISD::UNDEF) { 12629 Base = V->getOperand(i); 12630 break; 12631 } 12632 } 12633 // Splat of <u, u, u, u>, return <u, u, u, u> 12634 if (!Base.getNode()) 12635 return N0; 12636 for (unsigned i = 0; i != NumElts; ++i) { 12637 if (V->getOperand(i) != Base) { 12638 AllSame = false; 12639 break; 12640 } 12641 } 12642 // Splat of <x, x, x, x>, return <x, x, x, x> 12643 if (AllSame) 12644 return N0; 12645 12646 // Canonicalize any other splat as a build_vector. 12647 const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); 12648 SmallVector<SDValue, 8> Ops(NumElts, Splatted); 12649 SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), 12650 V->getValueType(0), Ops); 12651 12652 // We may have jumped through bitcasts, so the type of the 12653 // BUILD_VECTOR may not match the type of the shuffle. 12654 if (V->getValueType(0) != VT) 12655 NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); 12656 return NewBV; 12657 } 12658 } 12659 12660 // There are various patterns used to build up a vector from smaller vectors, 12661 // subvectors, or elements. Scan chains of these and replace unused insertions 12662 // or components with undef. 
12663 if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG)) 12664 return S; 12665 12666 if (N0.getOpcode() == ISD::CONCAT_VECTORS && 12667 Level < AfterLegalizeVectorOps && 12668 (N1.getOpcode() == ISD::UNDEF || 12669 (N1.getOpcode() == ISD::CONCAT_VECTORS && 12670 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) { 12671 SDValue V = partitionShuffleOfConcats(N, DAG); 12672 12673 if (V.getNode()) 12674 return V; 12675 } 12676 12677 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - 12678 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. 12679 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { 12680 SmallVector<SDValue, 8> Ops; 12681 for (int M : SVN->getMask()) { 12682 SDValue Op = DAG.getUNDEF(VT.getScalarType()); 12683 if (M >= 0) { 12684 int Idx = M % NumElts; 12685 SDValue &S = (M < (int)NumElts ? N0 : N1); 12686 if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) { 12687 Op = S.getOperand(Idx); 12688 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) { 12689 if (Idx == 0) 12690 Op = S.getOperand(0); 12691 } else { 12692 // Operand can't be combined - bail out. 12693 break; 12694 } 12695 } 12696 Ops.push_back(Op); 12697 } 12698 if (Ops.size() == VT.getVectorNumElements()) { 12699 // BUILD_VECTOR requires all inputs to be of the same type, find the 12700 // maximum type and extend them all. 12701 EVT SVT = VT.getScalarType(); 12702 if (SVT.isInteger()) 12703 for (SDValue &Op : Ops) 12704 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); 12705 if (SVT != VT.getScalarType()) 12706 for (SDValue &Op : Ops) 12707 Op = TLI.isZExtFree(Op.getValueType(), SVT) 12708 ? 
DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) 12709 : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); 12710 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops); 12711 } 12712 } 12713 12714 // If this shuffle only has a single input that is a bitcasted shuffle, 12715 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output 12716 // back to their original types. 12717 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && 12718 N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps && 12719 TLI.isTypeLegal(VT)) { 12720 12721 // Peek through the bitcast only if there is one user. 12722 SDValue BC0 = N0; 12723 while (BC0.getOpcode() == ISD::BITCAST) { 12724 if (!BC0.hasOneUse()) 12725 break; 12726 BC0 = BC0.getOperand(0); 12727 } 12728 12729 auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) { 12730 if (Scale == 1) 12731 return SmallVector<int, 8>(Mask.begin(), Mask.end()); 12732 12733 SmallVector<int, 8> NewMask; 12734 for (int M : Mask) 12735 for (int s = 0; s != Scale; ++s) 12736 NewMask.push_back(M < 0 ? -1 : Scale * M + s); 12737 return NewMask; 12738 }; 12739 12740 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { 12741 EVT SVT = VT.getScalarType(); 12742 EVT InnerVT = BC0->getValueType(0); 12743 EVT InnerSVT = InnerVT.getScalarType(); 12744 12745 // Determine which shuffle works with the smaller scalar type. 12746 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT; 12747 EVT ScaleSVT = ScaleVT.getScalarType(); 12748 12749 if (TLI.isTypeLegal(ScaleVT) && 12750 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && 12751 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) { 12752 12753 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); 12754 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); 12755 12756 // Scale the shuffle masks to the smaller scalar type. 
12757 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0); 12758 SmallVector<int, 8> InnerMask = 12759 ScaleShuffleMask(InnerSVN->getMask(), InnerScale); 12760 SmallVector<int, 8> OuterMask = 12761 ScaleShuffleMask(SVN->getMask(), OuterScale); 12762 12763 // Merge the shuffle masks. 12764 SmallVector<int, 8> NewMask; 12765 for (int M : OuterMask) 12766 NewMask.push_back(M < 0 ? -1 : InnerMask[M]); 12767 12768 // Test for shuffle mask legality over both commutations. 12769 SDValue SV0 = BC0->getOperand(0); 12770 SDValue SV1 = BC0->getOperand(1); 12771 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); 12772 if (!LegalMask) { 12773 std::swap(SV0, SV1); 12774 ShuffleVectorSDNode::commuteMask(NewMask); 12775 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); 12776 } 12777 12778 if (LegalMask) { 12779 SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0); 12780 SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1); 12781 return DAG.getNode( 12782 ISD::BITCAST, SDLoc(N), VT, 12783 DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); 12784 } 12785 } 12786 } 12787 } 12788 12789 // Canonicalize shuffles according to rules: 12790 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) 12791 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) 12792 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) 12793 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && 12794 N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && 12795 TLI.isTypeLegal(VT)) { 12796 // The incoming shuffle must be of the same type as the result of the 12797 // current shuffle. 
12798 assert(N1->getOperand(0).getValueType() == VT && 12799 "Shuffle types don't match"); 12800 12801 SDValue SV0 = N1->getOperand(0); 12802 SDValue SV1 = N1->getOperand(1); 12803 bool HasSameOp0 = N0 == SV0; 12804 bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; 12805 if (HasSameOp0 || IsSV1Undef || N0 == SV1) 12806 // Commute the operands of this shuffle so that next rule 12807 // will trigger. 12808 return DAG.getCommutedVectorShuffle(*SVN); 12809 } 12810 12811 // Try to fold according to rules: 12812 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) 12813 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) 12814 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) 12815 // Don't try to fold shuffles with illegal type. 12816 // Only fold if this shuffle is the only user of the other shuffle. 12817 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) && 12818 Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { 12819 ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); 12820 12821 // The incoming shuffle must be of the same type as the result of the 12822 // current shuffle. 12823 assert(OtherSV->getOperand(0).getValueType() == VT && 12824 "Shuffle types don't match"); 12825 12826 SDValue SV0, SV1; 12827 SmallVector<int, 4> Mask; 12828 // Compute the combined shuffle mask for a shuffle with SV0 as the first 12829 // operand, and SV1 as the second operand. 12830 for (unsigned i = 0; i != NumElts; ++i) { 12831 int Idx = SVN->getMaskElt(i); 12832 if (Idx < 0) { 12833 // Propagate Undef. 12834 Mask.push_back(Idx); 12835 continue; 12836 } 12837 12838 SDValue CurrentVec; 12839 if (Idx < (int)NumElts) { 12840 // This shuffle index refers to the inner shuffle N0. Lookup the inner 12841 // shuffle mask to identify which vector is actually referenced. 12842 Idx = OtherSV->getMaskElt(Idx); 12843 if (Idx < 0) { 12844 // Propagate Undef. 
12845 Mask.push_back(Idx); 12846 continue; 12847 } 12848 12849 CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0) 12850 : OtherSV->getOperand(1); 12851 } else { 12852 // This shuffle index references an element within N1. 12853 CurrentVec = N1; 12854 } 12855 12856 // Simple case where 'CurrentVec' is UNDEF. 12857 if (CurrentVec.getOpcode() == ISD::UNDEF) { 12858 Mask.push_back(-1); 12859 continue; 12860 } 12861 12862 // Canonicalize the shuffle index. We don't know yet if CurrentVec 12863 // will be the first or second operand of the combined shuffle. 12864 Idx = Idx % NumElts; 12865 if (!SV0.getNode() || SV0 == CurrentVec) { 12866 // Ok. CurrentVec is the left hand side. 12867 // Update the mask accordingly. 12868 SV0 = CurrentVec; 12869 Mask.push_back(Idx); 12870 continue; 12871 } 12872 12873 // Bail out if we cannot convert the shuffle pair into a single shuffle. 12874 if (SV1.getNode() && SV1 != CurrentVec) 12875 return SDValue(); 12876 12877 // Ok. CurrentVec is the right hand side. 12878 // Update the mask accordingly. 12879 SV1 = CurrentVec; 12880 Mask.push_back(Idx + NumElts); 12881 } 12882 12883 // Check if all indices in Mask are Undef. In case, propagate Undef. 12884 bool isUndefMask = true; 12885 for (unsigned i = 0; i != NumElts && isUndefMask; ++i) 12886 isUndefMask &= Mask[i] < 0; 12887 12888 if (isUndefMask) 12889 return DAG.getUNDEF(VT); 12890 12891 if (!SV0.getNode()) 12892 SV0 = DAG.getUNDEF(VT); 12893 if (!SV1.getNode()) 12894 SV1 = DAG.getUNDEF(VT); 12895 12896 // Avoid introducing shuffles with illegal mask. 
12897 if (!TLI.isShuffleMaskLegal(Mask, VT)) { 12898 ShuffleVectorSDNode::commuteMask(Mask); 12899 12900 if (!TLI.isShuffleMaskLegal(Mask, VT)) 12901 return SDValue(); 12902 12903 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) 12904 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) 12905 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) 12906 std::swap(SV0, SV1); 12907 } 12908 12909 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) 12910 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) 12911 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) 12912 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); 12913 } 12914 12915 return SDValue(); 12916 } 12917 12918 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { 12919 SDValue InVal = N->getOperand(0); 12920 EVT VT = N->getValueType(0); 12921 12922 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern 12923 // with a VECTOR_SHUFFLE. 12924 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 12925 SDValue InVec = InVal->getOperand(0); 12926 SDValue EltNo = InVal->getOperand(1); 12927 12928 // FIXME: We could support implicit truncation if the shuffle can be 12929 // scaled to a smaller vector scalar type. 
12930 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo); 12931 if (C0 && VT == InVec.getValueType() && 12932 VT.getScalarType() == InVal.getValueType()) { 12933 SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1); 12934 int Elt = C0->getZExtValue(); 12935 NewMask[0] = Elt; 12936 12937 if (TLI.isShuffleMaskLegal(NewMask, VT)) 12938 return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT), 12939 NewMask); 12940 } 12941 } 12942 12943 return SDValue(); 12944 } 12945 12946 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { 12947 SDValue N0 = N->getOperand(0); 12948 SDValue N2 = N->getOperand(2); 12949 12950 // If the input vector is a concatenation, and the insert replaces 12951 // one of the halves, we can optimize into a single concat_vectors. 12952 if (N0.getOpcode() == ISD::CONCAT_VECTORS && 12953 N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { 12954 APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); 12955 EVT VT = N->getValueType(0); 12956 12957 // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> 12958 // (concat_vectors Z, Y) 12959 if (InsIdx == 0) 12960 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, 12961 N->getOperand(1), N0.getOperand(1)); 12962 12963 // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) -> 12964 // (concat_vectors X, Z) 12965 if (InsIdx == VT.getVectorNumElements()/2) 12966 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, 12967 N0.getOperand(0), N->getOperand(1)); 12968 } 12969 12970 return SDValue(); 12971 } 12972 12973 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { 12974 SDValue N0 = N->getOperand(0); 12975 12976 // fold (fp_to_fp16 (fp16_to_fp op)) -> op 12977 if (N0->getOpcode() == ISD::FP16_TO_FP) 12978 return N0->getOperand(0); 12979 12980 return SDValue(); 12981 } 12982 12983 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle 12984 /// with the destination vector and a zero vector. 12985 /// e.g. 
AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> 12986 /// vector_shuffle V, Zero, <0, 4, 2, 4> 12987 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { 12988 EVT VT = N->getValueType(0); 12989 SDValue LHS = N->getOperand(0); 12990 SDValue RHS = N->getOperand(1); 12991 SDLoc dl(N); 12992 12993 // Make sure we're not running after operation legalization where it 12994 // may have custom lowered the vector shuffles. 12995 if (LegalOperations) 12996 return SDValue(); 12997 12998 if (N->getOpcode() != ISD::AND) 12999 return SDValue(); 13000 13001 if (RHS.getOpcode() == ISD::BITCAST) 13002 RHS = RHS.getOperand(0); 13003 13004 if (RHS.getOpcode() == ISD::BUILD_VECTOR) { 13005 SmallVector<int, 8> Indices; 13006 unsigned NumElts = RHS.getNumOperands(); 13007 13008 for (unsigned i = 0; i != NumElts; ++i) { 13009 SDValue Elt = RHS.getOperand(i); 13010 if (isAllOnesConstant(Elt)) 13011 Indices.push_back(i); 13012 else if (isNullConstant(Elt)) 13013 Indices.push_back(NumElts+i); 13014 else 13015 return SDValue(); 13016 } 13017 13018 // Let's see if the target supports this vector_shuffle. 13019 EVT RVT = RHS.getValueType(); 13020 if (!TLI.isVectorClearMaskLegal(Indices, RVT)) 13021 return SDValue(); 13022 13023 // Return the new VECTOR_SHUFFLE node. 13024 EVT EltVT = RVT.getVectorElementType(); 13025 SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), 13026 DAG.getConstant(0, dl, EltVT)); 13027 SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps); 13028 LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); 13029 SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); 13030 return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); 13031 } 13032 13033 return SDValue(); 13034 } 13035 13036 /// Visit a binary vector operation, like ADD. 
13037 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { 13038 assert(N->getValueType(0).isVector() && 13039 "SimplifyVBinOp only works on vectors!"); 13040 13041 SDValue LHS = N->getOperand(0); 13042 SDValue RHS = N->getOperand(1); 13043 13044 if (SDValue Shuffle = XformToShuffleWithZero(N)) 13045 return Shuffle; 13046 13047 // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold 13048 // this operation. 13049 if (LHS.getOpcode() == ISD::BUILD_VECTOR && 13050 RHS.getOpcode() == ISD::BUILD_VECTOR) { 13051 // Check if both vectors are constants. If not bail out. 13052 if (!(cast<BuildVectorSDNode>(LHS)->isConstant() && 13053 cast<BuildVectorSDNode>(RHS)->isConstant())) 13054 return SDValue(); 13055 13056 SmallVector<SDValue, 8> Ops; 13057 for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { 13058 SDValue LHSOp = LHS.getOperand(i); 13059 SDValue RHSOp = RHS.getOperand(i); 13060 13061 // Can't fold divide by zero. 13062 if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || 13063 N->getOpcode() == ISD::FDIV) { 13064 if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP && 13065 cast<ConstantFPSDNode>(RHSOp.getNode())->isZero())) 13066 break; 13067 } 13068 13069 EVT VT = LHSOp.getValueType(); 13070 EVT RVT = RHSOp.getValueType(); 13071 if (RVT != VT) { 13072 // Integer BUILD_VECTOR operands may have types larger than the element 13073 // size (e.g., when the element type is not legal). Prior to type 13074 // legalization, the types may not match between the two BUILD_VECTORS. 13075 // Truncate one of the operands to make them match. 
13076 if (RVT.getSizeInBits() > VT.getSizeInBits()) { 13077 RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp); 13078 } else { 13079 LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp); 13080 VT = RVT; 13081 } 13082 } 13083 SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT, 13084 LHSOp, RHSOp); 13085 if (FoldOp.getOpcode() != ISD::UNDEF && 13086 FoldOp.getOpcode() != ISD::Constant && 13087 FoldOp.getOpcode() != ISD::ConstantFP) 13088 break; 13089 Ops.push_back(FoldOp); 13090 AddToWorklist(FoldOp.getNode()); 13091 } 13092 13093 if (Ops.size() == LHS.getNumOperands()) 13094 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops); 13095 } 13096 13097 // Type legalization might introduce new shuffles in the DAG. 13098 // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) 13099 // -> (shuffle (VBinOp (A, B)), Undef, Mask). 13100 if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) && 13101 isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() && 13102 LHS.getOperand(1).getOpcode() == ISD::UNDEF && 13103 RHS.getOperand(1).getOpcode() == ISD::UNDEF) { 13104 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); 13105 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); 13106 13107 if (SVN0->getMask().equals(SVN1->getMask())) { 13108 EVT VT = N->getValueType(0); 13109 SDValue UndefVector = LHS.getOperand(1); 13110 SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 13111 LHS.getOperand(0), RHS.getOperand(0)); 13112 AddUsersToWorklist(N); 13113 return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, 13114 &SVN0->getMask()[0]); 13115 } 13116 } 13117 13118 return SDValue(); 13119 } 13120 13121 SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, 13122 SDValue N1, SDValue N2){ 13123 assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); 13124 13125 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, 13126 
cast<CondCodeSDNode>(N0.getOperand(2))->get()); 13127 13128 // If we got a simplified select_cc node back from SimplifySelectCC, then 13129 // break it down into a new SETCC node, and a new SELECT node, and then return 13130 // the SELECT node, since we were called with a SELECT node. 13131 if (SCC.getNode()) { 13132 // Check to see if we got a select_cc back (to turn into setcc/select). 13133 // Otherwise, just return whatever node we got back, like fabs. 13134 if (SCC.getOpcode() == ISD::SELECT_CC) { 13135 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), 13136 N0.getValueType(), 13137 SCC.getOperand(0), SCC.getOperand(1), 13138 SCC.getOperand(4)); 13139 AddToWorklist(SETCC.getNode()); 13140 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, 13141 SCC.getOperand(2), SCC.getOperand(3)); 13142 } 13143 13144 return SCC; 13145 } 13146 return SDValue(); 13147 } 13148 13149 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values 13150 /// being selected between, see if we can simplify the select. Callers of this 13151 /// should assume that TheSelect is deleted if this returns true. As such, they 13152 /// should return the appropriate thing (e.g. the node) back to the top-level of 13153 /// the DAG combiner loop to avoid it being looked at. 13154 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, 13155 SDValue RHS) { 13156 13157 // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) 13158 // The select + setcc is redundant, because fsqrt returns NaN for X < -0. 
13159 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { 13160 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) { 13161 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) 13162 SDValue Sqrt = RHS; 13163 ISD::CondCode CC; 13164 SDValue CmpLHS; 13165 const ConstantFPSDNode *NegZero = nullptr; 13166 13167 if (TheSelect->getOpcode() == ISD::SELECT_CC) { 13168 CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); 13169 CmpLHS = TheSelect->getOperand(0); 13170 NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1)); 13171 } else { 13172 // SELECT or VSELECT 13173 SDValue Cmp = TheSelect->getOperand(0); 13174 if (Cmp.getOpcode() == ISD::SETCC) { 13175 CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); 13176 CmpLHS = Cmp.getOperand(0); 13177 NegZero = isConstOrConstSplatFP(Cmp.getOperand(1)); 13178 } 13179 } 13180 if (NegZero && NegZero->isNegative() && NegZero->isZero() && 13181 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT || 13182 CC == ISD::SETULT || CC == ISD::SETLT)) { 13183 // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) 13184 CombineTo(TheSelect, Sqrt); 13185 return true; 13186 } 13187 } 13188 } 13189 // Cannot simplify select with vector condition 13190 if (TheSelect->getOperand(0).getValueType().isVector()) return false; 13191 13192 // If this is a select from two identical things, try to pull the operation 13193 // through the select. 13194 if (LHS.getOpcode() != RHS.getOpcode() || 13195 !LHS.hasOneUse() || !RHS.hasOneUse()) 13196 return false; 13197 13198 // If this is a load and the token chain is identical, replace the select 13199 // of two loads with a load through a select of the address to load from. 13200 // This triggers in things like "select bool X, 10.0, 123.0" after the FP 13201 // constants have been dropped into the constant pool. 
13202 if (LHS.getOpcode() == ISD::LOAD) { 13203 LoadSDNode *LLD = cast<LoadSDNode>(LHS); 13204 LoadSDNode *RLD = cast<LoadSDNode>(RHS); 13205 13206 // Token chains must be identical. 13207 if (LHS.getOperand(0) != RHS.getOperand(0) || 13208 // Do not let this transformation reduce the number of volatile loads. 13209 LLD->isVolatile() || RLD->isVolatile() || 13210 // FIXME: If either is a pre/post inc/dec load, 13211 // we'd need to split out the address adjustment. 13212 LLD->isIndexed() || RLD->isIndexed() || 13213 // If this is an EXTLOAD, the VT's must match. 13214 LLD->getMemoryVT() != RLD->getMemoryVT() || 13215 // If this is an EXTLOAD, the kind of extension must match. 13216 (LLD->getExtensionType() != RLD->getExtensionType() && 13217 // The only exception is if one of the extensions is anyext. 13218 LLD->getExtensionType() != ISD::EXTLOAD && 13219 RLD->getExtensionType() != ISD::EXTLOAD) || 13220 // FIXME: this discards src value information. This is 13221 // over-conservative. It would be beneficial to be able to remember 13222 // both potential memory locations. Since we are discarding 13223 // src value info, don't do the transformation if the memory 13224 // locations are not in the default address space. 13225 LLD->getPointerInfo().getAddrSpace() != 0 || 13226 RLD->getPointerInfo().getAddrSpace() != 0 || 13227 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), 13228 LLD->getBasePtr().getValueType())) 13229 return false; 13230 13231 // Check that the select condition doesn't reach either load. If so, 13232 // folding this will induce a cycle into the DAG. If not, this is safe to 13233 // xform, so create a select of the addresses. 
13234 SDValue Addr; 13235 if (TheSelect->getOpcode() == ISD::SELECT) { 13236 SDNode *CondNode = TheSelect->getOperand(0).getNode(); 13237 if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || 13238 (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) 13239 return false; 13240 // The loads must not depend on one another. 13241 if (LLD->isPredecessorOf(RLD) || 13242 RLD->isPredecessorOf(LLD)) 13243 return false; 13244 Addr = DAG.getSelect(SDLoc(TheSelect), 13245 LLD->getBasePtr().getValueType(), 13246 TheSelect->getOperand(0), LLD->getBasePtr(), 13247 RLD->getBasePtr()); 13248 } else { // Otherwise SELECT_CC 13249 SDNode *CondLHS = TheSelect->getOperand(0).getNode(); 13250 SDNode *CondRHS = TheSelect->getOperand(1).getNode(); 13251 13252 if ((LLD->hasAnyUseOfValue(1) && 13253 (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || 13254 (RLD->hasAnyUseOfValue(1) && 13255 (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) 13256 return false; 13257 13258 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), 13259 LLD->getBasePtr().getValueType(), 13260 TheSelect->getOperand(0), 13261 TheSelect->getOperand(1), 13262 LLD->getBasePtr(), RLD->getBasePtr(), 13263 TheSelect->getOperand(4)); 13264 } 13265 13266 SDValue Load; 13267 // It is safe to replace the two loads if they have different alignments, 13268 // but the new load must be the minimum (most restrictive) alignment of the 13269 // inputs. 13270 bool isInvariant = LLD->isInvariant() & RLD->isInvariant(); 13271 unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); 13272 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { 13273 Load = DAG.getLoad(TheSelect->getValueType(0), 13274 SDLoc(TheSelect), 13275 // FIXME: Discards pointer and AA info. 
13276 LLD->getChain(), Addr, MachinePointerInfo(), 13277 LLD->isVolatile(), LLD->isNonTemporal(), 13278 isInvariant, Alignment); 13279 } else { 13280 Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? 13281 RLD->getExtensionType() : LLD->getExtensionType(), 13282 SDLoc(TheSelect), 13283 TheSelect->getValueType(0), 13284 // FIXME: Discards pointer and AA info. 13285 LLD->getChain(), Addr, MachinePointerInfo(), 13286 LLD->getMemoryVT(), LLD->isVolatile(), 13287 LLD->isNonTemporal(), isInvariant, Alignment); 13288 } 13289 13290 // Users of the select now use the result of the load. 13291 CombineTo(TheSelect, Load); 13292 13293 // Users of the old loads now use the new load's chain. We know the 13294 // old-load value is dead now. 13295 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); 13296 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); 13297 return true; 13298 } 13299 13300 return false; 13301 } 13302 13303 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 13304 /// where 'cond' is the comparison specified by CC. 13305 SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, 13306 SDValue N2, SDValue N3, 13307 ISD::CondCode CC, bool NotExtCompare) { 13308 // (x ? y : y) -> y. 13309 if (N2 == N3) return N2; 13310 13311 EVT VT = N2.getValueType(); 13312 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 13313 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); 13314 13315 // Determine if the condition we're dealing with is constant 13316 SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), 13317 N0, N1, CC, DL, false); 13318 if (SCC.getNode()) AddToWorklist(SCC.getNode()); 13319 13320 if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { 13321 // fold select_cc true, x, y -> x 13322 // fold select_cc false, x, y -> y 13323 return !SCCC->isNullValue() ? 
N2 : N3; 13324 } 13325 13326 // Check to see if we can simplify the select into an fabs node 13327 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { 13328 // Allow either -0.0 or 0.0 13329 if (CFP->isZero()) { 13330 // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs 13331 if ((CC == ISD::SETGE || CC == ISD::SETGT) && 13332 N0 == N2 && N3.getOpcode() == ISD::FNEG && 13333 N2 == N3.getOperand(0)) 13334 return DAG.getNode(ISD::FABS, DL, VT, N0); 13335 13336 // select (setl[te] X, +/-0.0), fneg(X), X -> fabs 13337 if ((CC == ISD::SETLT || CC == ISD::SETLE) && 13338 N0 == N3 && N2.getOpcode() == ISD::FNEG && 13339 N2.getOperand(0) == N3) 13340 return DAG.getNode(ISD::FABS, DL, VT, N3); 13341 } 13342 } 13343 13344 // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" 13345 // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 13346 // in it. This is a win when the constant is not otherwise available because 13347 // it replaces two constant pool loads with one. We only do this if the FP 13348 // type is known to be legal, because if it isn't, then we are before legalize 13349 // types an we want the other legalization to happen first (e.g. to avoid 13350 // messing with soft float) and if the ConstantFP is not legal, because if 13351 // it is legal, we may not need to store the FP constant in a constant pool. 13352 if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2)) 13353 if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) { 13354 if (TLI.isTypeLegal(N2.getValueType()) && 13355 (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != 13356 TargetLowering::Legal && 13357 !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) && 13358 !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) && 13359 // If both constants have multiple uses, then we won't need to do an 13360 // extra load, they are likely around in registers for other users. 
13361 (TV->hasOneUse() || FV->hasOneUse())) { 13362 Constant *Elts[] = { 13363 const_cast<ConstantFP*>(FV->getConstantFPValue()), 13364 const_cast<ConstantFP*>(TV->getConstantFPValue()) 13365 }; 13366 Type *FPTy = Elts[0]->getType(); 13367 const DataLayout &TD = DAG.getDataLayout(); 13368 13369 // Create a ConstantArray of the two constants. 13370 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); 13371 SDValue CPIdx = 13372 DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), 13373 TD.getPrefTypeAlignment(FPTy)); 13374 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); 13375 13376 // Get the offsets to the 0 and 1 element of the array so that we can 13377 // select between them. 13378 SDValue Zero = DAG.getIntPtrConstant(0, DL); 13379 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); 13380 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); 13381 13382 SDValue Cond = DAG.getSetCC(DL, 13383 getSetCCResultType(N0.getValueType()), 13384 N0, N1, CC); 13385 AddToWorklist(Cond.getNode()); 13386 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), 13387 Cond, One, Zero); 13388 AddToWorklist(CstOffset.getNode()); 13389 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, 13390 CstOffset); 13391 AddToWorklist(CPIdx.getNode()); 13392 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, 13393 MachinePointerInfo::getConstantPool(), false, 13394 false, false, Alignment); 13395 } 13396 } 13397 13398 // Check to see if we can perform the "gzip trick", transforming 13399 // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) 13400 if (isNullConstant(N3) && CC == ISD::SETLT && 13401 (isNullConstant(N1) || // (a < 0) ? b : 0 13402 (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? 
a : 0 13403 EVT XType = N0.getValueType(); 13404 EVT AType = N2.getValueType(); 13405 if (XType.bitsGE(AType)) { 13406 // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a 13407 // single-bit constant. 13408 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { 13409 unsigned ShCtV = N2C->getAPIntValue().logBase2(); 13410 ShCtV = XType.getSizeInBits() - ShCtV - 1; 13411 SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0), 13412 getShiftAmountTy(N0.getValueType())); 13413 SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), 13414 XType, N0, ShCt); 13415 AddToWorklist(Shift.getNode()); 13416 13417 if (XType.bitsGT(AType)) { 13418 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 13419 AddToWorklist(Shift.getNode()); 13420 } 13421 13422 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 13423 } 13424 13425 SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), 13426 XType, N0, 13427 DAG.getConstant(XType.getSizeInBits() - 1, 13428 SDLoc(N0), 13429 getShiftAmountTy(N0.getValueType()))); 13430 AddToWorklist(Shift.getNode()); 13431 13432 if (XType.bitsGT(AType)) { 13433 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 13434 AddToWorklist(Shift.getNode()); 13435 } 13436 13437 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 13438 } 13439 } 13440 13441 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) 13442 // where y is has a single bit set. 13443 // A plaintext description would be, we can turn the SELECT_CC into an AND 13444 // when the condition can be materialized as an all-ones register. Any 13445 // single bit-test can be materialized as an all-ones register with 13446 // shift-left and shift-right-arith. 
13447 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && 13448 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { 13449 SDValue AndLHS = N0->getOperand(0); 13450 ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 13451 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { 13452 // Shift the tested bit over the sign bit. 13453 APInt AndMask = ConstAndRHS->getAPIntValue(); 13454 SDValue ShlAmt = 13455 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), 13456 getShiftAmountTy(AndLHS.getValueType())); 13457 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); 13458 13459 // Now arithmetic right shift it all the way over, so the result is either 13460 // all-ones, or zero. 13461 SDValue ShrAmt = 13462 DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), 13463 getShiftAmountTy(Shl.getValueType())); 13464 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); 13465 13466 return DAG.getNode(ISD::AND, DL, VT, Shr, N3); 13467 } 13468 } 13469 13470 // fold select C, 16, 0 -> shl C, 4 13471 if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() && 13472 TLI.getBooleanContents(N0.getValueType()) == 13473 TargetLowering::ZeroOrOneBooleanContent) { 13474 13475 // If the caller doesn't want us to simplify this into a zext of a compare, 13476 // don't do it. 13477 if (NotExtCompare && N2C->isOne()) 13478 return SDValue(); 13479 13480 // Get a SetCC of the condition 13481 // NOTE: Don't create a SETCC if it's not legal on this target. 13482 if (!LegalOperations || 13483 TLI.isOperationLegal(ISD::SETCC, 13484 LegalTypes ? 
getSetCCResultType(N0.getValueType()) : MVT::i1)) { 13485 SDValue Temp, SCC; 13486 // cast from setcc result type to select result type 13487 if (LegalTypes) { 13488 SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), 13489 N0, N1, CC); 13490 if (N2.getValueType().bitsLT(SCC.getValueType())) 13491 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), 13492 N2.getValueType()); 13493 else 13494 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), 13495 N2.getValueType(), SCC); 13496 } else { 13497 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); 13498 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), 13499 N2.getValueType(), SCC); 13500 } 13501 13502 AddToWorklist(SCC.getNode()); 13503 AddToWorklist(Temp.getNode()); 13504 13505 if (N2C->isOne()) 13506 return Temp; 13507 13508 // shl setcc result by log2 n2c 13509 return DAG.getNode( 13510 ISD::SHL, DL, N2.getValueType(), Temp, 13511 DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp), 13512 getShiftAmountTy(Temp.getValueType()))); 13513 } 13514 } 13515 13516 // Check to see if this is the equivalent of setcc 13517 // FIXME: Turn all of these into setcc if setcc if setcc is legal 13518 // otherwise, go ahead with the folds. 
13519 if (0 && isNullConstant(N3) && isOneConstant(N2)) { 13520 EVT XType = N0.getValueType(); 13521 if (!LegalOperations || 13522 TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { 13523 SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC); 13524 if (Res.getValueType() != VT) 13525 Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); 13526 return Res; 13527 } 13528 13529 // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) 13530 if (isNullConstant(N1) && CC == ISD::SETEQ && 13531 (!LegalOperations || 13532 TLI.isOperationLegal(ISD::CTLZ, XType))) { 13533 SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); 13534 return DAG.getNode(ISD::SRL, DL, XType, Ctlz, 13535 DAG.getConstant(Log2_32(XType.getSizeInBits()), 13536 SDLoc(Ctlz), 13537 getShiftAmountTy(Ctlz.getValueType()))); 13538 } 13539 // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) 13540 if (isNullConstant(N1) && CC == ISD::SETGT) { 13541 SDLoc DL(N0); 13542 SDValue NegN0 = DAG.getNode(ISD::SUB, DL, 13543 XType, DAG.getConstant(0, DL, XType), N0); 13544 SDValue NotN0 = DAG.getNOT(DL, N0, XType); 13545 return DAG.getNode(ISD::SRL, DL, XType, 13546 DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), 13547 DAG.getConstant(XType.getSizeInBits() - 1, DL, 13548 getShiftAmountTy(XType))); 13549 } 13550 // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) 13551 if (isAllOnesConstant(N1) && CC == ISD::SETGT) { 13552 SDLoc DL(N0); 13553 SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0, 13554 DAG.getConstant(XType.getSizeInBits() - 1, DL, 13555 getShiftAmountTy(N0.getValueType()))); 13556 return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL, 13557 XType)); 13558 } 13559 } 13560 13561 // Check to see if this is an integer abs. 
13562 // select_cc setg[te] X, 0, X, -X -> 13563 // select_cc setgt X, -1, X, -X -> 13564 // select_cc setl[te] X, 0, -X, X -> 13565 // select_cc setlt X, 1, -X, X -> 13566 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 13567 if (N1C) { 13568 ConstantSDNode *SubC = nullptr; 13569 if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || 13570 (N1C->isAllOnesValue() && CC == ISD::SETGT)) && 13571 N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) 13572 SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); 13573 else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || 13574 (N1C->isOne() && CC == ISD::SETLT)) && 13575 N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) 13576 SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); 13577 13578 EVT XType = N0.getValueType(); 13579 if (SubC && SubC->isNullValue() && XType.isInteger()) { 13580 SDLoc DL(N0); 13581 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, 13582 N0, 13583 DAG.getConstant(XType.getSizeInBits() - 1, DL, 13584 getShiftAmountTy(N0.getValueType()))); 13585 SDValue Add = DAG.getNode(ISD::ADD, DL, 13586 XType, N0, Shift); 13587 AddToWorklist(Shift.getNode()); 13588 AddToWorklist(Add.getNode()); 13589 return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); 13590 } 13591 } 13592 13593 return SDValue(); 13594 } 13595 13596 /// This is a stub for TargetLowering::SimplifySetCC. 13597 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, 13598 SDValue N1, ISD::CondCode Cond, 13599 SDLoc DL, bool foldBooleans) { 13600 TargetLowering::DAGCombinerInfo 13601 DagCombineInfo(DAG, Level, false, this); 13602 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); 13603 } 13604 13605 /// Given an ISD::SDIV node expressing a divide by constant, return 13606 /// a DAG expression to select that will generate the same value by multiplying 13607 /// by a magic number. 13608 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 
13609 SDValue DAGCombiner::BuildSDIV(SDNode *N) { 13610 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); 13611 if (!C) 13612 return SDValue(); 13613 13614 // Avoid division by zero. 13615 if (C->isNullValue()) 13616 return SDValue(); 13617 13618 std::vector<SDNode*> Built; 13619 SDValue S = 13620 TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); 13621 13622 for (SDNode *N : Built) 13623 AddToWorklist(N); 13624 return S; 13625 } 13626 13627 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a 13628 /// DAG expression that will generate the same value by right shifting. 13629 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { 13630 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); 13631 if (!C) 13632 return SDValue(); 13633 13634 // Avoid division by zero. 13635 if (C->isNullValue()) 13636 return SDValue(); 13637 13638 std::vector<SDNode *> Built; 13639 SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built); 13640 13641 for (SDNode *N : Built) 13642 AddToWorklist(N); 13643 return S; 13644 } 13645 13646 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG 13647 /// expression that will generate the same value by multiplying by a magic 13648 /// number. 13649 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 13650 SDValue DAGCombiner::BuildUDIV(SDNode *N) { 13651 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); 13652 if (!C) 13653 return SDValue(); 13654 13655 // Avoid division by zero. 
13656 if (C->isNullValue()) 13657 return SDValue(); 13658 13659 std::vector<SDNode*> Built; 13660 SDValue S = 13661 TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); 13662 13663 for (SDNode *N : Built) 13664 AddToWorklist(N); 13665 return S; 13666 } 13667 13668 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { 13669 if (Level >= AfterLegalizeDAG) 13670 return SDValue(); 13671 13672 // Expose the DAG combiner to the target combiner implementations. 13673 TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); 13674 13675 unsigned Iterations = 0; 13676 if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) { 13677 if (Iterations) { 13678 // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 13679 // For the reciprocal, we need to find the zero of the function: 13680 // F(X) = A X - 1 [which has a zero at X = 1/A] 13681 // => 13682 // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form 13683 // does not require additional intermediate precision] 13684 EVT VT = Op.getValueType(); 13685 SDLoc DL(Op); 13686 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); 13687 13688 AddToWorklist(Est.getNode()); 13689 13690 // Newton iterations: Est = Est + Est (1 - Arg * Est) 13691 for (unsigned i = 0; i < Iterations; ++i) { 13692 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est); 13693 AddToWorklist(NewEst.getNode()); 13694 13695 NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst); 13696 AddToWorklist(NewEst.getNode()); 13697 13698 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); 13699 AddToWorklist(NewEst.getNode()); 13700 13701 Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst); 13702 AddToWorklist(Est.getNode()); 13703 } 13704 } 13705 return Est; 13706 } 13707 13708 return SDValue(); 13709 } 13710 13711 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 13712 /// For the reciprocal sqrt, we need to find the zero of the function: 13713 /// F(X) = 1/X^2 - A [which has a 
zero at X = 1/sqrt(A)] 13714 /// => 13715 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) 13716 /// As a result, we precompute A/2 prior to the iteration loop. 13717 SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, 13718 unsigned Iterations) { 13719 EVT VT = Arg.getValueType(); 13720 SDLoc DL(Arg); 13721 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); 13722 13723 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that 13724 // this entire sequence requires only one FP constant. 13725 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); 13726 AddToWorklist(HalfArg.getNode()); 13727 13728 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); 13729 AddToWorklist(HalfArg.getNode()); 13730 13731 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) 13732 for (unsigned i = 0; i < Iterations; ++i) { 13733 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); 13734 AddToWorklist(NewEst.getNode()); 13735 13736 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); 13737 AddToWorklist(NewEst.getNode()); 13738 13739 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); 13740 AddToWorklist(NewEst.getNode()); 13741 13742 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); 13743 AddToWorklist(Est.getNode()); 13744 } 13745 return Est; 13746 } 13747 13748 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 13749 /// For the reciprocal sqrt, we need to find the zero of the function: 13750 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] 13751 /// => 13752 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) 13753 SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, 13754 unsigned Iterations) { 13755 EVT VT = Arg.getValueType(); 13756 SDLoc DL(Arg); 13757 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); 13758 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); 13759 13760 // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) 13761 for (unsigned i 
= 0; i < Iterations; ++i) { 13762 SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); 13763 AddToWorklist(HalfEst.getNode()); 13764 13765 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); 13766 AddToWorklist(Est.getNode()); 13767 13768 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); 13769 AddToWorklist(Est.getNode()); 13770 13771 Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree); 13772 AddToWorklist(Est.getNode()); 13773 13774 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst); 13775 AddToWorklist(Est.getNode()); 13776 } 13777 return Est; 13778 } 13779 13780 SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { 13781 if (Level >= AfterLegalizeDAG) 13782 return SDValue(); 13783 13784 // Expose the DAG combiner to the target combiner implementations. 13785 TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); 13786 unsigned Iterations = 0; 13787 bool UseOneConstNR = false; 13788 if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { 13789 AddToWorklist(Est.getNode()); 13790 if (Iterations) { 13791 Est = UseOneConstNR ? 13792 BuildRsqrtNROneConst(Op, Est, Iterations) : 13793 BuildRsqrtNRTwoConst(Op, Est, Iterations); 13794 } 13795 return Est; 13796 } 13797 13798 return SDValue(); 13799 } 13800 13801 /// Return true if base is a frame index, which is known not to alias with 13802 /// anything but itself. Provides base object and offset as results. 13803 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, 13804 const GlobalValue *&GV, const void *&CV) { 13805 // Assume it is a primitive operation. 13806 Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; 13807 13808 // If it's an adding a simple constant then integrate the offset. 13809 if (Base.getOpcode() == ISD::ADD) { 13810 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) { 13811 Base = Base.getOperand(0); 13812 Offset += C->getZExtValue(); 13813 } 13814 } 13815 13816 // Return the underlying GlobalValue, and update the Offset. 
Return false 13817 // for GlobalAddressSDNode since the same GlobalAddress may be represented 13818 // by multiple nodes with different offsets. 13819 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) { 13820 GV = G->getGlobal(); 13821 Offset += G->getOffset(); 13822 return false; 13823 } 13824 13825 // Return the underlying Constant value, and update the Offset. Return false 13826 // for ConstantSDNodes since the same constant pool entry may be represented 13827 // by multiple nodes with different offsets. 13828 if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) { 13829 CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal() 13830 : (const void *)C->getConstVal(); 13831 Offset += C->getOffset(); 13832 return false; 13833 } 13834 // If it's any of the following then it can't alias with anything but itself. 13835 return isa<FrameIndexSDNode>(Base); 13836 } 13837 13838 /// Return true if there is any possibility that the two addresses overlap. 13839 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { 13840 // If they are the same then they must be aliases. 13841 if (Op0->getBasePtr() == Op1->getBasePtr()) return true; 13842 13843 // If they are both volatile then they cannot be reordered. 13844 if (Op0->isVolatile() && Op1->isVolatile()) return true; 13845 13846 // If one operation reads from invariant memory, and the other may store, they 13847 // cannot alias. These should really be checking the equivalent of mayWrite, 13848 // but it only matters for memory nodes other than load /store. 13849 if (Op0->isInvariant() && Op1->writeMem()) 13850 return false; 13851 13852 if (Op1->isInvariant() && Op0->writeMem()) 13853 return false; 13854 13855 // Gather base node and offset information. 
13856 SDValue Base1, Base2; 13857 int64_t Offset1, Offset2; 13858 const GlobalValue *GV1, *GV2; 13859 const void *CV1, *CV2; 13860 bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(), 13861 Base1, Offset1, GV1, CV1); 13862 bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(), 13863 Base2, Offset2, GV2, CV2); 13864 13865 // If they have a same base address then check to see if they overlap. 13866 if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) 13867 return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || 13868 (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); 13869 13870 // It is possible for different frame indices to alias each other, mostly 13871 // when tail call optimization reuses return address slots for arguments. 13872 // To catch this case, look up the actual index of frame indices to compute 13873 // the real alias relationship. 13874 if (isFrameIndex1 && isFrameIndex2) { 13875 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 13876 Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); 13877 Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); 13878 return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || 13879 (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); 13880 } 13881 13882 // Otherwise, if we know what the bases are, and they aren't identical, then 13883 // we know they cannot alias. 13884 if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) 13885 return false; 13886 13887 // If we know required SrcValue1 and SrcValue2 have relatively large alignment 13888 // compared to the size and offset of the access, we may be able to prove they 13889 // do not alias. This check is conservative for now to catch cases created by 13890 // splitting vector types. 
13891 if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) && 13892 (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && 13893 (Op0->getMemoryVT().getSizeInBits() >> 3 == 13894 Op1->getMemoryVT().getSizeInBits() >> 3) && 13895 (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits()) >> 3) { 13896 int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); 13897 int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); 13898 13899 // There is no overlap between these relatively aligned accesses of similar 13900 // size, return no alias. 13901 if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 || 13902 (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1) 13903 return false; 13904 } 13905 13906 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 13907 ? CombinerGlobalAA 13908 : DAG.getSubtarget().useAA(); 13909 #ifndef NDEBUG 13910 if (CombinerAAOnlyFunc.getNumOccurrences() && 13911 CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) 13912 UseAA = false; 13913 #endif 13914 if (UseAA && 13915 Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { 13916 // Use alias analysis information. 13917 int64_t MinOffset = std::min(Op0->getSrcValueOffset(), 13918 Op1->getSrcValueOffset()); 13919 int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) + 13920 Op0->getSrcValueOffset() - MinOffset; 13921 int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + 13922 Op1->getSrcValueOffset() - MinOffset; 13923 AliasResult AAResult = 13924 AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1, 13925 UseTBAA ? Op0->getAAInfo() : AAMDNodes()), 13926 MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2, 13927 UseTBAA ? Op1->getAAInfo() : AAMDNodes())); 13928 if (AAResult == NoAlias) 13929 return false; 13930 } 13931 13932 // Otherwise we have to assume they alias. 
13933 return true; 13934 } 13935 13936 /// Walk up chain skipping non-aliasing memory nodes, 13937 /// looking for aliasing nodes and adding them to the Aliases vector. 13938 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, 13939 SmallVectorImpl<SDValue> &Aliases) { 13940 SmallVector<SDValue, 8> Chains; // List of chains to visit. 13941 SmallPtrSet<SDNode *, 16> Visited; // Visited node set. 13942 13943 // Get alias information for node. 13944 bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile(); 13945 13946 // Starting off. 13947 Chains.push_back(OriginalChain); 13948 unsigned Depth = 0; 13949 13950 // Look at each chain and determine if it is an alias. If so, add it to the 13951 // aliases list. If not, then continue up the chain looking for the next 13952 // candidate. 13953 while (!Chains.empty()) { 13954 SDValue Chain = Chains.pop_back_val(); 13955 13956 // For TokenFactor nodes, look at each operand and only continue up the 13957 // chain until we find two aliases. If we've seen two aliases, assume we'll 13958 // find more and revert to original chain since the xform is unlikely to be 13959 // profitable. 13960 // 13961 // FIXME: The depth check could be made to return the last non-aliasing 13962 // chain we found before we hit a tokenfactor rather than the original 13963 // chain. 13964 if (Depth > 6 || Aliases.size() == 2) { 13965 Aliases.clear(); 13966 Aliases.push_back(OriginalChain); 13967 return; 13968 } 13969 13970 // Don't bother if we've been before. 13971 if (!Visited.insert(Chain.getNode()).second) 13972 continue; 13973 13974 switch (Chain.getOpcode()) { 13975 case ISD::EntryToken: 13976 // Entry token is ideal chain operand, but handled in FindBetterChain. 13977 break; 13978 13979 case ISD::LOAD: 13980 case ISD::STORE: { 13981 // Get alias information for Chain. 
13982 bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) && 13983 !cast<LSBaseSDNode>(Chain.getNode())->isVolatile(); 13984 13985 // If chain is alias then stop here. 13986 if (!(IsLoad && IsOpLoad) && 13987 isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) { 13988 Aliases.push_back(Chain); 13989 } else { 13990 // Look further up the chain. 13991 Chains.push_back(Chain.getOperand(0)); 13992 ++Depth; 13993 } 13994 break; 13995 } 13996 13997 case ISD::TokenFactor: 13998 // We have to check each of the operands of the token factor for "small" 13999 // token factors, so we queue them up. Adding the operands to the queue 14000 // (stack) in reverse order maintains the original order and increases the 14001 // likelihood that getNode will find a matching token factor (CSE.) 14002 if (Chain.getNumOperands() > 16) { 14003 Aliases.push_back(Chain); 14004 break; 14005 } 14006 for (unsigned n = Chain.getNumOperands(); n;) 14007 Chains.push_back(Chain.getOperand(--n)); 14008 ++Depth; 14009 break; 14010 14011 default: 14012 // For all other instructions we will just have to take what we can get. 14013 Aliases.push_back(Chain); 14014 break; 14015 } 14016 } 14017 14018 // We need to be careful here to also search for aliases through the 14019 // value operand of a store, etc. Consider the following situation: 14020 // Token1 = ... 14021 // L1 = load Token1, %52 14022 // S1 = store Token1, L1, %51 14023 // L2 = load Token1, %52+8 14024 // S2 = store Token1, L2, %51+8 14025 // Token2 = Token(S1, S2) 14026 // L3 = load Token2, %53 14027 // S3 = store Token2, L3, %52 14028 // L4 = load Token2, %53+8 14029 // S4 = store Token2, L4, %52+8 14030 // If we search for aliases of S3 (which loads address %52), and we look 14031 // only through the chain, then we'll miss the trivial dependence on L1 14032 // (which also loads from %52). 
We then might change all loads and 14033 // stores to use Token1 as their chain operand, which could result in 14034 // copying %53 into %52 before copying %52 into %51 (which should 14035 // happen first). 14036 // 14037 // The problem is, however, that searching for such data dependencies 14038 // can become expensive, and the cost is not directly related to the 14039 // chain depth. Instead, we'll rule out such configurations here by 14040 // insisting that we've visited all chain users (except for users 14041 // of the original chain, which is not necessary). When doing this, 14042 // we need to look through nodes we don't care about (otherwise, things 14043 // like register copies will interfere with trivial cases). 14044 14045 SmallVector<const SDNode *, 16> Worklist; 14046 for (const SDNode *N : Visited) 14047 if (N != OriginalChain.getNode()) 14048 Worklist.push_back(N); 14049 14050 while (!Worklist.empty()) { 14051 const SDNode *M = Worklist.pop_back_val(); 14052 14053 // We have already visited M, and want to make sure we've visited any uses 14054 // of M that we care about. For uses that we've not visisted, and don't 14055 // care about, queue them to the worklist. 14056 14057 for (SDNode::use_iterator UI = M->use_begin(), 14058 UIE = M->use_end(); UI != UIE; ++UI) 14059 if (UI.getUse().getValueType() == MVT::Other && 14060 Visited.insert(*UI).second) { 14061 if (isa<MemSDNode>(*UI)) { 14062 // We've not visited this use, and we care about it (it could have an 14063 // ordering dependency with the original node). 14064 Aliases.clear(); 14065 Aliases.push_back(OriginalChain); 14066 return; 14067 } 14068 14069 // We've not visited this use, but we don't care about it. Mark it as 14070 // visited and enqueue it to the worklist. 14071 Worklist.push_back(*UI); 14072 } 14073 } 14074 } 14075 14076 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain 14077 /// (aliasing node.) 
14078 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { 14079 SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor. 14080 14081 // Accumulate all the aliases to this node. 14082 GatherAllAliases(N, OldChain, Aliases); 14083 14084 // If no operands then chain to entry token. 14085 if (Aliases.size() == 0) 14086 return DAG.getEntryNode(); 14087 14088 // If a single operand then chain to it. We don't need to revisit it. 14089 if (Aliases.size() == 1) 14090 return Aliases[0]; 14091 14092 // Construct a custom tailored token factor. 14093 return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); 14094 } 14095 14096 /// This is the entry point for the file. 14097 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, 14098 CodeGenOpt::Level OptLevel) { 14099 /// This is the main entry point to this class. 14100 DAGCombiner(*this, AA, OptLevel).Run(Level); 14101 } 14102