1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run 11 // both before and after the DAG is legalized. 12 // 13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is 14 // primarily intended to handle simplification opportunities that are implicit 15 // in the LLVM IR and exposed by the various codegen lowering phases. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #define DEBUG_TYPE "dagcombine" 20 #include "llvm/CodeGen/SelectionDAG.h" 21 #include "llvm/DerivedTypes.h" 22 #include "llvm/LLVMContext.h" 23 #include "llvm/CodeGen/MachineFunction.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/Analysis/AliasAnalysis.h" 26 #include "llvm/Target/TargetData.h" 27 #include "llvm/Target/TargetLowering.h" 28 #include "llvm/Target/TargetMachine.h" 29 #include "llvm/Target/TargetOptions.h" 30 #include "llvm/ADT/SmallPtrSet.h" 31 #include "llvm/ADT/Statistic.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/MathExtras.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include <algorithm> 38 using namespace llvm; 39 40 STATISTIC(NodesCombined , "Number of dag nodes combined"); 41 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); 42 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); 43 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); 44 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); 45 46 namespace { 47 static cl::opt<bool> 48 
CombinerAA("combiner-alias-analysis", cl::Hidden, 49 cl::desc("Turn on alias analysis during testing")); 50 51 static cl::opt<bool> 52 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, 53 cl::desc("Include global information in alias analysis")); 54 55 //------------------------------ DAGCombiner ---------------------------------// 56 57 class DAGCombiner { 58 SelectionDAG &DAG; 59 const TargetLowering &TLI; 60 CombineLevel Level; 61 CodeGenOpt::Level OptLevel; 62 bool LegalOperations; 63 bool LegalTypes; 64 65 // Worklist of all of the nodes that need to be simplified. 66 std::vector<SDNode*> WorkList; 67 68 // AA - Used for DAG load/store alias analysis. 69 AliasAnalysis &AA; 70 71 /// AddUsersToWorkList - When an instruction is simplified, add all users of 72 /// the instruction to the work lists because they might get more simplified 73 /// now. 74 /// 75 void AddUsersToWorkList(SDNode *N) { 76 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 77 UI != UE; ++UI) 78 AddToWorkList(*UI); 79 } 80 81 /// visit - call the node-specific routine that knows how to fold each 82 /// particular type of node. 83 SDValue visit(SDNode *N); 84 85 public: 86 /// AddToWorkList - Add to the work list making sure it's instance is at the 87 /// the back (next to be processed.) 88 void AddToWorkList(SDNode *N) { 89 removeFromWorkList(N); 90 WorkList.push_back(N); 91 } 92 93 /// removeFromWorkList - remove all instances of N from the worklist. 
94 /// 95 void removeFromWorkList(SDNode *N) { 96 WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N), 97 WorkList.end()); 98 } 99 100 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 101 bool AddTo = true); 102 103 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { 104 return CombineTo(N, &Res, 1, AddTo); 105 } 106 107 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, 108 bool AddTo = true) { 109 SDValue To[] = { Res0, Res1 }; 110 return CombineTo(N, To, 2, AddTo); 111 } 112 113 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); 114 115 private: 116 117 /// SimplifyDemandedBits - Check the specified integer node value to see if 118 /// it can be simplified or if things it uses can be simplified by bit 119 /// propagation. If so, return true. 120 bool SimplifyDemandedBits(SDValue Op) { 121 unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); 122 APInt Demanded = APInt::getAllOnesValue(BitWidth); 123 return SimplifyDemandedBits(Op, Demanded); 124 } 125 126 bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); 127 128 bool CombineToPreIndexedLoadStore(SDNode *N); 129 bool CombineToPostIndexedLoadStore(SDNode *N); 130 131 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); 132 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); 133 SDValue SExtPromoteOperand(SDValue Op, EVT PVT); 134 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); 135 SDValue PromoteIntBinOp(SDValue Op); 136 SDValue PromoteIntShiftOp(SDValue Op); 137 SDValue PromoteExtend(SDValue Op); 138 bool PromoteLoad(SDValue Op); 139 140 void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, 141 SDValue Trunc, SDValue ExtLoad, DebugLoc DL, 142 ISD::NodeType ExtType); 143 144 /// combine - call the node-specific routine that knows how to fold each 145 /// particular type of node. If that doesn't do anything, try the 146 /// target-specific DAG combines. 
147 SDValue combine(SDNode *N); 148 149 // Visitation implementation - Implement dag node combining for different 150 // node types. The semantics are as follows: 151 // Return Value: 152 // SDValue.getNode() == 0 - No change was made 153 // SDValue.getNode() == N - N was replaced, is dead and has been handled. 154 // otherwise - N should be replaced by the returned Operand. 155 // 156 SDValue visitTokenFactor(SDNode *N); 157 SDValue visitMERGE_VALUES(SDNode *N); 158 SDValue visitADD(SDNode *N); 159 SDValue visitSUB(SDNode *N); 160 SDValue visitADDC(SDNode *N); 161 SDValue visitADDE(SDNode *N); 162 SDValue visitMUL(SDNode *N); 163 SDValue visitSDIV(SDNode *N); 164 SDValue visitUDIV(SDNode *N); 165 SDValue visitSREM(SDNode *N); 166 SDValue visitUREM(SDNode *N); 167 SDValue visitMULHU(SDNode *N); 168 SDValue visitMULHS(SDNode *N); 169 SDValue visitSMUL_LOHI(SDNode *N); 170 SDValue visitUMUL_LOHI(SDNode *N); 171 SDValue visitSMULO(SDNode *N); 172 SDValue visitUMULO(SDNode *N); 173 SDValue visitSDIVREM(SDNode *N); 174 SDValue visitUDIVREM(SDNode *N); 175 SDValue visitAND(SDNode *N); 176 SDValue visitOR(SDNode *N); 177 SDValue visitXOR(SDNode *N); 178 SDValue SimplifyVBinOp(SDNode *N); 179 SDValue visitSHL(SDNode *N); 180 SDValue visitSRA(SDNode *N); 181 SDValue visitSRL(SDNode *N); 182 SDValue visitCTLZ(SDNode *N); 183 SDValue visitCTTZ(SDNode *N); 184 SDValue visitCTPOP(SDNode *N); 185 SDValue visitSELECT(SDNode *N); 186 SDValue visitSELECT_CC(SDNode *N); 187 SDValue visitSETCC(SDNode *N); 188 SDValue visitSIGN_EXTEND(SDNode *N); 189 SDValue visitZERO_EXTEND(SDNode *N); 190 SDValue visitANY_EXTEND(SDNode *N); 191 SDValue visitSIGN_EXTEND_INREG(SDNode *N); 192 SDValue visitTRUNCATE(SDNode *N); 193 SDValue visitBITCAST(SDNode *N); 194 SDValue visitBUILD_PAIR(SDNode *N); 195 SDValue visitFADD(SDNode *N); 196 SDValue visitFSUB(SDNode *N); 197 SDValue visitFMUL(SDNode *N); 198 SDValue visitFDIV(SDNode *N); 199 SDValue visitFREM(SDNode *N); 200 SDValue visitFCOPYSIGN(SDNode 
*N); 201 SDValue visitSINT_TO_FP(SDNode *N); 202 SDValue visitUINT_TO_FP(SDNode *N); 203 SDValue visitFP_TO_SINT(SDNode *N); 204 SDValue visitFP_TO_UINT(SDNode *N); 205 SDValue visitFP_ROUND(SDNode *N); 206 SDValue visitFP_ROUND_INREG(SDNode *N); 207 SDValue visitFP_EXTEND(SDNode *N); 208 SDValue visitFNEG(SDNode *N); 209 SDValue visitFABS(SDNode *N); 210 SDValue visitBRCOND(SDNode *N); 211 SDValue visitBR_CC(SDNode *N); 212 SDValue visitLOAD(SDNode *N); 213 SDValue visitSTORE(SDNode *N); 214 SDValue visitINSERT_VECTOR_ELT(SDNode *N); 215 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); 216 SDValue visitBUILD_VECTOR(SDNode *N); 217 SDValue visitCONCAT_VECTORS(SDNode *N); 218 SDValue visitEXTRACT_SUBVECTOR(SDNode *N); 219 SDValue visitVECTOR_SHUFFLE(SDNode *N); 220 SDValue visitMEMBARRIER(SDNode *N); 221 222 SDValue XformToShuffleWithZero(SDNode *N); 223 SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); 224 225 SDValue visitShiftByConstant(SDNode *N, unsigned Amt); 226 227 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); 228 SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); 229 SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); 230 SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, 231 SDValue N3, ISD::CondCode CC, 232 bool NotExtCompare = false); 233 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, 234 DebugLoc DL, bool foldBooleans = true); 235 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 236 unsigned HiOp); 237 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); 238 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); 239 SDValue BuildSDIV(SDNode *N); 240 SDValue BuildUDIV(SDNode *N); 241 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 242 bool DemandHighBits = true); 243 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); 244 SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); 245 SDValue 
ReduceLoadWidth(SDNode *N); 246 SDValue ReduceLoadOpStoreWidth(SDNode *N); 247 SDValue TransformFPLoadStorePair(SDNode *N); 248 249 SDValue GetDemandedBits(SDValue V, const APInt &Mask); 250 251 /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, 252 /// looking for aliasing nodes and adding them to the Aliases vector. 253 void GatherAllAliases(SDNode *N, SDValue OriginalChain, 254 SmallVector<SDValue, 8> &Aliases); 255 256 /// isAlias - Return true if there is any possibility that the two addresses 257 /// overlap. 258 bool isAlias(SDValue Ptr1, int64_t Size1, 259 const Value *SrcValue1, int SrcValueOffset1, 260 unsigned SrcValueAlign1, 261 const MDNode *TBAAInfo1, 262 SDValue Ptr2, int64_t Size2, 263 const Value *SrcValue2, int SrcValueOffset2, 264 unsigned SrcValueAlign2, 265 const MDNode *TBAAInfo2) const; 266 267 /// FindAliasInfo - Extracts the relevant alias information from the memory 268 /// node. Returns true if the operand was a load. 269 bool FindAliasInfo(SDNode *N, 270 SDValue &Ptr, int64_t &Size, 271 const Value *&SrcValue, int &SrcValueOffset, 272 unsigned &SrcValueAlignment, 273 const MDNode *&TBAAInfo) const; 274 275 /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, 276 /// looking for a better chain (aliasing node.) 277 SDValue FindBetterChain(SDNode *N, SDValue Chain); 278 279 public: 280 DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) 281 : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), 282 OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} 283 284 /// Run - runs the dag combiner on all nodes in the work list 285 void Run(CombineLevel AtLevel); 286 287 SelectionDAG &getDAG() const { return DAG; } 288 289 /// getShiftAmountTy - Returns a type large enough to hold any valid 290 /// shift amount - before type legalization these can be huge. 291 EVT getShiftAmountTy(EVT LHSTy) { 292 return LegalTypes ? 
TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); 293 } 294 295 /// isTypeLegal - This method returns true if we are running before type 296 /// legalization or if the specified VT is legal. 297 bool isTypeLegal(const EVT &VT) { 298 if (!LegalTypes) return true; 299 return TLI.isTypeLegal(VT); 300 } 301 }; 302 } 303 304 305 namespace { 306 /// WorkListRemover - This class is a DAGUpdateListener that removes any deleted 307 /// nodes from the worklist. 308 class WorkListRemover : public SelectionDAG::DAGUpdateListener { 309 DAGCombiner &DC; 310 public: 311 explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {} 312 313 virtual void NodeDeleted(SDNode *N, SDNode *E) { 314 DC.removeFromWorkList(N); 315 } 316 317 virtual void NodeUpdated(SDNode *N) { 318 // Ignore updates. 319 } 320 }; 321 } 322 323 //===----------------------------------------------------------------------===// 324 // TargetLowering::DAGCombinerInfo implementation 325 //===----------------------------------------------------------------------===// 326 327 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { 328 ((DAGCombiner*)DC)->AddToWorkList(N); 329 } 330 331 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { 332 ((DAGCombiner*)DC)->removeFromWorkList(N); 333 } 334 335 SDValue TargetLowering::DAGCombinerInfo:: 336 CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { 337 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); 338 } 339 340 SDValue TargetLowering::DAGCombinerInfo:: 341 CombineTo(SDNode *N, SDValue Res, bool AddTo) { 342 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); 343 } 344 345 346 SDValue TargetLowering::DAGCombinerInfo:: 347 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { 348 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); 349 } 350 351 void TargetLowering::DAGCombinerInfo:: 352 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 353 return 
((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); 354 } 355 356 //===----------------------------------------------------------------------===// 357 // Helper Functions 358 //===----------------------------------------------------------------------===// 359 360 /// isNegatibleForFree - Return 1 if we can compute the negated form of the 361 /// specified expression for the same cost as the expression itself, or 2 if we 362 /// can compute the negated form more cheaply than the expression itself. 363 static char isNegatibleForFree(SDValue Op, bool LegalOperations, 364 const TargetOptions *Options, 365 unsigned Depth = 0) { 366 // No compile time optimizations on this type. 367 if (Op.getValueType() == MVT::ppcf128) 368 return 0; 369 370 // fneg is removable even if it has multiple uses. 371 if (Op.getOpcode() == ISD::FNEG) return 2; 372 373 // Don't allow anything with multiple uses. 374 if (!Op.hasOneUse()) return 0; 375 376 // Don't recurse exponentially. 377 if (Depth > 6) return 0; 378 379 switch (Op.getOpcode()) { 380 default: return false; 381 case ISD::ConstantFP: 382 // Don't invert constant FP values after legalize. The negated constant 383 // isn't necessarily legal. 384 return LegalOperations ? 0 : 1; 385 case ISD::FADD: 386 // FIXME: determine better conditions for this xform. 387 if (!Options->UnsafeFPMath) return 0; 388 389 // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) 390 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options, 391 Depth + 1)) 392 return V; 393 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 394 return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options, 395 Depth + 1); 396 case ISD::FSUB: 397 // We can't turn -(A-B) into B-A when we honor signed zeros. 
398 if (!Options->UnsafeFPMath) return 0; 399 400 // fold (fneg (fsub A, B)) -> (fsub B, A) 401 return 1; 402 403 case ISD::FMUL: 404 case ISD::FDIV: 405 if (Options->HonorSignDependentRoundingFPMath()) return 0; 406 407 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 408 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options, 409 Depth + 1)) 410 return V; 411 412 return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options, 413 Depth + 1); 414 415 case ISD::FP_EXTEND: 416 case ISD::FP_ROUND: 417 case ISD::FSIN: 418 return isNegatibleForFree(Op.getOperand(0), LegalOperations, Options, 419 Depth + 1); 420 } 421 } 422 423 /// GetNegatedExpression - If isNegatibleForFree returns true, this function 424 /// returns the newly negated expression. 425 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 426 bool LegalOperations, unsigned Depth = 0) { 427 // fneg is removable even if it has multiple uses. 428 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 429 430 // Don't allow anything with multiple uses. 431 assert(Op.hasOneUse() && "Unknown reuse!"); 432 433 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 434 switch (Op.getOpcode()) { 435 default: llvm_unreachable("Unknown code"); 436 case ISD::ConstantFP: { 437 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 438 V.changeSign(); 439 return DAG.getConstantFP(V, Op.getValueType()); 440 } 441 case ISD::FADD: 442 // FIXME: determine better conditions for this xform. 
443 assert(DAG.getTarget().Options.UnsafeFPMath); 444 445 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 446 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 447 &DAG.getTarget().Options, Depth+1)) 448 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 449 GetNegatedExpression(Op.getOperand(0), DAG, 450 LegalOperations, Depth+1), 451 Op.getOperand(1)); 452 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 453 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 454 GetNegatedExpression(Op.getOperand(1), DAG, 455 LegalOperations, Depth+1), 456 Op.getOperand(0)); 457 case ISD::FSUB: 458 // We can't turn -(A-B) into B-A when we honor signed zeros. 459 assert(DAG.getTarget().Options.UnsafeFPMath); 460 461 // fold (fneg (fsub 0, B)) -> B 462 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 463 if (N0CFP->getValueAPF().isZero()) 464 return Op.getOperand(1); 465 466 // fold (fneg (fsub A, B)) -> (fsub B, A) 467 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 468 Op.getOperand(1), Op.getOperand(0)); 469 470 case ISD::FMUL: 471 case ISD::FDIV: 472 assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); 473 474 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 475 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 476 &DAG.getTarget().Options, Depth+1)) 477 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 478 GetNegatedExpression(Op.getOperand(0), DAG, 479 LegalOperations, Depth+1), 480 Op.getOperand(1)); 481 482 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 483 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 484 Op.getOperand(0), 485 GetNegatedExpression(Op.getOperand(1), DAG, 486 LegalOperations, Depth+1)); 487 488 case ISD::FP_EXTEND: 489 case ISD::FSIN: 490 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 491 GetNegatedExpression(Op.getOperand(0), DAG, 492 LegalOperations, Depth+1)); 493 case 
ISD::FP_ROUND: 494 return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), 495 GetNegatedExpression(Op.getOperand(0), DAG, 496 LegalOperations, Depth+1), 497 Op.getOperand(1)); 498 } 499 } 500 501 502 // isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc 503 // that selects between the values 1 and 0, making it equivalent to a setcc. 504 // Also, set the incoming LHS, RHS, and CC references to the appropriate 505 // nodes based on the type of node we are checking. This simplifies life a 506 // bit for the callers. 507 static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 508 SDValue &CC) { 509 if (N.getOpcode() == ISD::SETCC) { 510 LHS = N.getOperand(0); 511 RHS = N.getOperand(1); 512 CC = N.getOperand(2); 513 return true; 514 } 515 if (N.getOpcode() == ISD::SELECT_CC && 516 N.getOperand(2).getOpcode() == ISD::Constant && 517 N.getOperand(3).getOpcode() == ISD::Constant && 518 cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && 519 cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { 520 LHS = N.getOperand(0); 521 RHS = N.getOperand(1); 522 CC = N.getOperand(4); 523 return true; 524 } 525 return false; 526 } 527 528 // isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only 529 // one use. If this is true, it allows the users to invert the operation for 530 // free when it is profitable to do so. 531 static bool isOneUseSetCC(SDValue N) { 532 SDValue N0, N1, N2; 533 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 534 return true; 535 return false; 536 } 537 538 SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, 539 SDValue N0, SDValue N1) { 540 EVT VT = N0.getValueType(); 541 if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { 542 if (isa<ConstantSDNode>(N1)) { 543 // reassoc. 
(op (op x, c1), c2) -> (op x, (op c1, c2)) 544 SDValue OpNode = 545 DAG.FoldConstantArithmetic(Opc, VT, 546 cast<ConstantSDNode>(N0.getOperand(1)), 547 cast<ConstantSDNode>(N1)); 548 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); 549 } 550 if (N0.hasOneUse()) { 551 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use 552 SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, 553 N0.getOperand(0), N1); 554 AddToWorkList(OpNode.getNode()); 555 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); 556 } 557 } 558 559 if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { 560 if (isa<ConstantSDNode>(N0)) { 561 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) 562 SDValue OpNode = 563 DAG.FoldConstantArithmetic(Opc, VT, 564 cast<ConstantSDNode>(N1.getOperand(1)), 565 cast<ConstantSDNode>(N0)); 566 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); 567 } 568 if (N1.hasOneUse()) { 569 // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use 570 SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, 571 N1.getOperand(0), N0); 572 AddToWorkList(OpNode.getNode()); 573 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); 574 } 575 } 576 577 return SDValue(); 578 } 579 580 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 581 bool AddTo) { 582 assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); 583 ++NodesCombined; 584 DEBUG(dbgs() << "\nReplacing.1 "; 585 N->dump(&DAG); 586 dbgs() << "\nWith: "; 587 To[0].getNode()->dump(&DAG); 588 dbgs() << " and " << NumTo-1 << " other values\n"; 589 for (unsigned i = 0, e = NumTo; i != e; ++i) 590 assert((!To[i].getNode() || 591 N->getValueType(i) == To[i].getValueType()) && 592 "Cannot combine value to value of different type!")); 593 WorkListRemover DeadNodes(*this); 594 DAG.ReplaceAllUsesWith(N, To, &DeadNodes); 595 596 if (AddTo) { 597 // Push the new nodes and any users onto the worklist 598 for (unsigned i 
= 0, e = NumTo; i != e; ++i) { 599 if (To[i].getNode()) { 600 AddToWorkList(To[i].getNode()); 601 AddUsersToWorkList(To[i].getNode()); 602 } 603 } 604 } 605 606 // Finally, if the node is now dead, remove it from the graph. The node 607 // may not be dead if the replacement process recursively simplified to 608 // something else needing this node. 609 if (N->use_empty()) { 610 // Nodes can be reintroduced into the worklist. Make sure we do not 611 // process a node that has been replaced. 612 removeFromWorkList(N); 613 614 // Finally, since the node is now dead, remove it from the graph. 615 DAG.DeleteNode(N); 616 } 617 return SDValue(N, 0); 618 } 619 620 void DAGCombiner:: 621 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 622 // Replace all uses. If any nodes become isomorphic to other nodes and 623 // are deleted, make sure to remove them from our worklist. 624 WorkListRemover DeadNodes(*this); 625 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); 626 627 // Push the new node and any (possibly new) users onto the worklist. 628 AddToWorkList(TLO.New.getNode()); 629 AddUsersToWorkList(TLO.New.getNode()); 630 631 // Finally, if the node is now dead, remove it from the graph. The node 632 // may not be dead if the replacement process recursively simplified to 633 // something else needing this node. 634 if (TLO.Old.getNode()->use_empty()) { 635 removeFromWorkList(TLO.Old.getNode()); 636 637 // If the operands of this node are only used by the node, they will now 638 // be dead. Make sure to visit them first to delete dead nodes early. 
639 for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) 640 if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) 641 AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); 642 643 DAG.DeleteNode(TLO.Old.getNode()); 644 } 645 } 646 647 /// SimplifyDemandedBits - Check the specified integer node value to see if 648 /// it can be simplified or if things it uses can be simplified by bit 649 /// propagation. If so, return true. 650 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { 651 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); 652 APInt KnownZero, KnownOne; 653 if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) 654 return false; 655 656 // Revisit the node. 657 AddToWorkList(Op.getNode()); 658 659 // Replace the old value with the new one. 660 ++NodesCombined; 661 DEBUG(dbgs() << "\nReplacing.2 "; 662 TLO.Old.getNode()->dump(&DAG); 663 dbgs() << "\nWith: "; 664 TLO.New.getNode()->dump(&DAG); 665 dbgs() << '\n'); 666 667 CommitTargetLoweringOpt(TLO); 668 return true; 669 } 670 671 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { 672 DebugLoc dl = Load->getDebugLoc(); 673 EVT VT = Load->getValueType(0); 674 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); 675 676 DEBUG(dbgs() << "\nReplacing.9 "; 677 Load->dump(&DAG); 678 dbgs() << "\nWith: "; 679 Trunc.getNode()->dump(&DAG); 680 dbgs() << '\n'); 681 WorkListRemover DeadNodes(*this); 682 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes); 683 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1), 684 &DeadNodes); 685 removeFromWorkList(Load); 686 DAG.DeleteNode(Load); 687 AddToWorkList(Trunc.getNode()); 688 } 689 690 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { 691 Replace = false; 692 DebugLoc dl = Op.getDebugLoc(); 693 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { 694 EVT MemVT = 
LD->getMemoryVT(); 695 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 696 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD 697 : ISD::EXTLOAD) 698 : LD->getExtensionType(); 699 Replace = true; 700 return DAG.getExtLoad(ExtType, dl, PVT, 701 LD->getChain(), LD->getBasePtr(), 702 LD->getPointerInfo(), 703 MemVT, LD->isVolatile(), 704 LD->isNonTemporal(), LD->getAlignment()); 705 } 706 707 unsigned Opc = Op.getOpcode(); 708 switch (Opc) { 709 default: break; 710 case ISD::AssertSext: 711 return DAG.getNode(ISD::AssertSext, dl, PVT, 712 SExtPromoteOperand(Op.getOperand(0), PVT), 713 Op.getOperand(1)); 714 case ISD::AssertZext: 715 return DAG.getNode(ISD::AssertZext, dl, PVT, 716 ZExtPromoteOperand(Op.getOperand(0), PVT), 717 Op.getOperand(1)); 718 case ISD::Constant: { 719 unsigned ExtOpc = 720 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 721 return DAG.getNode(ExtOpc, dl, PVT, Op); 722 } 723 } 724 725 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) 726 return SDValue(); 727 return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); 728 } 729 730 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { 731 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) 732 return SDValue(); 733 EVT OldVT = Op.getValueType(); 734 DebugLoc dl = Op.getDebugLoc(); 735 bool Replace = false; 736 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 737 if (NewOp.getNode() == 0) 738 return SDValue(); 739 AddToWorkList(NewOp.getNode()); 740 741 if (Replace) 742 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 743 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, 744 DAG.getValueType(OldVT)); 745 } 746 747 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { 748 EVT OldVT = Op.getValueType(); 749 DebugLoc dl = Op.getDebugLoc(); 750 bool Replace = false; 751 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 752 if (NewOp.getNode() == 0) 753 return SDValue(); 754 AddToWorkList(NewOp.getNode()); 755 756 if 
(Replace) 757 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 758 return DAG.getZeroExtendInReg(NewOp, dl, OldVT); 759 } 760 761 /// PromoteIntBinOp - Promote the specified integer binary operation if the 762 /// target indicates it is beneficial. e.g. On x86, it's usually better to 763 /// promote i16 operations to i32 since i16 instructions are longer. 764 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { 765 if (!LegalOperations) 766 return SDValue(); 767 768 EVT VT = Op.getValueType(); 769 if (VT.isVector() || !VT.isInteger()) 770 return SDValue(); 771 772 // If operation type is 'undesirable', e.g. i16 on x86, consider 773 // promoting it. 774 unsigned Opc = Op.getOpcode(); 775 if (TLI.isTypeDesirableForOp(Opc, VT)) 776 return SDValue(); 777 778 EVT PVT = VT; 779 // Consult target whether it is a good idea to promote this operation and 780 // what's the right type to promote it to. 781 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 782 assert(PVT != VT && "Don't know what type to promote to!"); 783 784 bool Replace0 = false; 785 SDValue N0 = Op.getOperand(0); 786 SDValue NN0 = PromoteOperand(N0, PVT, Replace0); 787 if (NN0.getNode() == 0) 788 return SDValue(); 789 790 bool Replace1 = false; 791 SDValue N1 = Op.getOperand(1); 792 SDValue NN1; 793 if (N0 == N1) 794 NN1 = NN0; 795 else { 796 NN1 = PromoteOperand(N1, PVT, Replace1); 797 if (NN1.getNode() == 0) 798 return SDValue(); 799 } 800 801 AddToWorkList(NN0.getNode()); 802 if (NN1.getNode()) 803 AddToWorkList(NN1.getNode()); 804 805 if (Replace0) 806 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); 807 if (Replace1) 808 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); 809 810 DEBUG(dbgs() << "\nPromoting "; 811 Op.getNode()->dump(&DAG)); 812 DebugLoc dl = Op.getDebugLoc(); 813 return DAG.getNode(ISD::TRUNCATE, dl, VT, 814 DAG.getNode(Opc, dl, PVT, NN0, NN1)); 815 } 816 return SDValue(); 817 } 818 819 /// PromoteIntShiftOp - Promote the specified integer shift operation if the 820 
/// target indicates it is beneficial. e.g. On x86, it's usually better to 821 /// promote i16 operations to i32 since i16 instructions are longer. 822 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { 823 if (!LegalOperations) 824 return SDValue(); 825 826 EVT VT = Op.getValueType(); 827 if (VT.isVector() || !VT.isInteger()) 828 return SDValue(); 829 830 // If operation type is 'undesirable', e.g. i16 on x86, consider 831 // promoting it. 832 unsigned Opc = Op.getOpcode(); 833 if (TLI.isTypeDesirableForOp(Opc, VT)) 834 return SDValue(); 835 836 EVT PVT = VT; 837 // Consult target whether it is a good idea to promote this operation and 838 // what's the right type to promote it to. 839 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 840 assert(PVT != VT && "Don't know what type to promote to!"); 841 842 bool Replace = false; 843 SDValue N0 = Op.getOperand(0); 844 if (Opc == ISD::SRA) 845 N0 = SExtPromoteOperand(Op.getOperand(0), PVT); 846 else if (Opc == ISD::SRL) 847 N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); 848 else 849 N0 = PromoteOperand(N0, PVT, Replace); 850 if (N0.getNode() == 0) 851 return SDValue(); 852 853 AddToWorkList(N0.getNode()); 854 if (Replace) 855 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); 856 857 DEBUG(dbgs() << "\nPromoting "; 858 Op.getNode()->dump(&DAG)); 859 DebugLoc dl = Op.getDebugLoc(); 860 return DAG.getNode(ISD::TRUNCATE, dl, VT, 861 DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); 862 } 863 return SDValue(); 864 } 865 866 SDValue DAGCombiner::PromoteExtend(SDValue Op) { 867 if (!LegalOperations) 868 return SDValue(); 869 870 EVT VT = Op.getValueType(); 871 if (VT.isVector() || !VT.isInteger()) 872 return SDValue(); 873 874 // If operation type is 'undesirable', e.g. i16 on x86, consider 875 // promoting it. 
876 unsigned Opc = Op.getOpcode(); 877 if (TLI.isTypeDesirableForOp(Opc, VT)) 878 return SDValue(); 879 880 EVT PVT = VT; 881 // Consult target whether it is a good idea to promote this operation and 882 // what's the right type to promote it to. 883 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 884 assert(PVT != VT && "Don't know what type to promote to!"); 885 // fold (aext (aext x)) -> (aext x) 886 // fold (aext (zext x)) -> (zext x) 887 // fold (aext (sext x)) -> (sext x) 888 DEBUG(dbgs() << "\nPromoting "; 889 Op.getNode()->dump(&DAG)); 890 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0)); 891 } 892 return SDValue(); 893 } 894 895 bool DAGCombiner::PromoteLoad(SDValue Op) { 896 if (!LegalOperations) 897 return false; 898 899 EVT VT = Op.getValueType(); 900 if (VT.isVector() || !VT.isInteger()) 901 return false; 902 903 // If operation type is 'undesirable', e.g. i16 on x86, consider 904 // promoting it. 905 unsigned Opc = Op.getOpcode(); 906 if (TLI.isTypeDesirableForOp(Opc, VT)) 907 return false; 908 909 EVT PVT = VT; 910 // Consult target whether it is a good idea to promote this operation and 911 // what's the right type to promote it to. 912 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 913 assert(PVT != VT && "Don't know what type to promote to!"); 914 915 DebugLoc dl = Op.getDebugLoc(); 916 SDNode *N = Op.getNode(); 917 LoadSDNode *LD = cast<LoadSDNode>(N); 918 EVT MemVT = LD->getMemoryVT(); 919 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 920 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? 
ISD::ZEXTLOAD 921 : ISD::EXTLOAD) 922 : LD->getExtensionType(); 923 SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, 924 LD->getChain(), LD->getBasePtr(), 925 LD->getPointerInfo(), 926 MemVT, LD->isVolatile(), 927 LD->isNonTemporal(), LD->getAlignment()); 928 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); 929 930 DEBUG(dbgs() << "\nPromoting "; 931 N->dump(&DAG); 932 dbgs() << "\nTo: "; 933 Result.getNode()->dump(&DAG); 934 dbgs() << '\n'); 935 WorkListRemover DeadNodes(*this); 936 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes); 937 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes); 938 removeFromWorkList(N); 939 DAG.DeleteNode(N); 940 AddToWorkList(Result.getNode()); 941 return true; 942 } 943 return false; 944 } 945 946 947 //===----------------------------------------------------------------------===// 948 // Main DAG Combiner implementation 949 //===----------------------------------------------------------------------===// 950 951 void DAGCombiner::Run(CombineLevel AtLevel) { 952 // set the instance variables, so that the various visit routines may use it. 953 Level = AtLevel; 954 LegalOperations = Level >= AfterLegalizeVectorOps; 955 LegalTypes = Level >= AfterLegalizeTypes; 956 957 // Add all the dag nodes to the worklist. 958 WorkList.reserve(DAG.allnodes_size()); 959 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 960 E = DAG.allnodes_end(); I != E; ++I) 961 WorkList.push_back(I); 962 963 // Create a dummy node (which is not added to allnodes), that adds a reference 964 // to the root node, preventing it from being deleted, and tracking any 965 // changes of the root. 966 HandleSDNode Dummy(DAG.getRoot()); 967 968 // The root of the dag may dangle to deleted nodes until the dag combiner is 969 // done. Set it to null to avoid confusion. 970 DAG.setRoot(SDValue()); 971 972 // while the worklist isn't empty, inspect the node on the end of it and 973 // try and combine it. 
974 while (!WorkList.empty()) { 975 SDNode *N = WorkList.back(); 976 WorkList.pop_back(); 977 978 // If N has no uses, it is dead. Make sure to revisit all N's operands once 979 // N is deleted from the DAG, since they too may now be dead or may have a 980 // reduced number of uses, allowing other xforms. 981 if (N->use_empty() && N != &Dummy) { 982 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 983 AddToWorkList(N->getOperand(i).getNode()); 984 985 DAG.DeleteNode(N); 986 continue; 987 } 988 989 SDValue RV = combine(N); 990 991 if (RV.getNode() == 0) 992 continue; 993 994 ++NodesCombined; 995 996 // If we get back the same node we passed in, rather than a new node or 997 // zero, we know that the node must have defined multiple values and 998 // CombineTo was used. Since CombineTo takes care of the worklist 999 // mechanics for us, we have no work to do in this case. 1000 if (RV.getNode() == N) 1001 continue; 1002 1003 assert(N->getOpcode() != ISD::DELETED_NODE && 1004 RV.getNode()->getOpcode() != ISD::DELETED_NODE && 1005 "Node was deleted but visit returned new node!"); 1006 1007 DEBUG(dbgs() << "\nReplacing.3 "; 1008 N->dump(&DAG); 1009 dbgs() << "\nWith: "; 1010 RV.getNode()->dump(&DAG); 1011 dbgs() << '\n'); 1012 1013 // Transfer debug value. 1014 DAG.TransferDbgValues(SDValue(N, 0), RV); 1015 WorkListRemover DeadNodes(*this); 1016 if (N->getNumValues() == RV.getNode()->getNumValues()) 1017 DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); 1018 else { 1019 assert(N->getValueType(0) == RV.getValueType() && 1020 N->getNumValues() == 1 && "Type mismatch"); 1021 SDValue OpV = RV; 1022 DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes); 1023 } 1024 1025 // Push the new node and any users onto the worklist 1026 AddToWorkList(RV.getNode()); 1027 AddUsersToWorkList(RV.getNode()); 1028 1029 // Add any uses of the old node to the worklist in case this node is the 1030 // last one that uses them. They may become dead after this node is 1031 // deleted. 
1032 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1033 AddToWorkList(N->getOperand(i).getNode()); 1034 1035 // Finally, if the node is now dead, remove it from the graph. The node 1036 // may not be dead if the replacement process recursively simplified to 1037 // something else needing this node. 1038 if (N->use_empty()) { 1039 // Nodes can be reintroduced into the worklist. Make sure we do not 1040 // process a node that has been replaced. 1041 removeFromWorkList(N); 1042 1043 // Finally, since the node is now dead, remove it from the graph. 1044 DAG.DeleteNode(N); 1045 } 1046 } 1047 1048 // If the root changed (e.g. it was a dead load, update the root). 1049 DAG.setRoot(Dummy.getValue()); 1050 } 1051 1052 SDValue DAGCombiner::visit(SDNode *N) { 1053 switch (N->getOpcode()) { 1054 default: break; 1055 case ISD::TokenFactor: return visitTokenFactor(N); 1056 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); 1057 case ISD::ADD: return visitADD(N); 1058 case ISD::SUB: return visitSUB(N); 1059 case ISD::ADDC: return visitADDC(N); 1060 case ISD::ADDE: return visitADDE(N); 1061 case ISD::MUL: return visitMUL(N); 1062 case ISD::SDIV: return visitSDIV(N); 1063 case ISD::UDIV: return visitUDIV(N); 1064 case ISD::SREM: return visitSREM(N); 1065 case ISD::UREM: return visitUREM(N); 1066 case ISD::MULHU: return visitMULHU(N); 1067 case ISD::MULHS: return visitMULHS(N); 1068 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); 1069 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); 1070 case ISD::SMULO: return visitSMULO(N); 1071 case ISD::UMULO: return visitUMULO(N); 1072 case ISD::SDIVREM: return visitSDIVREM(N); 1073 case ISD::UDIVREM: return visitUDIVREM(N); 1074 case ISD::AND: return visitAND(N); 1075 case ISD::OR: return visitOR(N); 1076 case ISD::XOR: return visitXOR(N); 1077 case ISD::SHL: return visitSHL(N); 1078 case ISD::SRA: return visitSRA(N); 1079 case ISD::SRL: return visitSRL(N); 1080 case ISD::CTLZ: return visitCTLZ(N); 1081 case ISD::CTTZ: return 
visitCTTZ(N); 1082 case ISD::CTPOP: return visitCTPOP(N); 1083 case ISD::SELECT: return visitSELECT(N); 1084 case ISD::SELECT_CC: return visitSELECT_CC(N); 1085 case ISD::SETCC: return visitSETCC(N); 1086 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); 1087 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); 1088 case ISD::ANY_EXTEND: return visitANY_EXTEND(N); 1089 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); 1090 case ISD::TRUNCATE: return visitTRUNCATE(N); 1091 case ISD::BITCAST: return visitBITCAST(N); 1092 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); 1093 case ISD::FADD: return visitFADD(N); 1094 case ISD::FSUB: return visitFSUB(N); 1095 case ISD::FMUL: return visitFMUL(N); 1096 case ISD::FDIV: return visitFDIV(N); 1097 case ISD::FREM: return visitFREM(N); 1098 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); 1099 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); 1100 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); 1101 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); 1102 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); 1103 case ISD::FP_ROUND: return visitFP_ROUND(N); 1104 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); 1105 case ISD::FP_EXTEND: return visitFP_EXTEND(N); 1106 case ISD::FNEG: return visitFNEG(N); 1107 case ISD::FABS: return visitFABS(N); 1108 case ISD::BRCOND: return visitBRCOND(N); 1109 case ISD::BR_CC: return visitBR_CC(N); 1110 case ISD::LOAD: return visitLOAD(N); 1111 case ISD::STORE: return visitSTORE(N); 1112 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); 1113 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); 1114 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); 1115 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); 1116 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); 1117 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); 1118 case ISD::MEMBARRIER: return visitMEMBARRIER(N); 1119 } 1120 return SDValue(); 1121 } 1122 1123 SDValue 
DAGCombiner::combine(SDNode *N) { 1124 SDValue RV = visit(N); 1125 1126 // If nothing happened, try a target-specific DAG combine. 1127 if (RV.getNode() == 0) { 1128 assert(N->getOpcode() != ISD::DELETED_NODE && 1129 "Node was deleted but visit returned NULL!"); 1130 1131 if (N->getOpcode() >= ISD::BUILTIN_OP_END || 1132 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { 1133 1134 // Expose the DAG combiner to the target combiner impls. 1135 TargetLowering::DAGCombinerInfo 1136 DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); 1137 1138 RV = TLI.PerformDAGCombine(N, DagCombineInfo); 1139 } 1140 } 1141 1142 // If nothing happened still, try promoting the operation. 1143 if (RV.getNode() == 0) { 1144 switch (N->getOpcode()) { 1145 default: break; 1146 case ISD::ADD: 1147 case ISD::SUB: 1148 case ISD::MUL: 1149 case ISD::AND: 1150 case ISD::OR: 1151 case ISD::XOR: 1152 RV = PromoteIntBinOp(SDValue(N, 0)); 1153 break; 1154 case ISD::SHL: 1155 case ISD::SRA: 1156 case ISD::SRL: 1157 RV = PromoteIntShiftOp(SDValue(N, 0)); 1158 break; 1159 case ISD::SIGN_EXTEND: 1160 case ISD::ZERO_EXTEND: 1161 case ISD::ANY_EXTEND: 1162 RV = PromoteExtend(SDValue(N, 0)); 1163 break; 1164 case ISD::LOAD: 1165 if (PromoteLoad(SDValue(N, 0))) 1166 RV = SDValue(N, 0); 1167 break; 1168 } 1169 } 1170 1171 // If N is a commutative binary node, try commuting it to enable more 1172 // sdisel CSE. 1173 if (RV.getNode() == 0 && 1174 SelectionDAG::isCommutativeBinOp(N->getOpcode()) && 1175 N->getNumValues() == 1) { 1176 SDValue N0 = N->getOperand(0); 1177 SDValue N1 = N->getOperand(1); 1178 1179 // Constant operands are canonicalized to RHS. 
1180 if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { 1181 SDValue Ops[] = { N1, N0 }; 1182 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), 1183 Ops, 2); 1184 if (CSENode) 1185 return SDValue(CSENode, 0); 1186 } 1187 } 1188 1189 return RV; 1190 } 1191 1192 /// getInputChainForNode - Given a node, return its input chain if it has one, 1193 /// otherwise return a null sd operand. 1194 static SDValue getInputChainForNode(SDNode *N) { 1195 if (unsigned NumOps = N->getNumOperands()) { 1196 if (N->getOperand(0).getValueType() == MVT::Other) 1197 return N->getOperand(0); 1198 else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) 1199 return N->getOperand(NumOps-1); 1200 for (unsigned i = 1; i < NumOps-1; ++i) 1201 if (N->getOperand(i).getValueType() == MVT::Other) 1202 return N->getOperand(i); 1203 } 1204 return SDValue(); 1205 } 1206 1207 SDValue DAGCombiner::visitTokenFactor(SDNode *N) { 1208 // If N has two operands, where one has an input chain equal to the other, 1209 // the 'other' chain is redundant. 1210 if (N->getNumOperands() == 2) { 1211 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) 1212 return N->getOperand(0); 1213 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) 1214 return N->getOperand(1); 1215 } 1216 1217 SmallVector<SDNode *, 8> TFs; // List of token factors to visit. 1218 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. 1219 SmallPtrSet<SDNode*, 16> SeenOps; 1220 bool Changed = false; // If we should replace this token factor. 1221 1222 // Start out with this token factor. 1223 TFs.push_back(N); 1224 1225 // Iterate through token factors. The TFs grows when new token factors are 1226 // encountered. 1227 for (unsigned i = 0; i < TFs.size(); ++i) { 1228 SDNode *TF = TFs[i]; 1229 1230 // Check each of the operands. 
1231 for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) { 1232 SDValue Op = TF->getOperand(i); 1233 1234 switch (Op.getOpcode()) { 1235 case ISD::EntryToken: 1236 // Entry tokens don't need to be added to the list. They are 1237 // rededundant. 1238 Changed = true; 1239 break; 1240 1241 case ISD::TokenFactor: 1242 if (Op.hasOneUse() && 1243 std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { 1244 // Queue up for processing. 1245 TFs.push_back(Op.getNode()); 1246 // Clean up in case the token factor is removed. 1247 AddToWorkList(Op.getNode()); 1248 Changed = true; 1249 break; 1250 } 1251 // Fall thru 1252 1253 default: 1254 // Only add if it isn't already in the list. 1255 if (SeenOps.insert(Op.getNode())) 1256 Ops.push_back(Op); 1257 else 1258 Changed = true; 1259 break; 1260 } 1261 } 1262 } 1263 1264 SDValue Result; 1265 1266 // If we've change things around then replace token factor. 1267 if (Changed) { 1268 if (Ops.empty()) { 1269 // The entry token is the only possible outcome. 1270 Result = DAG.getEntryNode(); 1271 } else { 1272 // New and improved token factor. 1273 Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), 1274 MVT::Other, &Ops[0], Ops.size()); 1275 } 1276 1277 // Don't add users to work list. 1278 return CombineTo(N, Result, false); 1279 } 1280 1281 return Result; 1282 } 1283 1284 /// MERGE_VALUES can always be eliminated. 1285 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { 1286 WorkListRemover DeadNodes(*this); 1287 // Replacing results may cause a different MERGE_VALUES to suddenly 1288 // be CSE'd with N, and carry its uses with it. Iterate until no 1289 // uses remain, to ensure that the node can be safely deleted. 
1290 do { 1291 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1292 DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), 1293 &DeadNodes); 1294 } while (!N->use_empty()); 1295 removeFromWorkList(N); 1296 DAG.DeleteNode(N); 1297 return SDValue(N, 0); // Return N so it doesn't get rechecked! 1298 } 1299 1300 static 1301 SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, 1302 SelectionDAG &DAG) { 1303 EVT VT = N0.getValueType(); 1304 SDValue N00 = N0.getOperand(0); 1305 SDValue N01 = N0.getOperand(1); 1306 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01); 1307 1308 if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && 1309 isa<ConstantSDNode>(N00.getOperand(1))) { 1310 // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) 1311 N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, 1312 DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT, 1313 N00.getOperand(0), N01), 1314 DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT, 1315 N00.getOperand(1), N01)); 1316 return DAG.getNode(ISD::ADD, DL, VT, N0, N1); 1317 } 1318 1319 return SDValue(); 1320 } 1321 1322 SDValue DAGCombiner::visitADD(SDNode *N) { 1323 SDValue N0 = N->getOperand(0); 1324 SDValue N1 = N->getOperand(1); 1325 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1326 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1327 EVT VT = N0.getValueType(); 1328 1329 // fold vector ops 1330 if (VT.isVector()) { 1331 SDValue FoldedVOp = SimplifyVBinOp(N); 1332 if (FoldedVOp.getNode()) return FoldedVOp; 1333 } 1334 1335 // fold (add x, undef) -> undef 1336 if (N0.getOpcode() == ISD::UNDEF) 1337 return N0; 1338 if (N1.getOpcode() == ISD::UNDEF) 1339 return N1; 1340 // fold (add c1, c2) -> c1+c2 1341 if (N0C && N1C) 1342 return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); 1343 // canonicalize constant to RHS 1344 if (N0C && !N1C) 1345 return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0); 1346 // fold (add x, 0) -> x 1347 if (N1C && 
N1C->isNullValue()) 1348 return N0; 1349 // fold (add Sym, c) -> Sym+c 1350 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) 1351 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && 1352 GA->getOpcode() == ISD::GlobalAddress) 1353 return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, 1354 GA->getOffset() + 1355 (uint64_t)N1C->getSExtValue()); 1356 // fold ((c1-A)+c2) -> (c1+c2)-A 1357 if (N1C && N0.getOpcode() == ISD::SUB) 1358 if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) 1359 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, 1360 DAG.getConstant(N1C->getAPIntValue()+ 1361 N0C->getAPIntValue(), VT), 1362 N0.getOperand(1)); 1363 // reassociate add 1364 SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1); 1365 if (RADD.getNode() != 0) 1366 return RADD; 1367 // fold ((0-A) + B) -> B-A 1368 if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && 1369 cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) 1370 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1)); 1371 // fold (A + (0-B)) -> A-B 1372 if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && 1373 cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) 1374 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1)); 1375 // fold (A+(B-A)) -> B 1376 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) 1377 return N1.getOperand(0); 1378 // fold ((B-A)+A) -> B 1379 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) 1380 return N0.getOperand(0); 1381 // fold (A+(B-(A+C))) to (B-C) 1382 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1383 N0 == N1.getOperand(1).getOperand(0)) 1384 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), 1385 N1.getOperand(1).getOperand(1)); 1386 // fold (A+(B-(C+A))) to (B-C) 1387 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1388 N0 == 
N1.getOperand(1).getOperand(1)) 1389 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), 1390 N1.getOperand(1).getOperand(0)); 1391 // fold (A+((B-A)+or-C)) to (B+or-C) 1392 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && 1393 N1.getOperand(0).getOpcode() == ISD::SUB && 1394 N0 == N1.getOperand(0).getOperand(1)) 1395 return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT, 1396 N1.getOperand(0).getOperand(0), N1.getOperand(1)); 1397 1398 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant 1399 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { 1400 SDValue N00 = N0.getOperand(0); 1401 SDValue N01 = N0.getOperand(1); 1402 SDValue N10 = N1.getOperand(0); 1403 SDValue N11 = N1.getOperand(1); 1404 1405 if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) 1406 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, 1407 DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10), 1408 DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11)); 1409 } 1410 1411 if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) 1412 return SDValue(N, 0); 1413 1414 // fold (a+b) -> (a|b) iff a and b share no bits. 1415 if (VT.isInteger() && !VT.isVector()) { 1416 APInt LHSZero, LHSOne; 1417 APInt RHSZero, RHSOne; 1418 APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); 1419 DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); 1420 1421 if (LHSZero.getBoolValue()) { 1422 DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); 1423 1424 // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 1425 // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 
1426 if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || 1427 (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) 1428 return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); 1429 } 1430 } 1431 1432 // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) 1433 if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) { 1434 SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG); 1435 if (Result.getNode()) return Result; 1436 } 1437 if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { 1438 SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG); 1439 if (Result.getNode()) return Result; 1440 } 1441 1442 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) 1443 if (N1.getOpcode() == ISD::SHL && 1444 N1.getOperand(0).getOpcode() == ISD::SUB) 1445 if (ConstantSDNode *C = 1446 dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0))) 1447 if (C->getAPIntValue() == 0) 1448 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, 1449 DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, 1450 N1.getOperand(0).getOperand(1), 1451 N1.getOperand(1))); 1452 if (N0.getOpcode() == ISD::SHL && 1453 N0.getOperand(0).getOpcode() == ISD::SUB) 1454 if (ConstantSDNode *C = 1455 dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0))) 1456 if (C->getAPIntValue() == 0) 1457 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, 1458 DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, 1459 N0.getOperand(0).getOperand(1), 1460 N0.getOperand(1))); 1461 1462 if (N1.getOpcode() == ISD::AND) { 1463 SDValue AndOp0 = N1.getOperand(0); 1464 ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1)); 1465 unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); 1466 unsigned DestBits = VT.getScalarType().getSizeInBits(); 1467 1468 // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) 1469 // and similar xforms where the inner op is either ~0 or 0. 
1470 if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { 1471 DebugLoc DL = N->getDebugLoc(); 1472 return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); 1473 } 1474 } 1475 1476 // add (sext i1), X -> sub X, (zext i1) 1477 if (N0.getOpcode() == ISD::SIGN_EXTEND && 1478 N0.getOperand(0).getValueType() == MVT::i1 && 1479 !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { 1480 DebugLoc DL = N->getDebugLoc(); 1481 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); 1482 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); 1483 } 1484 1485 return SDValue(); 1486 } 1487 1488 SDValue DAGCombiner::visitADDC(SDNode *N) { 1489 SDValue N0 = N->getOperand(0); 1490 SDValue N1 = N->getOperand(1); 1491 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1492 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1493 EVT VT = N0.getValueType(); 1494 1495 // If the flag result is dead, turn this into an ADD. 1496 if (N->hasNUsesOfValue(0, 1)) 1497 return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), 1498 DAG.getNode(ISD::CARRY_FALSE, 1499 N->getDebugLoc(), MVT::Glue)); 1500 1501 // canonicalize constant to RHS. 1502 if (N0C && !N1C) 1503 return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); 1504 1505 // fold (addc x, 0) -> x + no carry out 1506 if (N1C && N1C->isNullValue()) 1507 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, 1508 N->getDebugLoc(), MVT::Glue)); 1509 1510 // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. 1511 APInt LHSZero, LHSOne; 1512 APInt RHSZero, RHSOne; 1513 APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); 1514 DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); 1515 1516 if (LHSZero.getBoolValue()) { 1517 DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); 1518 1519 // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 1520 // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 
1521 if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || 1522 (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) 1523 return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), 1524 DAG.getNode(ISD::CARRY_FALSE, 1525 N->getDebugLoc(), MVT::Glue)); 1526 } 1527 1528 return SDValue(); 1529 } 1530 1531 SDValue DAGCombiner::visitADDE(SDNode *N) { 1532 SDValue N0 = N->getOperand(0); 1533 SDValue N1 = N->getOperand(1); 1534 SDValue CarryIn = N->getOperand(2); 1535 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1536 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1537 1538 // canonicalize constant to RHS 1539 if (N0C && !N1C) 1540 return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), 1541 N1, N0, CarryIn); 1542 1543 // fold (adde x, y, false) -> (addc x, y) 1544 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 1545 return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); 1546 1547 return SDValue(); 1548 } 1549 1550 // Since it may not be valid to emit a fold to zero for vector initializers 1551 // check if we can before folding. 1552 static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, 1553 SelectionDAG &DAG, bool LegalOperations) { 1554 if (!VT.isVector()) { 1555 return DAG.getConstant(0, VT); 1556 } 1557 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { 1558 // Produce a vector of zeros. 1559 SDValue El = DAG.getConstant(0, VT.getVectorElementType()); 1560 std::vector<SDValue> Ops(VT.getVectorNumElements(), El); 1561 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, 1562 &Ops[0], Ops.size()); 1563 } 1564 return SDValue(); 1565 } 1566 1567 SDValue DAGCombiner::visitSUB(SDNode *N) { 1568 SDValue N0 = N->getOperand(0); 1569 SDValue N1 = N->getOperand(1); 1570 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); 1571 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 1572 ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 
0 : 1573 dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); 1574 EVT VT = N0.getValueType(); 1575 1576 // fold vector ops 1577 if (VT.isVector()) { 1578 SDValue FoldedVOp = SimplifyVBinOp(N); 1579 if (FoldedVOp.getNode()) return FoldedVOp; 1580 } 1581 1582 // fold (sub x, x) -> 0 1583 // FIXME: Refactor this and xor and other similar operations together. 1584 if (N0 == N1) 1585 return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); 1586 // fold (sub c1, c2) -> c1-c2 1587 if (N0C && N1C) 1588 return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); 1589 // fold (sub x, c) -> (add x, -c) 1590 if (N1C) 1591 return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, 1592 DAG.getConstant(-N1C->getAPIntValue(), VT)); 1593 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) 1594 if (N0C && N0C->isAllOnesValue()) 1595 return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); 1596 // fold A-(A-B) -> B 1597 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) 1598 return N1.getOperand(1); 1599 // fold (A+B)-A -> B 1600 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) 1601 return N0.getOperand(1); 1602 // fold (A+B)-B -> A 1603 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) 1604 return N0.getOperand(0); 1605 // fold C2-(A+C1) -> (C2-C1)-A 1606 if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { 1607 SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT); 1608 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, 1609 N1.getOperand(0)); 1610 } 1611 // fold ((A+(B+or-C))-B) -> A+or-C 1612 if (N0.getOpcode() == ISD::ADD && 1613 (N0.getOperand(1).getOpcode() == ISD::SUB || 1614 N0.getOperand(1).getOpcode() == ISD::ADD) && 1615 N0.getOperand(1).getOperand(0) == N1) 1616 return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT, 1617 N0.getOperand(0), N0.getOperand(1).getOperand(1)); 1618 // fold ((A+(C+B))-B) -> A+C 1619 if (N0.getOpcode() == ISD::ADD && 1620 N0.getOperand(1).getOpcode() == 
ISD::ADD && 1621 N0.getOperand(1).getOperand(1) == N1) 1622 return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, 1623 N0.getOperand(0), N0.getOperand(1).getOperand(0)); 1624 // fold ((A-(B-C))-C) -> A-B 1625 if (N0.getOpcode() == ISD::SUB && 1626 N0.getOperand(1).getOpcode() == ISD::SUB && 1627 N0.getOperand(1).getOperand(1) == N1) 1628 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, 1629 N0.getOperand(0), N0.getOperand(1).getOperand(0)); 1630 1631 // If either operand of a sub is undef, the result is undef 1632 if (N0.getOpcode() == ISD::UNDEF) 1633 return N0; 1634 if (N1.getOpcode() == ISD::UNDEF) 1635 return N1; 1636 1637 // If the relocation model supports it, consider symbol offsets. 1638 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) 1639 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { 1640 // fold (sub Sym, c) -> Sym-c 1641 if (N1C && GA->getOpcode() == ISD::GlobalAddress) 1642 return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, 1643 GA->getOffset() - 1644 (uint64_t)N1C->getSExtValue()); 1645 // fold (sub Sym+c1, Sym+c2) -> c1-c2 1646 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) 1647 if (GA->getGlobal() == GB->getGlobal()) 1648 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), 1649 VT); 1650 } 1651 1652 return SDValue(); 1653 } 1654 1655 SDValue DAGCombiner::visitMUL(SDNode *N) { 1656 SDValue N0 = N->getOperand(0); 1657 SDValue N1 = N->getOperand(1); 1658 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1659 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1660 EVT VT = N0.getValueType(); 1661 1662 // fold vector ops 1663 if (VT.isVector()) { 1664 SDValue FoldedVOp = SimplifyVBinOp(N); 1665 if (FoldedVOp.getNode()) return FoldedVOp; 1666 } 1667 1668 // fold (mul x, undef) -> 0 1669 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 1670 return DAG.getConstant(0, VT); 1671 // fold (mul c1, c2) -> c1*c2 1672 if (N0C && N1C) 1673 return 
DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); 1674 // canonicalize constant to RHS 1675 if (N0C && !N1C) 1676 return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0); 1677 // fold (mul x, 0) -> 0 1678 if (N1C && N1C->isNullValue()) 1679 return N1; 1680 // fold (mul x, -1) -> 0-x 1681 if (N1C && N1C->isAllOnesValue()) 1682 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, 1683 DAG.getConstant(0, VT), N0); 1684 // fold (mul x, (1 << c)) -> x << c 1685 if (N1C && N1C->getAPIntValue().isPowerOf2()) 1686 return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, 1687 DAG.getConstant(N1C->getAPIntValue().logBase2(), 1688 getShiftAmountTy(N0.getValueType()))); 1689 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c 1690 if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { 1691 unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); 1692 // FIXME: If the input is something that is easily negated (e.g. a 1693 // single-use add), we should put the negate there. 1694 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, 1695 DAG.getConstant(0, VT), 1696 DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, 1697 DAG.getConstant(Log2Val, 1698 getShiftAmountTy(N0.getValueType())))); 1699 } 1700 // (mul (shl X, c1), c2) -> (mul X, c2 << c1) 1701 if (N1C && N0.getOpcode() == ISD::SHL && 1702 isa<ConstantSDNode>(N0.getOperand(1))) { 1703 SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, 1704 N1, N0.getOperand(1)); 1705 AddToWorkList(C3.getNode()); 1706 return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, 1707 N0.getOperand(0), C3); 1708 } 1709 1710 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one 1711 // use. 1712 { 1713 SDValue Sh(0,0), Y(0,0); 1714 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
1715 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && 1716 N0.getNode()->hasOneUse()) { 1717 Sh = N0; Y = N1; 1718 } else if (N1.getOpcode() == ISD::SHL && 1719 isa<ConstantSDNode>(N1.getOperand(1)) && 1720 N1.getNode()->hasOneUse()) { 1721 Sh = N1; Y = N0; 1722 } 1723 1724 if (Sh.getNode()) { 1725 SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, 1726 Sh.getOperand(0), Y); 1727 return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, 1728 Mul, Sh.getOperand(1)); 1729 } 1730 } 1731 1732 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) 1733 if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && 1734 isa<ConstantSDNode>(N0.getOperand(1))) 1735 return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, 1736 DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT, 1737 N0.getOperand(0), N1), 1738 DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT, 1739 N0.getOperand(1), N1)); 1740 1741 // reassociate mul 1742 SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1); 1743 if (RMUL.getNode() != 0) 1744 return RMUL; 1745 1746 return SDValue(); 1747 } 1748 1749 SDValue DAGCombiner::visitSDIV(SDNode *N) { 1750 SDValue N0 = N->getOperand(0); 1751 SDValue N1 = N->getOperand(1); 1752 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); 1753 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 1754 EVT VT = N->getValueType(0); 1755 1756 // fold vector ops 1757 if (VT.isVector()) { 1758 SDValue FoldedVOp = SimplifyVBinOp(N); 1759 if (FoldedVOp.getNode()) return FoldedVOp; 1760 } 1761 1762 // fold (sdiv c1, c2) -> c1/c2 1763 if (N0C && N1C && !N1C->isNullValue()) 1764 return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); 1765 // fold (sdiv X, 1) -> X 1766 if (N1C && N1C->getAPIntValue() == 1LL) 1767 return N0; 1768 // fold (sdiv X, -1) -> 0-X 1769 if (N1C && N1C->isAllOnesValue()) 1770 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, 1771 DAG.getConstant(0, VT), N0); 1772 // If we know the sign bits of both operands are 
zero, strength reduce to a 1773 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 1774 if (!VT.isVector()) { 1775 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 1776 return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(), 1777 N0, N1); 1778 } 1779 // fold (sdiv X, pow2) -> simple ops after legalize 1780 if (N1C && !N1C->isNullValue() && 1781 (N1C->getAPIntValue().isPowerOf2() || 1782 (-N1C->getAPIntValue()).isPowerOf2())) { 1783 // If dividing by powers of two is cheap, then don't perform the following 1784 // fold. 1785 if (TLI.isPow2DivCheap()) 1786 return SDValue(); 1787 1788 unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); 1789 1790 // Splat the sign bit into the register 1791 SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, 1792 DAG.getConstant(VT.getSizeInBits()-1, 1793 getShiftAmountTy(N0.getValueType()))); 1794 AddToWorkList(SGN.getNode()); 1795 1796 // Add (N0 < 0) ? abs2 - 1 : 0; 1797 SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN, 1798 DAG.getConstant(VT.getSizeInBits() - lg2, 1799 getShiftAmountTy(SGN.getValueType()))); 1800 SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL); 1801 AddToWorkList(SRL.getNode()); 1802 AddToWorkList(ADD.getNode()); // Divide by pow2 1803 SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD, 1804 DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); 1805 1806 // If we're dividing by a positive value, we're done. Otherwise, we must 1807 // negate the result. 1808 if (N1C->getAPIntValue().isNonNegative()) 1809 return SRA; 1810 1811 AddToWorkList(SRA.getNode()); 1812 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, 1813 DAG.getConstant(0, VT), SRA); 1814 } 1815 1816 // if integer divide is expensive and we satisfy the requirements, emit an 1817 // alternate sequence. 
1818 if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { 1819 SDValue Op = BuildSDIV(N); 1820 if (Op.getNode()) return Op; 1821 } 1822 1823 // undef / X -> 0 1824 if (N0.getOpcode() == ISD::UNDEF) 1825 return DAG.getConstant(0, VT); 1826 // X / undef -> undef 1827 if (N1.getOpcode() == ISD::UNDEF) 1828 return N1; 1829 1830 return SDValue(); 1831 } 1832 1833 SDValue DAGCombiner::visitUDIV(SDNode *N) { 1834 SDValue N0 = N->getOperand(0); 1835 SDValue N1 = N->getOperand(1); 1836 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); 1837 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 1838 EVT VT = N->getValueType(0); 1839 1840 // fold vector ops 1841 if (VT.isVector()) { 1842 SDValue FoldedVOp = SimplifyVBinOp(N); 1843 if (FoldedVOp.getNode()) return FoldedVOp; 1844 } 1845 1846 // fold (udiv c1, c2) -> c1/c2 1847 if (N0C && N1C && !N1C->isNullValue()) 1848 return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); 1849 // fold (udiv x, (1 << c)) -> x >>u c 1850 if (N1C && N1C->getAPIntValue().isPowerOf2()) 1851 return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, 1852 DAG.getConstant(N1C->getAPIntValue().logBase2(), 1853 getShiftAmountTy(N0.getValueType()))); 1854 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 1855 if (N1.getOpcode() == ISD::SHL) { 1856 if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { 1857 if (SHC->getAPIntValue().isPowerOf2()) { 1858 EVT ADDVT = N1.getOperand(1).getValueType(); 1859 SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, 1860 N1.getOperand(1), 1861 DAG.getConstant(SHC->getAPIntValue() 1862 .logBase2(), 1863 ADDVT)); 1864 AddToWorkList(Add.getNode()); 1865 return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add); 1866 } 1867 } 1868 } 1869 // fold (udiv x, c) -> alternate 1870 if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { 1871 SDValue Op = BuildUDIV(N); 1872 if (Op.getNode()) return Op; 1873 } 1874 1875 // undef / X -> 0 1876 if 
(N0.getOpcode() == ISD::UNDEF) 1877 return DAG.getConstant(0, VT); 1878 // X / undef -> undef 1879 if (N1.getOpcode() == ISD::UNDEF) 1880 return N1; 1881 1882 return SDValue(); 1883 } 1884 1885 SDValue DAGCombiner::visitSREM(SDNode *N) { 1886 SDValue N0 = N->getOperand(0); 1887 SDValue N1 = N->getOperand(1); 1888 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1889 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1890 EVT VT = N->getValueType(0); 1891 1892 // fold (srem c1, c2) -> c1%c2 1893 if (N0C && N1C && !N1C->isNullValue()) 1894 return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); 1895 // If we know the sign bits of both operands are zero, strength reduce to a 1896 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 1897 if (!VT.isVector()) { 1898 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 1899 return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1); 1900 } 1901 1902 // If X/C can be simplified by the division-by-constant logic, lower 1903 // X%C to the equivalent of X-X/C*C. 
1904 if (N1C && !N1C->isNullValue()) { 1905 SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1); 1906 AddToWorkList(Div.getNode()); 1907 SDValue OptimizedDiv = combine(Div.getNode()); 1908 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 1909 SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, 1910 OptimizedDiv, N1); 1911 SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); 1912 AddToWorkList(Mul.getNode()); 1913 return Sub; 1914 } 1915 } 1916 1917 // undef % X -> 0 1918 if (N0.getOpcode() == ISD::UNDEF) 1919 return DAG.getConstant(0, VT); 1920 // X % undef -> undef 1921 if (N1.getOpcode() == ISD::UNDEF) 1922 return N1; 1923 1924 return SDValue(); 1925 } 1926 1927 SDValue DAGCombiner::visitUREM(SDNode *N) { 1928 SDValue N0 = N->getOperand(0); 1929 SDValue N1 = N->getOperand(1); 1930 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1931 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1932 EVT VT = N->getValueType(0); 1933 1934 // fold (urem c1, c2) -> c1%c2 1935 if (N0C && N1C && !N1C->isNullValue()) 1936 return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); 1937 // fold (urem x, pow2) -> (and x, pow2-1) 1938 if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) 1939 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, 1940 DAG.getConstant(N1C->getAPIntValue()-1,VT)); 1941 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) 1942 if (N1.getOpcode() == ISD::SHL) { 1943 if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { 1944 if (SHC->getAPIntValue().isPowerOf2()) { 1945 SDValue Add = 1946 DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, 1947 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), 1948 VT)); 1949 AddToWorkList(Add.getNode()); 1950 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add); 1951 } 1952 } 1953 } 1954 1955 // If X/C can be simplified by the division-by-constant logic, lower 1956 // X%C to the equivalent of 
X-X/C*C. 1957 if (N1C && !N1C->isNullValue()) { 1958 SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1); 1959 AddToWorkList(Div.getNode()); 1960 SDValue OptimizedDiv = combine(Div.getNode()); 1961 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 1962 SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, 1963 OptimizedDiv, N1); 1964 SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); 1965 AddToWorkList(Mul.getNode()); 1966 return Sub; 1967 } 1968 } 1969 1970 // undef % X -> 0 1971 if (N0.getOpcode() == ISD::UNDEF) 1972 return DAG.getConstant(0, VT); 1973 // X % undef -> undef 1974 if (N1.getOpcode() == ISD::UNDEF) 1975 return N1; 1976 1977 return SDValue(); 1978 } 1979 1980 SDValue DAGCombiner::visitMULHS(SDNode *N) { 1981 SDValue N0 = N->getOperand(0); 1982 SDValue N1 = N->getOperand(1); 1983 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1984 EVT VT = N->getValueType(0); 1985 DebugLoc DL = N->getDebugLoc(); 1986 1987 // fold (mulhs x, 0) -> 0 1988 if (N1C && N1C->isNullValue()) 1989 return N1; 1990 // fold (mulhs x, 1) -> (sra x, size(x)-1) 1991 if (N1C && N1C->getAPIntValue() == 1) 1992 return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0, 1993 DAG.getConstant(N0.getValueType().getSizeInBits() - 1, 1994 getShiftAmountTy(N0.getValueType()))); 1995 // fold (mulhs x, undef) -> 0 1996 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 1997 return DAG.getConstant(0, VT); 1998 1999 // If the type twice as wide is legal, transform the mulhs to a wider multiply 2000 // plus a shift. 
2001 if (VT.isSimple() && !VT.isVector()) { 2002 MVT Simple = VT.getSimpleVT(); 2003 unsigned SimpleSize = Simple.getSizeInBits(); 2004 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2005 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2006 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); 2007 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); 2008 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2009 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2010 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 2011 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2012 } 2013 } 2014 2015 return SDValue(); 2016 } 2017 2018 SDValue DAGCombiner::visitMULHU(SDNode *N) { 2019 SDValue N0 = N->getOperand(0); 2020 SDValue N1 = N->getOperand(1); 2021 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2022 EVT VT = N->getValueType(0); 2023 DebugLoc DL = N->getDebugLoc(); 2024 2025 // fold (mulhu x, 0) -> 0 2026 if (N1C && N1C->isNullValue()) 2027 return N1; 2028 // fold (mulhu x, 1) -> 0 2029 if (N1C && N1C->getAPIntValue() == 1) 2030 return DAG.getConstant(0, N0.getValueType()); 2031 // fold (mulhu x, undef) -> 0 2032 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2033 return DAG.getConstant(0, VT); 2034 2035 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2036 // plus a shift. 
2037 if (VT.isSimple() && !VT.isVector()) { 2038 MVT Simple = VT.getSimpleVT(); 2039 unsigned SimpleSize = Simple.getSizeInBits(); 2040 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2041 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2042 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); 2043 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); 2044 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2045 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2046 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 2047 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2048 } 2049 } 2050 2051 return SDValue(); 2052 } 2053 2054 /// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that 2055 /// compute two values. LoOp and HiOp give the opcodes for the two computations 2056 /// that are being performed. Return true if a simplification was made. 2057 /// 2058 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 2059 unsigned HiOp) { 2060 // If the high half is not needed, just compute the low half. 2061 bool HiExists = N->hasAnyUseOfValue(1); 2062 if (!HiExists && 2063 (!LegalOperations || 2064 TLI.isOperationLegal(LoOp, N->getValueType(0)))) { 2065 SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), 2066 N->op_begin(), N->getNumOperands()); 2067 return CombineTo(N, Res, Res); 2068 } 2069 2070 // If the low half is not needed, just compute the high half. 2071 bool LoExists = N->hasAnyUseOfValue(0); 2072 if (!LoExists && 2073 (!LegalOperations || 2074 TLI.isOperationLegal(HiOp, N->getValueType(1)))) { 2075 SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), 2076 N->op_begin(), N->getNumOperands()); 2077 return CombineTo(N, Res, Res); 2078 } 2079 2080 // If both halves are used, return as it is. 2081 if (LoExists && HiExists) 2082 return SDValue(); 2083 2084 // If the two computed results can be simplified separately, separate them. 
2085 if (LoExists) { 2086 SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), 2087 N->op_begin(), N->getNumOperands()); 2088 AddToWorkList(Lo.getNode()); 2089 SDValue LoOpt = combine(Lo.getNode()); 2090 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && 2091 (!LegalOperations || 2092 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) 2093 return CombineTo(N, LoOpt, LoOpt); 2094 } 2095 2096 if (HiExists) { 2097 SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), 2098 N->op_begin(), N->getNumOperands()); 2099 AddToWorkList(Hi.getNode()); 2100 SDValue HiOpt = combine(Hi.getNode()); 2101 if (HiOpt.getNode() && HiOpt != Hi && 2102 (!LegalOperations || 2103 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) 2104 return CombineTo(N, HiOpt, HiOpt); 2105 } 2106 2107 return SDValue(); 2108 } 2109 2110 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { 2111 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); 2112 if (Res.getNode()) return Res; 2113 2114 EVT VT = N->getValueType(0); 2115 DebugLoc DL = N->getDebugLoc(); 2116 2117 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2118 // plus a shift. 2119 if (VT.isSimple() && !VT.isVector()) { 2120 MVT Simple = VT.getSimpleVT(); 2121 unsigned SimpleSize = Simple.getSizeInBits(); 2122 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2123 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2124 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); 2125 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); 2126 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2127 // Compute the high part as N1. 2128 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2129 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2130 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2131 // Compute the low part as N0. 
2132 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2133 return CombineTo(N, Lo, Hi); 2134 } 2135 } 2136 2137 return SDValue(); 2138 } 2139 2140 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { 2141 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); 2142 if (Res.getNode()) return Res; 2143 2144 EVT VT = N->getValueType(0); 2145 DebugLoc DL = N->getDebugLoc(); 2146 2147 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2148 // plus a shift. 2149 if (VT.isSimple() && !VT.isVector()) { 2150 MVT Simple = VT.getSimpleVT(); 2151 unsigned SimpleSize = Simple.getSizeInBits(); 2152 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2153 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2154 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); 2155 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); 2156 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2157 // Compute the high part as N1. 2158 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2159 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2160 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2161 // Compute the low part as N0. 
2162 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2163 return CombineTo(N, Lo, Hi); 2164 } 2165 } 2166 2167 return SDValue(); 2168 } 2169 2170 SDValue DAGCombiner::visitSMULO(SDNode *N) { 2171 // (smulo x, 2) -> (saddo x, x) 2172 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2173 if (C2->getAPIntValue() == 2) 2174 return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(), 2175 N->getOperand(0), N->getOperand(0)); 2176 2177 return SDValue(); 2178 } 2179 2180 SDValue DAGCombiner::visitUMULO(SDNode *N) { 2181 // (umulo x, 2) -> (uaddo x, x) 2182 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2183 if (C2->getAPIntValue() == 2) 2184 return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(), 2185 N->getOperand(0), N->getOperand(0)); 2186 2187 return SDValue(); 2188 } 2189 2190 SDValue DAGCombiner::visitSDIVREM(SDNode *N) { 2191 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); 2192 if (Res.getNode()) return Res; 2193 2194 return SDValue(); 2195 } 2196 2197 SDValue DAGCombiner::visitUDIVREM(SDNode *N) { 2198 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); 2199 if (Res.getNode()) return Res; 2200 2201 return SDValue(); 2202 } 2203 2204 /// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with 2205 /// two operands of the same opcode, try to simplify it. 2206 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { 2207 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2208 EVT VT = N0.getValueType(); 2209 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); 2210 2211 // Bail early if none of these transforms apply. 
2212 if (N0.getNode()->getNumOperands() == 0) return SDValue(); 2213 2214 // For each of OP in AND/OR/XOR: 2215 // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) 2216 // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) 2217 // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) 2218 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) 2219 // 2220 // do not sink logical op inside of a vector extend, since it may combine 2221 // into a vsetcc. 2222 EVT Op0VT = N0.getOperand(0).getValueType(); 2223 if ((N0.getOpcode() == ISD::ZERO_EXTEND || 2224 N0.getOpcode() == ISD::SIGN_EXTEND || 2225 // Avoid infinite looping with PromoteIntBinOp. 2226 (N0.getOpcode() == ISD::ANY_EXTEND && 2227 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || 2228 (N0.getOpcode() == ISD::TRUNCATE && 2229 (!TLI.isZExtFree(VT, Op0VT) || 2230 !TLI.isTruncateFree(Op0VT, VT)) && 2231 TLI.isTypeLegal(Op0VT))) && 2232 !VT.isVector() && 2233 Op0VT == N1.getOperand(0).getValueType() && 2234 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { 2235 SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), 2236 N0.getOperand(0).getValueType(), 2237 N0.getOperand(0), N1.getOperand(0)); 2238 AddToWorkList(ORNode.getNode()); 2239 return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode); 2240 } 2241 2242 // For each of OP in SHL/SRL/SRA/AND... 
2243 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) 2244 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) 2245 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) 2246 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || 2247 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && 2248 N0.getOperand(1) == N1.getOperand(1)) { 2249 SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), 2250 N0.getOperand(0).getValueType(), 2251 N0.getOperand(0), N1.getOperand(0)); 2252 AddToWorkList(ORNode.getNode()); 2253 return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, 2254 ORNode, N0.getOperand(1)); 2255 } 2256 2257 return SDValue(); 2258 } 2259 2260 SDValue DAGCombiner::visitAND(SDNode *N) { 2261 SDValue N0 = N->getOperand(0); 2262 SDValue N1 = N->getOperand(1); 2263 SDValue LL, LR, RL, RR, CC0, CC1; 2264 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2265 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2266 EVT VT = N1.getValueType(); 2267 unsigned BitWidth = VT.getScalarType().getSizeInBits(); 2268 2269 // fold vector ops 2270 if (VT.isVector()) { 2271 SDValue FoldedVOp = SimplifyVBinOp(N); 2272 if (FoldedVOp.getNode()) return FoldedVOp; 2273 } 2274 2275 // fold (and x, undef) -> 0 2276 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2277 return DAG.getConstant(0, VT); 2278 // fold (and c1, c2) -> c1&c2 2279 if (N0C && N1C) 2280 return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); 2281 // canonicalize constant to RHS 2282 if (N0C && !N1C) 2283 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0); 2284 // fold (and x, -1) -> x 2285 if (N1C && N1C->isAllOnesValue()) 2286 return N0; 2287 // if (and x, c) is known to be zero, return 0 2288 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 2289 APInt::getAllOnesValue(BitWidth))) 2290 return DAG.getConstant(0, VT); 2291 // reassociate and 2292 SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); 2293 if (RAND.getNode() != 0) 2294 
return RAND; 2295 // fold (and (or x, C), D) -> D if (C & D) == D 2296 if (N1C && N0.getOpcode() == ISD::OR) 2297 if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 2298 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 2299 return N1; 2300 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 2301 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 2302 SDValue N0Op0 = N0.getOperand(0); 2303 APInt Mask = ~N1C->getAPIntValue(); 2304 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 2305 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 2306 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), 2307 N0.getValueType(), N0Op0); 2308 2309 // Replace uses of the AND with uses of the Zero extend node. 2310 CombineTo(N, Zext); 2311 2312 // We actually want to replace all uses of the any_extend with the 2313 // zero_extend, to avoid duplicating things. This will later cause this 2314 // AND to be folded. 2315 CombineTo(N0.getNode(), Zext); 2316 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2317 } 2318 } 2319 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2320 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2321 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2322 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2323 2324 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2325 LL.getValueType().isInteger()) { 2326 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2327 if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { 2328 SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), 2329 LR.getValueType(), LL, RL); 2330 AddToWorkList(ORNode.getNode()); 2331 return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); 2332 } 2333 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2334 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { 2335 SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(), 2336 LR.getValueType(), LL, RL); 2337 AddToWorkList(ANDNode.getNode()); 2338 return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); 2339 } 2340 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2341 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { 2342 SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), 2343 LR.getValueType(), LL, RL); 2344 AddToWorkList(ORNode.getNode()); 2345 return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); 2346 } 2347 } 2348 // canonicalize equivalent to ll == rl 2349 if (LL == RR && LR == RL) { 2350 Op1 = ISD::getSetCCSwappedOperands(Op1); 2351 std::swap(RL, RR); 2352 } 2353 if (LL == RL && LR == RR) { 2354 bool isInteger = LL.getValueType().isInteger(); 2355 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2356 if (Result != ISD::SETCC_INVALID && 2357 (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) 2358 return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), 2359 LL, LR, Result); 2360 } 2361 } 2362 2363 // 
Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 2364 if (N0.getOpcode() == N1.getOpcode()) { 2365 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 2366 if (Tmp.getNode()) return Tmp; 2367 } 2368 2369 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 2370 // fold (and (sra)) -> (and (srl)) when possible. 2371 if (!VT.isVector() && 2372 SimplifyDemandedBits(SDValue(N, 0))) 2373 return SDValue(N, 0); 2374 2375 // fold (zext_inreg (extload x)) -> (zextload x) 2376 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 2377 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2378 EVT MemVT = LN0->getMemoryVT(); 2379 // If we zero all the possible extended bits, then we can turn this into 2380 // a zextload if we are running before legalize or the operation is legal. 2381 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2382 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2383 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2384 ((!LegalOperations && !LN0->isVolatile()) || 2385 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { 2386 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, 2387 LN0->getChain(), LN0->getBasePtr(), 2388 LN0->getPointerInfo(), MemVT, 2389 LN0->isVolatile(), LN0->isNonTemporal(), 2390 LN0->getAlignment()); 2391 AddToWorkList(N); 2392 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2393 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2394 } 2395 } 2396 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 2397 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 2398 N0.hasOneUse()) { 2399 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2400 EVT MemVT = LN0->getMemoryVT(); 2401 // If we zero all the possible extended bits, then we can turn this into 2402 // a zextload if we are running before legalize or the operation is legal. 
2403 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2404 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2405 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2406 ((!LegalOperations && !LN0->isVolatile()) || 2407 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { 2408 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, 2409 LN0->getChain(), 2410 LN0->getBasePtr(), LN0->getPointerInfo(), 2411 MemVT, 2412 LN0->isVolatile(), LN0->isNonTemporal(), 2413 LN0->getAlignment()); 2414 AddToWorkList(N); 2415 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2416 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2417 } 2418 } 2419 2420 // fold (and (load x), 255) -> (zextload x, i8) 2421 // fold (and (extload x, i16), 255) -> (zextload x, i8) 2422 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 2423 if (N1C && (N0.getOpcode() == ISD::LOAD || 2424 (N0.getOpcode() == ISD::ANY_EXTEND && 2425 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 2426 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 2427 LoadSDNode *LN0 = HasAnyExt 2428 ? cast<LoadSDNode>(N0.getOperand(0)) 2429 : cast<LoadSDNode>(N0); 2430 if (LN0->getExtensionType() != ISD::SEXTLOAD && 2431 LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { 2432 uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); 2433 if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ 2434 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 2435 EVT LoadedVT = LN0->getMemoryVT(); 2436 2437 if (ExtVT == LoadedVT && 2438 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { 2439 EVT LoadResultTy = HasAnyExt ? 
LN0->getValueType(0) : VT; 2440 2441 SDValue NewLoad = 2442 DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, 2443 LN0->getChain(), LN0->getBasePtr(), 2444 LN0->getPointerInfo(), 2445 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 2446 LN0->getAlignment()); 2447 AddToWorkList(N); 2448 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 2449 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2450 } 2451 2452 // Do not change the width of a volatile load. 2453 // Do not generate loads of non-round integer types since these can 2454 // be expensive (and would be wrong if the type is not byte sized). 2455 if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && 2456 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { 2457 EVT PtrType = LN0->getOperand(1).getValueType(); 2458 2459 unsigned Alignment = LN0->getAlignment(); 2460 SDValue NewPtr = LN0->getBasePtr(); 2461 2462 // For big endian targets, we need to add an offset to the pointer 2463 // to load the correct bytes. For little endian systems, we merely 2464 // need to read fewer bytes from the same pointer. 2465 if (TLI.isBigEndian()) { 2466 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 2467 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 2468 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 2469 NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, 2470 NewPtr, DAG.getConstant(PtrOff, PtrType)); 2471 Alignment = MinAlign(Alignment, PtrOff); 2472 } 2473 2474 AddToWorkList(NewPtr.getNode()); 2475 2476 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 2477 SDValue Load = 2478 DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, 2479 LN0->getChain(), NewPtr, 2480 LN0->getPointerInfo(), 2481 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 2482 Alignment); 2483 AddToWorkList(N); 2484 CombineTo(LN0, Load, Load.getValue(1)); 2485 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2486 } 2487 } 2488 } 2489 } 2490 2491 return SDValue(); 2492 } 2493 2494 /// MatchBSwapHWord - Match (a >> 8) | (a << 8) as (bswap a) >> 16 2495 /// 2496 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 2497 bool DemandHighBits) { 2498 if (!LegalOperations) 2499 return SDValue(); 2500 2501 EVT VT = N->getValueType(0); 2502 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) 2503 return SDValue(); 2504 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 2505 return SDValue(); 2506 2507 // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) 2508 bool LookPassAnd0 = false; 2509 bool LookPassAnd1 = false; 2510 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) 2511 std::swap(N0, N1); 2512 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) 2513 std::swap(N0, N1); 2514 if (N0.getOpcode() == ISD::AND) { 2515 if (!N0.getNode()->hasOneUse()) 2516 return SDValue(); 2517 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 2518 if (!N01C || N01C->getZExtValue() != 0xFF00) 2519 return SDValue(); 2520 N0 = N0.getOperand(0); 2521 LookPassAnd0 = true; 2522 } 2523 2524 if (N1.getOpcode() == ISD::AND) { 2525 if (!N1.getNode()->hasOneUse()) 2526 return SDValue(); 2527 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 2528 if (!N11C || N11C->getZExtValue() != 0xFF) 2529 return SDValue(); 2530 N1 = N1.getOperand(0); 2531 LookPassAnd1 = true; 2532 } 2533 2534 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) 2535 std::swap(N0, N1); 2536 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) 2537 return SDValue(); 2538 if (!N0.getNode()->hasOneUse() || 2539 !N1.getNode()->hasOneUse()) 2540 return SDValue(); 2541 2542 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 2543 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 2544 if (!N01C || !N11C) 2545 return SDValue(); 2546 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 
8) 2547 return SDValue(); 2548 2549 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) 2550 SDValue N00 = N0->getOperand(0); 2551 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { 2552 if (!N00.getNode()->hasOneUse()) 2553 return SDValue(); 2554 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); 2555 if (!N001C || N001C->getZExtValue() != 0xFF) 2556 return SDValue(); 2557 N00 = N00.getOperand(0); 2558 LookPassAnd0 = true; 2559 } 2560 2561 SDValue N10 = N1->getOperand(0); 2562 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { 2563 if (!N10.getNode()->hasOneUse()) 2564 return SDValue(); 2565 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); 2566 if (!N101C || N101C->getZExtValue() != 0xFF00) 2567 return SDValue(); 2568 N10 = N10.getOperand(0); 2569 LookPassAnd1 = true; 2570 } 2571 2572 if (N00 != N10) 2573 return SDValue(); 2574 2575 // Make sure everything beyond the low halfword is zero since the SRL 16 2576 // will clear the top bits. 2577 unsigned OpSizeInBits = VT.getSizeInBits(); 2578 if (DemandHighBits && OpSizeInBits > 16 && 2579 (!LookPassAnd0 || !LookPassAnd1) && 2580 !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16))) 2581 return SDValue(); 2582 2583 SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00); 2584 if (OpSizeInBits > 16) 2585 Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res, 2586 DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); 2587 return Res; 2588 } 2589 2590 /// isBSwapHWordElement - Return true if the specified node is an element 2591 /// that makes up a 32-bit packed halfword byteswap. i.e. 
2592 /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) 2593 static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) { 2594 if (!N.getNode()->hasOneUse()) 2595 return false; 2596 2597 unsigned Opc = N.getOpcode(); 2598 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) 2599 return false; 2600 2601 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2602 if (!N1C) 2603 return false; 2604 2605 unsigned Num; 2606 switch (N1C->getZExtValue()) { 2607 default: 2608 return false; 2609 case 0xFF: Num = 0; break; 2610 case 0xFF00: Num = 1; break; 2611 case 0xFF0000: Num = 2; break; 2612 case 0xFF000000: Num = 3; break; 2613 } 2614 2615 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 2616 SDValue N0 = N.getOperand(0); 2617 if (Opc == ISD::AND) { 2618 if (Num == 0 || Num == 2) { 2619 // (x >> 8) & 0xff 2620 // (x >> 8) & 0xff0000 2621 if (N0.getOpcode() != ISD::SRL) 2622 return false; 2623 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 2624 if (!C || C->getZExtValue() != 8) 2625 return false; 2626 } else { 2627 // (x << 8) & 0xff00 2628 // (x << 8) & 0xff000000 2629 if (N0.getOpcode() != ISD::SHL) 2630 return false; 2631 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 2632 if (!C || C->getZExtValue() != 8) 2633 return false; 2634 } 2635 } else if (Opc == ISD::SHL) { 2636 // (x & 0xff) << 8 2637 // (x & 0xff0000) << 8 2638 if (Num != 0 && Num != 2) 2639 return false; 2640 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2641 if (!C || C->getZExtValue() != 8) 2642 return false; 2643 } else { // Opc == ISD::SRL 2644 // (x & 0xff00) >> 8 2645 // (x & 0xff000000) >> 8 2646 if (Num != 1 && Num != 3) 2647 return false; 2648 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2649 if (!C || C->getZExtValue() != 8) 2650 return false; 2651 } 2652 2653 if (Parts[Num]) 2654 return false; 2655 2656 Parts[Num] = N0.getOperand(0).getNode(); 2657 return true; 
2658 } 2659 2660 /// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is 2661 /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) 2662 /// => (rotl (bswap x), 16) 2663 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { 2664 if (!LegalOperations) 2665 return SDValue(); 2666 2667 EVT VT = N->getValueType(0); 2668 if (VT != MVT::i32) 2669 return SDValue(); 2670 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 2671 return SDValue(); 2672 2673 SmallVector<SDNode*,4> Parts(4, (SDNode*)0); 2674 // Look for either 2675 // (or (or (and), (and)), (or (and), (and))) 2676 // (or (or (or (and), (and)), (and)), (and)) 2677 if (N0.getOpcode() != ISD::OR) 2678 return SDValue(); 2679 SDValue N00 = N0.getOperand(0); 2680 SDValue N01 = N0.getOperand(1); 2681 2682 if (N1.getOpcode() == ISD::OR) { 2683 // (or (or (and), (and)), (or (and), (and))) 2684 SDValue N000 = N00.getOperand(0); 2685 if (!isBSwapHWordElement(N000, Parts)) 2686 return SDValue(); 2687 2688 SDValue N001 = N00.getOperand(1); 2689 if (!isBSwapHWordElement(N001, Parts)) 2690 return SDValue(); 2691 SDValue N010 = N01.getOperand(0); 2692 if (!isBSwapHWordElement(N010, Parts)) 2693 return SDValue(); 2694 SDValue N011 = N01.getOperand(1); 2695 if (!isBSwapHWordElement(N011, Parts)) 2696 return SDValue(); 2697 } else { 2698 // (or (or (or (and), (and)), (and)), (and)) 2699 if (!isBSwapHWordElement(N1, Parts)) 2700 return SDValue(); 2701 if (!isBSwapHWordElement(N01, Parts)) 2702 return SDValue(); 2703 if (N00.getOpcode() != ISD::OR) 2704 return SDValue(); 2705 SDValue N000 = N00.getOperand(0); 2706 if (!isBSwapHWordElement(N000, Parts)) 2707 return SDValue(); 2708 SDValue N001 = N00.getOperand(1); 2709 if (!isBSwapHWordElement(N001, Parts)) 2710 return SDValue(); 2711 } 2712 2713 // Make sure the parts are all coming from the same node. 
2714 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) 2715 return SDValue(); 2716 2717 SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, 2718 SDValue(Parts[0],0)); 2719 2720 // Result of the bswap should be rotated by 16. If it's not legal, than 2721 // do (x << 16) | (x >> 16). 2722 SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); 2723 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) 2724 return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt); 2725 else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) 2726 return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt); 2727 return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, 2728 DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt), 2729 DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt)); 2730 } 2731 2732 SDValue DAGCombiner::visitOR(SDNode *N) { 2733 SDValue N0 = N->getOperand(0); 2734 SDValue N1 = N->getOperand(1); 2735 SDValue LL, LR, RL, RR, CC0, CC1; 2736 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2737 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2738 EVT VT = N1.getValueType(); 2739 2740 // fold vector ops 2741 if (VT.isVector()) { 2742 SDValue FoldedVOp = SimplifyVBinOp(N); 2743 if (FoldedVOp.getNode()) return FoldedVOp; 2744 } 2745 2746 // fold (or x, undef) -> -1 2747 if (!LegalOperations && 2748 (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { 2749 EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; 2750 return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); 2751 } 2752 // fold (or c1, c2) -> c1|c2 2753 if (N0C && N1C) 2754 return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); 2755 // canonicalize constant to RHS 2756 if (N0C && !N1C) 2757 return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0); 2758 // fold (or x, 0) -> x 2759 if (N1C && N1C->isNullValue()) 2760 return N0; 2761 // fold (or x, -1) -> -1 2762 if (N1C && N1C->isAllOnesValue()) 2763 return N1; 2764 // fold (or x, c) -> c iff (x & ~c) == 0 2765 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) 2766 return N1; 2767 2768 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) 2769 SDValue BSwap = MatchBSwapHWord(N, N0, N1); 2770 if (BSwap.getNode() != 0) 2771 return BSwap; 2772 BSwap = MatchBSwapHWordLow(N, N0, N1); 2773 if (BSwap.getNode() != 0) 2774 return BSwap; 2775 2776 // reassociate or 2777 SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1); 2778 if (ROR.getNode() != 0) 2779 return ROR; 2780 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) 2781 // iff (c1 & c2) == 0. 
2782 if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 2783 isa<ConstantSDNode>(N0.getOperand(1))) { 2784 ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); 2785 if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) 2786 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, 2787 DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, 2788 N0.getOperand(0), N1), 2789 DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); 2790 } 2791 // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) 2792 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2793 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2794 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2795 2796 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2797 LL.getValueType().isInteger()) { 2798 // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) 2799 // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) 2800 if (cast<ConstantSDNode>(LR)->isNullValue() && 2801 (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { 2802 SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(), 2803 LR.getValueType(), LL, RL); 2804 AddToWorkList(ORNode.getNode()); 2805 return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); 2806 } 2807 // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) 2808 // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) 2809 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && 2810 (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { 2811 SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(), 2812 LR.getValueType(), LL, RL); 2813 AddToWorkList(ANDNode.getNode()); 2814 return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); 2815 } 2816 } 2817 // canonicalize equivalent to ll == rl 2818 if (LL == RR && LR == RL) { 2819 Op1 = ISD::getSetCCSwappedOperands(Op1); 2820 std::swap(RL, RR); 2821 } 2822 if (LL == RL && LR == RR) { 2823 bool isInteger = LL.getValueType().isInteger(); 2824 ISD::CondCode Result = 
ISD::getSetCCOrOperation(Op0, Op1, isInteger); 2825 if (Result != ISD::SETCC_INVALID && 2826 (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) 2827 return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), 2828 LL, LR, Result); 2829 } 2830 } 2831 2832 // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) 2833 if (N0.getOpcode() == N1.getOpcode()) { 2834 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 2835 if (Tmp.getNode()) return Tmp; 2836 } 2837 2838 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. 2839 if (N0.getOpcode() == ISD::AND && 2840 N1.getOpcode() == ISD::AND && 2841 N0.getOperand(1).getOpcode() == ISD::Constant && 2842 N1.getOperand(1).getOpcode() == ISD::Constant && 2843 // Don't increase # computations. 2844 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { 2845 // We can only do this xform if we know that bits from X that are set in C2 2846 // but not in C1 are already zero. Likewise for Y. 2847 const APInt &LHSMask = 2848 cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 2849 const APInt &RHSMask = 2850 cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); 2851 2852 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && 2853 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { 2854 SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, 2855 N0.getOperand(0), N1.getOperand(0)); 2856 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, 2857 DAG.getConstant(LHSMask | RHSMask, VT)); 2858 } 2859 } 2860 2861 // See if this is some rotate idiom. 2862 if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) 2863 return SDValue(Rot, 0); 2864 2865 // Simplify the operands using demanded-bits information. 2866 if (!VT.isVector() && 2867 SimplifyDemandedBits(SDValue(N, 0))) 2868 return SDValue(N, 0); 2869 2870 return SDValue(); 2871 } 2872 2873 /// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present. 
/// On success Shift receives the SHL/SRL node and, if Op was an AND with a
/// constant right-hand side, Mask receives that constant; Mask is left
/// untouched when there is no AND.  An AND with a non-constant mask, or an Op
/// that is not a shift once the AND is stripped, does not match.
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
  if (Op.getOpcode() == ISD::AND) {
    if (isa<ConstantSDNode>(Op.getOperand(1))) {
      Mask = Op.getOperand(1);
      Op = Op.getOperand(0);
    } else {
      return false;
    }
  }

  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    Shift = Op;
    return true;
  }

  return false;
}

// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
//
// LHS and RHS are the operands of the OR and DL is the debug location given to
// the new nodes.  Returns the node that replaces the OR, or 0 if no rotate
// idiom was recognized.
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return 0;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return 0;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return 0; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return 0; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return 0;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return 0;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask , RHSMask );
  }

  // From here on the LHS* values describe the shl and the RHS* values the srl.
  unsigned OpSizeInBits = VT.getSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
      RHSShiftAmt.getOpcode() == ISD::Constant) {
    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
    // The two amounts must add up to the full width to form a rotate.
    if ((LShVal + RShVal) != OpSizeInBits)
      return 0;

    SDValue Rot;
    if (HasROTL)
      Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
    else
      Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);

      // Each mask only constrains the bits produced by its own shift, so the
      // bits that come from the other shift are OR'ed back in before the
      // mask is intersected.
      if (LHSMask.getNode()) {
        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
      }
      if (RHSMask.getNode()) {
        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return 0;

  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
  // ("32" stands for the operand width; the code compares against
  // OpSizeInBits.)
  if (RHSShiftAmt.getOpcode() == ISD::SUB &&
      LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
    if (ConstantSDNode *SUBC =
          dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
      if (SUBC->getAPIntValue() == OpSizeInBits) {
        if (HasROTL)
          return DAG.getNode(ISD::ROTL, DL, VT,
                             LHSShiftArg, LHSShiftAmt).getNode();
        else
          return DAG.getNode(ISD::ROTR, DL, VT,
                             LHSShiftArg, RHSShiftAmt).getNode();
      }
    }
  }

  // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotr x, y)
  // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotl x, (sub 32, y))
  if (LHSShiftAmt.getOpcode() == ISD::SUB &&
      RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
    if (ConstantSDNode *SUBC =
          dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
      if (SUBC->getAPIntValue() == OpSizeInBits) {
        if (HasROTR)
          return DAG.getNode(ISD::ROTR, DL, VT,
                             LHSShiftArg, RHSShiftAmt).getNode();
        else
          return DAG.getNode(ISD::ROTL, DL, VT,
                             LHSShiftArg, LHSShiftAmt).getNode();
      }
    }
  }

  // Look for sign/zext/any-extended or truncate cases:
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
       || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
       || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
       || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
       || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
       || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
       || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
    SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
    if (RExtOp0.getOpcode() == ISD::SUB &&
        RExtOp0.getOperand(1) == LExtOp0) {
      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
      //   (rotl x, y)
      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
      //   (rotr x, (sub 32, y))
      if (ConstantSDNode *SUBC =
            dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
        if (SUBC->getAPIntValue() == OpSizeInBits) {
          return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                             LHSShiftArg,
                             HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
        }
      }
    } else if (LExtOp0.getOpcode() == ISD::SUB &&
               RExtOp0 == LExtOp0.getOperand(1)) {
      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
      //   (rotr x, y)
      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
      //   (rotl x, (sub 32, y))
      if (ConstantSDNode *SUBC =
            dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
        if (SUBC->getAPIntValue() == OpSizeInBits) {
          return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
                             LHSShiftArg,
                             HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
        }
      }
    }
  }

  return 0;
}

/// visitXOR - Try to simplify the XOR node N.  Returns the replacement value,
/// SDValue(N, 0) if N was updated in place, or a null SDValue if nothing
/// applied.  The folds are tried in the order written.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LHS, RHS, CC;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (xor x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (xor c1, c2) -> c1^c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // reassociate xor
  SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
  if (RXOR.getNode() != 0)
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
                    DAG.getConstant(1, V.getValueType()));
    // Queue the new inner xor so it gets combined as well.
    AddToWorkList(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  // (and likewise (not (and x, y)) -> (or (not x), (not y))); i1 only.
  if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  // (and likewise for (not (and x, y))); any type, N1 must be all ones.
  if (N1C && N1C->isAllOnesValue() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
    }
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  // The inner constant may sit on either side of the inner xor.
  if (N1C && N0.getOpcode() == ISD::XOR) {
    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (N00C)
      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), VT));
    if (N01C)
      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), VT));
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // Simplify the expression using non-local knowledge.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// visitShiftByConstant - Handle transforms common to the three shifts, when
/// the shift amount is a constant.
///
/// Rewrites (shift (binop (shift' x, c'), c), amt) into
/// (binop (shift (shift' x, c'), amt), (shift c, amt)) for binop in
/// {or, xor, and, add}.  N is the shift node; Amt is not referenced in this
/// body -- the shift amount is taken from N's operand 1.  Returns the new
/// binop, or a null SDValue if the transform does not apply.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant as well.
  ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant.
  // If it is not a shift, it pessimizes some common cases like:
  //
  //    void foo(int *X, int i) { X[i & 1235] = 1; }
  //    int bar(int *X, int i) { return X[i & 255]; }
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
       BinOpLHSVal->getOpcode() != ISD::SRA &&
       BinOpLHSVal->getOpcode() != ISD::SRL) ||
      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 LHS->getOperand(0).getDebugLoc(),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
}

/// visitSHL - Try to simplify the SHL node N.  Returns the replacement value,
/// SDValue(N, 0) if N was updated in place, or a null SDValue if nothing
/// applied.  The folds are tried in the order written.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold (shl c1, c2) -> c1<<c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND &&
      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
    SDValue N101 = N1.getOperand(0).getOperand(1);
    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
      EVT TruncVT = N1.getValueType();
      SDValue N100 = N1.getOperand(0).getOperand(0);
      APInt TruncC = N101C->getAPIntValue();
      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
                         DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
                                     DAG.getNode(ISD::TRUNCATE,
                                                 N->getDebugLoc(),
                                                 TruncVT, N100),
                                     DAG.getConstant(TruncC, TruncVT)));
    }
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    if (c1 + c2 >= OpSizeInBits)
      return DAG.getConstant(0, VT);
    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getConstant(c1 + c2, N1.getValueType()));
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    if (c2 >= OpSizeInBits - InnerShiftSize) {
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
                         DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
                                     N0.getOperand(0)->getOperand(0)),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
  //                               (and (srl x, (sub c1, c2)), MASK)
  // MASK starts as the bits that survive the inner srl and is then moved by
  // the same net shift that is applied to x.
  if (N1C && N0.getOpcode() == ISD::SRL &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    if (c1 < VT.getSizeInBits()) {
      uint64_t c2 = N1C->getZExtValue();
      APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                         VT.getSizeInBits() - c1);
      SDValue Shift;
      if (c2 > c1) {
        Mask = Mask.shl(c2-c1);
        Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
                            DAG.getConstant(c2-c1, N1.getValueType()));
      } else {
        Mask = Mask.lshr(c1-c2);
        Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
                            DAG.getConstant(c1-c2, N1.getValueType()));
      }
      return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift,
                         DAG.getConstant(Mask, VT));
    }
  }
  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
    SDValue HiBitsMask =
      DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
                                            VT.getSizeInBits() -
                                              N1C->getZExtValue()),
                      VT);
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
                       HiBitsMask);
  }

  // Finally try the transforms shared by all constant-amount shifts.
  if (N1C) {
    SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
    if (NewSHL.getNode())
      return NewSHL;
  }

  return SDValue();
}

SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold (sra c1, c2) -> (sra c1, c2)
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (sra -1, x) -> -1
  if (N0C && N0C->isAllOnesValue())
    return N0;
  // fold (sra x, (setge c, size(x))) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
3396 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { 3397 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); 3398 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); 3399 if (VT.isVector()) 3400 ExtVT = EVT::getVectorVT(*DAG.getContext(), 3401 ExtVT, VT.getVectorNumElements()); 3402 if ((!LegalOperations || 3403 TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) 3404 return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, 3405 N0.getOperand(0), DAG.getValueType(ExtVT)); 3406 } 3407 3408 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) 3409 if (N1C && N0.getOpcode() == ISD::SRA) { 3410 if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 3411 unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); 3412 if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; 3413 return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), 3414 DAG.getConstant(Sum, N1C->getValueType(0))); 3415 } 3416 } 3417 3418 // fold (sra (shl X, m), (sub result_size, n)) 3419 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for 3420 // result_size - n != m. 3421 // If truncate is free for the target sext(shl) is likely to result in better 3422 // code. 3423 if (N0.getOpcode() == ISD::SHL) { 3424 // Get the two constanst of the shifts, CN0 = m, CN = n. 3425 const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3426 if (N01C && N1C) { 3427 // Determine what the truncate's result bitsize and type would be. 3428 EVT TruncVT = 3429 EVT::getIntegerVT(*DAG.getContext(), 3430 OpSizeInBits - N1C->getZExtValue()); 3431 // Determine the residual right-shift amount. 3432 signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); 3433 3434 // If the shift is not a no-op (in which case this should be just a sign 3435 // extend already), the truncated to type is legal, sign_extend is legal 3436 // on that type, and the truncate to that type is both legal and free, 3437 // perform the transform. 
3438 if ((ShiftAmt > 0) && 3439 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && 3440 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && 3441 TLI.isTruncateFree(VT, TruncVT)) { 3442 3443 SDValue Amt = DAG.getConstant(ShiftAmt, 3444 getShiftAmountTy(N0.getOperand(0).getValueType())); 3445 SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, 3446 N0.getOperand(0), Amt); 3447 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT, 3448 Shift); 3449 return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), 3450 N->getValueType(0), Trunc); 3451 } 3452 } 3453 } 3454 3455 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 3456 if (N1.getOpcode() == ISD::TRUNCATE && 3457 N1.getOperand(0).getOpcode() == ISD::AND && 3458 N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { 3459 SDValue N101 = N1.getOperand(0).getOperand(1); 3460 if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { 3461 EVT TruncVT = N1.getValueType(); 3462 SDValue N100 = N1.getOperand(0).getOperand(0); 3463 APInt TruncC = N101C->getAPIntValue(); 3464 TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); 3465 return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, 3466 DAG.getNode(ISD::AND, N->getDebugLoc(), 3467 TruncVT, 3468 DAG.getNode(ISD::TRUNCATE, 3469 N->getDebugLoc(), 3470 TruncVT, N100), 3471 DAG.getConstant(TruncC, TruncVT))); 3472 } 3473 } 3474 3475 // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) 3476 // if c1 is equal to the number of bits the trunc removes 3477 if (N0.getOpcode() == ISD::TRUNCATE && 3478 (N0.getOperand(0).getOpcode() == ISD::SRL || 3479 N0.getOperand(0).getOpcode() == ISD::SRA) && 3480 N0.getOperand(0).hasOneUse() && 3481 N0.getOperand(0).getOperand(1).hasOneUse() && 3482 N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) { 3483 EVT LargeVT = N0.getOperand(0).getValueType(); 3484 ConstantSDNode *LargeShiftAmt = 3485 cast<ConstantSDNode>(N0.getOperand(0).getOperand(1)); 3486 3487 if 
(LargeVT.getScalarType().getSizeInBits() - OpSizeInBits == 3488 LargeShiftAmt->getZExtValue()) { 3489 SDValue Amt = 3490 DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), 3491 getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType())); 3492 SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT, 3493 N0.getOperand(0).getOperand(0), Amt); 3494 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA); 3495 } 3496 } 3497 3498 // Simplify, based on bits shifted out of the LHS. 3499 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 3500 return SDValue(N, 0); 3501 3502 3503 // If the sign bit is known to be zero, switch this to a SRL. 3504 if (DAG.SignBitIsZero(N0)) 3505 return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1); 3506 3507 if (N1C) { 3508 SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); 3509 if (NewSRA.getNode()) 3510 return NewSRA; 3511 } 3512 3513 return SDValue(); 3514 } 3515 3516 SDValue DAGCombiner::visitSRL(SDNode *N) { 3517 SDValue N0 = N->getOperand(0); 3518 SDValue N1 = N->getOperand(1); 3519 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 3520 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3521 EVT VT = N0.getValueType(); 3522 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 3523 3524 // fold (srl c1, c2) -> c1 >>u c2 3525 if (N0C && N1C) 3526 return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); 3527 // fold (srl 0, x) -> 0 3528 if (N0C && N0C->isNullValue()) 3529 return N0; 3530 // fold (srl x, c >= size(x)) -> undef 3531 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 3532 return DAG.getUNDEF(VT); 3533 // fold (srl x, 0) -> x 3534 if (N1C && N1C->isNullValue()) 3535 return N0; 3536 // if (srl x, c) is known to be zero, return 0 3537 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 3538 APInt::getAllOnesValue(OpSizeInBits))) 3539 return DAG.getConstant(0, VT); 3540 3541 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) 3542 if (N1C && N0.getOpcode() == ISD::SRL 
&& 3543 N0.getOperand(1).getOpcode() == ISD::Constant) { 3544 uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 3545 uint64_t c2 = N1C->getZExtValue(); 3546 if (c1 + c2 >= OpSizeInBits) 3547 return DAG.getConstant(0, VT); 3548 return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), 3549 DAG.getConstant(c1 + c2, N1.getValueType())); 3550 } 3551 3552 // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) 3553 if (N1C && N0.getOpcode() == ISD::TRUNCATE && 3554 N0.getOperand(0).getOpcode() == ISD::SRL && 3555 isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { 3556 uint64_t c1 = 3557 cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); 3558 uint64_t c2 = N1C->getZExtValue(); 3559 EVT InnerShiftVT = N0.getOperand(0).getValueType(); 3560 EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); 3561 uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); 3562 // This is only valid if the OpSizeInBits + c1 = size of inner shift. 3563 if (c1 + OpSizeInBits == InnerShiftSize) { 3564 if (c1 + c2 >= InnerShiftSize) 3565 return DAG.getConstant(0, VT); 3566 return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT, 3567 DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT, 3568 N0.getOperand(0)->getOperand(0), 3569 DAG.getConstant(c1 + c2, ShiftCountVT))); 3570 } 3571 } 3572 3573 // fold (srl (shl x, c), c) -> (and x, cst2) 3574 if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && 3575 N0.getValueSizeInBits() <= 64) { 3576 uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); 3577 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), 3578 DAG.getConstant(~0ULL >> ShAmt, VT)); 3579 } 3580 3581 3582 // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) 3583 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 3584 // Shifting in all undef bits? 
3585 EVT SmallVT = N0.getOperand(0).getValueType(); 3586 if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) 3587 return DAG.getUNDEF(VT); 3588 3589 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { 3590 uint64_t ShiftAmt = N1C->getZExtValue(); 3591 SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, 3592 N0.getOperand(0), 3593 DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); 3594 AddToWorkList(SmallShift.getNode()); 3595 return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); 3596 } 3597 } 3598 3599 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign 3600 // bit, which is unmodified by sra. 3601 if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) { 3602 if (N0.getOpcode() == ISD::SRA) 3603 return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1); 3604 } 3605 3606 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). 3607 if (N1C && N0.getOpcode() == ISD::CTLZ && 3608 N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { 3609 APInt KnownZero, KnownOne; 3610 APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); 3611 DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); 3612 3613 // If any of the input bits are KnownOne, then the input couldn't be all 3614 // zeros, thus the result of the srl will always be zero. 3615 if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT); 3616 3617 // If all of the bits input the to ctlz node are known to be zero, then 3618 // the result of the ctlz is "32" and the result of the shift is one. 3619 APInt UnknownBits = ~KnownZero & Mask; 3620 if (UnknownBits == 0) return DAG.getConstant(1, VT); 3621 3622 // Otherwise, check to see if there is exactly one bit input to the ctlz. 3623 if ((UnknownBits & (UnknownBits - 1)) == 0) { 3624 // Okay, we know that only that the single bit specified by UnknownBits 3625 // could be set on input to the CTLZ node. 
If this bit is set, the SRL 3626 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair 3627 // to an SRL/XOR pair, which is likely to simplify more. 3628 unsigned ShAmt = UnknownBits.countTrailingZeros(); 3629 SDValue Op = N0.getOperand(0); 3630 3631 if (ShAmt) { 3632 Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op, 3633 DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); 3634 AddToWorkList(Op.getNode()); 3635 } 3636 3637 return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, 3638 Op, DAG.getConstant(1, VT)); 3639 } 3640 } 3641 3642 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). 3643 if (N1.getOpcode() == ISD::TRUNCATE && 3644 N1.getOperand(0).getOpcode() == ISD::AND && 3645 N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { 3646 SDValue N101 = N1.getOperand(0).getOperand(1); 3647 if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { 3648 EVT TruncVT = N1.getValueType(); 3649 SDValue N100 = N1.getOperand(0).getOperand(0); 3650 APInt TruncC = N101C->getAPIntValue(); 3651 TruncC = TruncC.trunc(TruncVT.getSizeInBits()); 3652 return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, 3653 DAG.getNode(ISD::AND, N->getDebugLoc(), 3654 TruncVT, 3655 DAG.getNode(ISD::TRUNCATE, 3656 N->getDebugLoc(), 3657 TruncVT, N100), 3658 DAG.getConstant(TruncC, TruncVT))); 3659 } 3660 } 3661 3662 // fold operands of srl based on knowledge that the low bits are not 3663 // demanded. 3664 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 3665 return SDValue(N, 0); 3666 3667 if (N1C) { 3668 SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue()); 3669 if (NewSRL.getNode()) 3670 return NewSRL; 3671 } 3672 3673 // Attempt to convert a srl of a load into a narrower zero-extending load. 3674 SDValue NarrowLoad = ReduceLoadWidth(N); 3675 if (NarrowLoad.getNode()) 3676 return NarrowLoad; 3677 3678 // Here is a common situation. We want to optimize: 3679 // 3680 // %a = ... 
3681 // %b = and i32 %a, 2 3682 // %c = srl i32 %b, 1 3683 // brcond i32 %c ... 3684 // 3685 // into 3686 // 3687 // %a = ... 3688 // %b = and %a, 2 3689 // %c = setcc eq %b, 0 3690 // brcond %c ... 3691 // 3692 // However when after the source operand of SRL is optimized into AND, the SRL 3693 // itself may not be optimized further. Look for it and add the BRCOND into 3694 // the worklist. 3695 if (N->hasOneUse()) { 3696 SDNode *Use = *N->use_begin(); 3697 if (Use->getOpcode() == ISD::BRCOND) 3698 AddToWorkList(Use); 3699 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { 3700 // Also look pass the truncate. 3701 Use = *Use->use_begin(); 3702 if (Use->getOpcode() == ISD::BRCOND) 3703 AddToWorkList(Use); 3704 } 3705 } 3706 3707 return SDValue(); 3708 } 3709 3710 SDValue DAGCombiner::visitCTLZ(SDNode *N) { 3711 SDValue N0 = N->getOperand(0); 3712 EVT VT = N->getValueType(0); 3713 3714 // fold (ctlz c1) -> c2 3715 if (isa<ConstantSDNode>(N0)) 3716 return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0); 3717 return SDValue(); 3718 } 3719 3720 SDValue DAGCombiner::visitCTTZ(SDNode *N) { 3721 SDValue N0 = N->getOperand(0); 3722 EVT VT = N->getValueType(0); 3723 3724 // fold (cttz c1) -> c2 3725 if (isa<ConstantSDNode>(N0)) 3726 return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0); 3727 return SDValue(); 3728 } 3729 3730 SDValue DAGCombiner::visitCTPOP(SDNode *N) { 3731 SDValue N0 = N->getOperand(0); 3732 EVT VT = N->getValueType(0); 3733 3734 // fold (ctpop c1) -> c2 3735 if (isa<ConstantSDNode>(N0)) 3736 return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0); 3737 return SDValue(); 3738 } 3739 3740 SDValue DAGCombiner::visitSELECT(SDNode *N) { 3741 SDValue N0 = N->getOperand(0); 3742 SDValue N1 = N->getOperand(1); 3743 SDValue N2 = N->getOperand(2); 3744 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 3745 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3746 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3747 EVT VT = 
N->getValueType(0); 3748 EVT VT0 = N0.getValueType(); 3749 3750 // fold (select C, X, X) -> X 3751 if (N1 == N2) 3752 return N1; 3753 // fold (select true, X, Y) -> X 3754 if (N0C && !N0C->isNullValue()) 3755 return N1; 3756 // fold (select false, X, Y) -> Y 3757 if (N0C && N0C->isNullValue()) 3758 return N2; 3759 // fold (select C, 1, X) -> (or C, X) 3760 if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) 3761 return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); 3762 // fold (select C, 0, 1) -> (xor C, 1) 3763 if (VT.isInteger() && 3764 (VT0 == MVT::i1 || 3765 (VT0.isInteger() && 3766 TLI.getBooleanContents(false) == TargetLowering::ZeroOrOneBooleanContent)) && 3767 N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { 3768 SDValue XORNode; 3769 if (VT == VT0) 3770 return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0, 3771 N0, DAG.getConstant(1, VT0)); 3772 XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0, 3773 N0, DAG.getConstant(1, VT0)); 3774 AddToWorkList(XORNode.getNode()); 3775 if (VT.bitsGT(VT0)) 3776 return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode); 3777 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode); 3778 } 3779 // fold (select C, 0, X) -> (and (not C), X) 3780 if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { 3781 SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); 3782 AddToWorkList(NOTNode.getNode()); 3783 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2); 3784 } 3785 // fold (select C, X, 1) -> (or (not C), X) 3786 if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { 3787 SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); 3788 AddToWorkList(NOTNode.getNode()); 3789 return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1); 3790 } 3791 // fold (select C, X, 0) -> (and C, X) 3792 if (VT == MVT::i1 && N2C && N2C->isNullValue()) 3793 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); 3794 // fold (select X, X, Y) -> (or X, Y) 
3795 // fold (select X, 1, Y) -> (or X, Y) 3796 if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) 3797 return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); 3798 // fold (select X, Y, X) -> (and X, Y) 3799 // fold (select X, Y, 0) -> (and X, Y) 3800 if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) 3801 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); 3802 3803 // If we can fold this based on the true/false value, do so. 3804 if (SimplifySelectOps(N, N1, N2)) 3805 return SDValue(N, 0); // Don't revisit N. 3806 3807 // fold selects based on a setcc into other things, such as min/max/abs 3808 if (N0.getOpcode() == ISD::SETCC) { 3809 // FIXME: 3810 // Check against MVT::Other for SELECT_CC, which is a workaround for targets 3811 // having to say they don't support SELECT_CC on every type the DAG knows 3812 // about, since there is no way to mark an opcode illegal at all value types 3813 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && 3814 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) 3815 return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, 3816 N0.getOperand(0), N0.getOperand(1), 3817 N1, N2, N0.getOperand(2)); 3818 return SimplifySelect(N->getDebugLoc(), N0, N1, N2); 3819 } 3820 3821 return SDValue(); 3822 } 3823 3824 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { 3825 SDValue N0 = N->getOperand(0); 3826 SDValue N1 = N->getOperand(1); 3827 SDValue N2 = N->getOperand(2); 3828 SDValue N3 = N->getOperand(3); 3829 SDValue N4 = N->getOperand(4); 3830 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); 3831 3832 // fold select_cc lhs, rhs, x, x, cc -> x 3833 if (N2 == N3) 3834 return N2; 3835 3836 // Determine if the condition we're dealing with is constant 3837 SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), 3838 N0, N1, CC, N->getDebugLoc(), false); 3839 if (SCC.getNode()) AddToWorkList(SCC.getNode()); 3840 3841 if (ConstantSDNode *SCCC = 
dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { 3842 if (!SCCC->isNullValue()) 3843 return N2; // cond always true -> true val 3844 else 3845 return N3; // cond always false -> false val 3846 } 3847 3848 // Fold to a simpler select_cc 3849 if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC) 3850 return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(), 3851 SCC.getOperand(0), SCC.getOperand(1), N2, N3, 3852 SCC.getOperand(2)); 3853 3854 // If we can fold this based on the true/false value, do so. 3855 if (SimplifySelectOps(N, N2, N3)) 3856 return SDValue(N, 0); // Don't revisit N. 3857 3858 // fold select_cc into other things, such as min/max/abs 3859 return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC); 3860 } 3861 3862 SDValue DAGCombiner::visitSETCC(SDNode *N) { 3863 return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), 3864 cast<CondCodeSDNode>(N->getOperand(2))->get(), 3865 N->getDebugLoc()); 3866 } 3867 3868 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: 3869 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" 3870 // transformation. Returns true if extension are possible and the above 3871 // mentioned transformation is profitable. 3872 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, 3873 unsigned ExtOpc, 3874 SmallVector<SDNode*, 4> &ExtendNodes, 3875 const TargetLowering &TLI) { 3876 bool HasCopyToRegUses = false; 3877 bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); 3878 for (SDNode::use_iterator UI = N0.getNode()->use_begin(), 3879 UE = N0.getNode()->use_end(); 3880 UI != UE; ++UI) { 3881 SDNode *User = *UI; 3882 if (User == N) 3883 continue; 3884 if (UI.getUse().getResNo() != N0.getResNo()) 3885 continue; 3886 // FIXME: Only extend SETCC N, N and SETCC N, c for now. 
3887 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 3888 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 3889 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 3890 // Sign bits will be lost after a zext. 3891 return false; 3892 bool Add = false; 3893 for (unsigned i = 0; i != 2; ++i) { 3894 SDValue UseOp = User->getOperand(i); 3895 if (UseOp == N0) 3896 continue; 3897 if (!isa<ConstantSDNode>(UseOp)) 3898 return false; 3899 Add = true; 3900 } 3901 if (Add) 3902 ExtendNodes.push_back(User); 3903 continue; 3904 } 3905 // If truncates aren't free and there are users we can't 3906 // extend, it isn't worthwhile. 3907 if (!isTruncFree) 3908 return false; 3909 // Remember if this value is live-out. 3910 if (User->getOpcode() == ISD::CopyToReg) 3911 HasCopyToRegUses = true; 3912 } 3913 3914 if (HasCopyToRegUses) { 3915 bool BothLiveOut = false; 3916 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 3917 UI != UE; ++UI) { 3918 SDUse &Use = UI.getUse(); 3919 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 3920 BothLiveOut = true; 3921 break; 3922 } 3923 } 3924 if (BothLiveOut) 3925 // Both unextended and extended values are live out. There had better be 3926 // a good reason for the transformation. 3927 return ExtendNodes.size(); 3928 } 3929 return true; 3930 } 3931 3932 void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, 3933 SDValue Trunc, SDValue ExtLoad, DebugLoc DL, 3934 ISD::NodeType ExtType) { 3935 // Extend SetCC uses if necessary. 
3936 for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { 3937 SDNode *SetCC = SetCCs[i]; 3938 SmallVector<SDValue, 4> Ops; 3939 3940 for (unsigned j = 0; j != 2; ++j) { 3941 SDValue SOp = SetCC->getOperand(j); 3942 if (SOp == Trunc) 3943 Ops.push_back(ExtLoad); 3944 else 3945 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); 3946 } 3947 3948 Ops.push_back(SetCC->getOperand(2)); 3949 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), 3950 &Ops[0], Ops.size())); 3951 } 3952 } 3953 3954 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { 3955 SDValue N0 = N->getOperand(0); 3956 EVT VT = N->getValueType(0); 3957 3958 // fold (sext c1) -> c1 3959 if (isa<ConstantSDNode>(N0)) 3960 return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0); 3961 3962 // fold (sext (sext x)) -> (sext x) 3963 // fold (sext (aext x)) -> (sext x) 3964 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) 3965 return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, 3966 N0.getOperand(0)); 3967 3968 if (N0.getOpcode() == ISD::TRUNCATE) { 3969 // fold (sext (truncate (load x))) -> (sext (smaller load x)) 3970 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) 3971 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 3972 if (NarrowLoad.getNode()) { 3973 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 3974 if (NarrowLoad.getNode() != N0.getNode()) { 3975 CombineTo(N0.getNode(), NarrowLoad); 3976 // CombineTo deleted the truncate, if needed, but not what's under it. 3977 AddToWorkList(oye); 3978 } 3979 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3980 } 3981 3982 // See if the value being truncated is already sign extended. If so, just 3983 // eliminate the trunc/sext pair. 
3984 SDValue Op = N0.getOperand(0); 3985 unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits(); 3986 unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits(); 3987 unsigned DestBits = VT.getScalarType().getSizeInBits(); 3988 unsigned NumSignBits = DAG.ComputeNumSignBits(Op); 3989 3990 if (OpBits == DestBits) { 3991 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign 3992 // bits, it is already ready. 3993 if (NumSignBits > DestBits-MidBits) 3994 return Op; 3995 } else if (OpBits < DestBits) { 3996 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign 3997 // bits, just sext from i32. 3998 if (NumSignBits > OpBits-MidBits) 3999 return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op); 4000 } else { 4001 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign 4002 // bits, just truncate to i32. 4003 if (NumSignBits > OpBits-MidBits) 4004 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); 4005 } 4006 4007 // fold (sext (truncate x)) -> (sextinreg x). 4008 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, 4009 N0.getValueType())) { 4010 if (OpBits < DestBits) 4011 Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op); 4012 else if (OpBits > DestBits) 4013 Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); 4014 return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, 4015 DAG.getValueType(N0.getValueType())); 4016 } 4017 } 4018 4019 // fold (sext (load x)) -> (sext (truncate (sextload x))) 4020 // None of the supported targets knows how to perform load and sign extend 4021 // on vectors in one instruction. We only perform this transformation on 4022 // scalars. 
4023 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && 4024 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 4025 TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { 4026 bool DoXform = true; 4027 SmallVector<SDNode*, 4> SetCCs; 4028 if (!N0.hasOneUse()) 4029 DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); 4030 if (DoXform) { 4031 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4032 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, 4033 LN0->getChain(), 4034 LN0->getBasePtr(), LN0->getPointerInfo(), 4035 N0.getValueType(), 4036 LN0->isVolatile(), LN0->isNonTemporal(), 4037 LN0->getAlignment()); 4038 CombineTo(N, ExtLoad); 4039 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), 4040 N0.getValueType(), ExtLoad); 4041 CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); 4042 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), 4043 ISD::SIGN_EXTEND); 4044 return SDValue(N, 0); // Return N so it doesn't get rechecked! 4045 } 4046 } 4047 4048 // fold (sext (sextload x)) -> (sext (truncate (sextload x))) 4049 // fold (sext ( extload x)) -> (sext (truncate (sextload x))) 4050 if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && 4051 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { 4052 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4053 EVT MemVT = LN0->getMemoryVT(); 4054 if ((!LegalOperations && !LN0->isVolatile()) || 4055 TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { 4056 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, 4057 LN0->getChain(), 4058 LN0->getBasePtr(), LN0->getPointerInfo(), 4059 MemVT, 4060 LN0->isVolatile(), LN0->isNonTemporal(), 4061 LN0->getAlignment()); 4062 CombineTo(N, ExtLoad); 4063 CombineTo(N0.getNode(), 4064 DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), 4065 N0.getValueType(), ExtLoad), 4066 ExtLoad.getValue(1)); 4067 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
4068 } 4069 } 4070 4071 // fold (sext (and/or/xor (load x), cst)) -> 4072 // (and/or/xor (sextload x), (sext cst)) 4073 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || 4074 N0.getOpcode() == ISD::XOR) && 4075 isa<LoadSDNode>(N0.getOperand(0)) && 4076 N0.getOperand(1).getOpcode() == ISD::Constant && 4077 TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && 4078 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { 4079 LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); 4080 if (LN0->getExtensionType() != ISD::ZEXTLOAD) { 4081 bool DoXform = true; 4082 SmallVector<SDNode*, 4> SetCCs; 4083 if (!N0.hasOneUse()) 4084 DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, 4085 SetCCs, TLI); 4086 if (DoXform) { 4087 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT, 4088 LN0->getChain(), LN0->getBasePtr(), 4089 LN0->getPointerInfo(), 4090 LN0->getMemoryVT(), 4091 LN0->isVolatile(), 4092 LN0->isNonTemporal(), 4093 LN0->getAlignment()); 4094 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 4095 Mask = Mask.sext(VT.getSizeInBits()); 4096 SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, 4097 ExtLoad, DAG.getConstant(Mask, VT)); 4098 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, 4099 N0.getOperand(0).getDebugLoc(), 4100 N0.getOperand(0).getValueType(), ExtLoad); 4101 CombineTo(N, And); 4102 CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); 4103 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), 4104 ISD::SIGN_EXTEND); 4105 return SDValue(N, 0); // Return N so it doesn't get rechecked! 4106 } 4107 } 4108 } 4109 4110 if (N0.getOpcode() == ISD::SETCC) { 4111 // sext(setcc) -> sext_in_reg(vsetcc) for vectors. 4112 // Only do this before legalize for now. 
4113 if (VT.isVector() && !LegalOperations) { 4114 EVT N0VT = N0.getOperand(0).getValueType(); 4115 // We know that the # elements of the results is the same as the 4116 // # elements of the compare (and the # elements of the compare result 4117 // for that matter). Check to see that they are the same size. If so, 4118 // we know that the element size of the sext'd result matches the 4119 // element size of the compare operands. 4120 if (VT.getSizeInBits() == N0VT.getSizeInBits()) 4121 return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), 4122 N0.getOperand(1), 4123 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 4124 // If the desired elements are smaller or larger than the source 4125 // elements we can use a matching integer vector type and then 4126 // truncate/sign extend 4127 else { 4128 EVT MatchingElementType = 4129 EVT::getIntegerVT(*DAG.getContext(), 4130 N0VT.getScalarType().getSizeInBits()); 4131 EVT MatchingVectorType = 4132 EVT::getVectorVT(*DAG.getContext(), MatchingElementType, 4133 N0VT.getVectorNumElements()); 4134 SDValue VsetCC = 4135 DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), 4136 N0.getOperand(1), 4137 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 4138 return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); 4139 } 4140 } 4141 4142 // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) 4143 unsigned ElementWidth = VT.getScalarType().getSizeInBits(); 4144 SDValue NegOne = 4145 DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); 4146 SDValue SCC = 4147 SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), 4148 NegOne, DAG.getConstant(0, VT), 4149 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 4150 if (SCC.getNode()) return SCC; 4151 if (!LegalOperations || 4152 TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))) 4153 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, 4154 DAG.getSetCC(N->getDebugLoc(), 4155 TLI.getSetCCResultType(VT), 4156 N0.getOperand(0), 
N0.getOperand(1), 4157 cast<CondCodeSDNode>(N0.getOperand(2))->get()), 4158 NegOne, DAG.getConstant(0, VT)); 4159 } 4160 4161 // fold (sext x) -> (zext x) if the sign bit is known zero. 4162 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && 4163 DAG.SignBitIsZero(N0)) 4164 return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); 4165 4166 return SDValue(); 4167 } 4168 4169 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { 4170 SDValue N0 = N->getOperand(0); 4171 EVT VT = N->getValueType(0); 4172 4173 // fold (zext c1) -> c1 4174 if (isa<ConstantSDNode>(N0)) 4175 return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); 4176 // fold (zext (zext x)) -> (zext x) 4177 // fold (zext (aext x)) -> (zext x) 4178 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) 4179 return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, 4180 N0.getOperand(0)); 4181 4182 // fold (zext (truncate (load x))) -> (zext (smaller load x)) 4183 // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) 4184 if (N0.getOpcode() == ISD::TRUNCATE) { 4185 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 4186 if (NarrowLoad.getNode()) { 4187 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 4188 if (NarrowLoad.getNode() != N0.getNode()) { 4189 CombineTo(N0.getNode(), NarrowLoad); 4190 // CombineTo deleted the truncate, if needed, but not what's under it. 4191 AddToWorkList(oye); 4192 } 4193 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
4194 } 4195 } 4196 4197 // fold (zext (truncate x)) -> (and x, mask) 4198 if (N0.getOpcode() == ISD::TRUNCATE && 4199 (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { 4200 4201 // fold (zext (truncate (load x))) -> (zext (smaller load x)) 4202 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) 4203 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 4204 if (NarrowLoad.getNode()) { 4205 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 4206 if (NarrowLoad.getNode() != N0.getNode()) { 4207 CombineTo(N0.getNode(), NarrowLoad); 4208 // CombineTo deleted the truncate, if needed, but not what's under it. 4209 AddToWorkList(oye); 4210 } 4211 return SDValue(N, 0); // Return N so it doesn't get rechecked! 4212 } 4213 4214 SDValue Op = N0.getOperand(0); 4215 if (Op.getValueType().bitsLT(VT)) { 4216 Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); 4217 } else if (Op.getValueType().bitsGT(VT)) { 4218 Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); 4219 } 4220 return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), 4221 N0.getValueType().getScalarType()); 4222 } 4223 4224 // Fold (zext (and (trunc x), cst)) -> (and x, cst), 4225 // if either of the casts is not free. 
4226 if (N0.getOpcode() == ISD::AND && 4227 N0.getOperand(0).getOpcode() == ISD::TRUNCATE && 4228 N0.getOperand(1).getOpcode() == ISD::Constant && 4229 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), 4230 N0.getValueType()) || 4231 !TLI.isZExtFree(N0.getValueType(), VT))) { 4232 SDValue X = N0.getOperand(0).getOperand(0); 4233 if (X.getValueType().bitsLT(VT)) { 4234 X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X); 4235 } else if (X.getValueType().bitsGT(VT)) { 4236 X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); 4237 } 4238 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 4239 Mask = Mask.zext(VT.getSizeInBits()); 4240 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, 4241 X, DAG.getConstant(Mask, VT)); 4242 } 4243 4244 // fold (zext (load x)) -> (zext (truncate (zextload x))) 4245 // None of the supported targets knows how to perform load and vector_zext 4246 // on vectors in one instruction. We only perform this transformation on 4247 // scalars. 
4248 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && 4249 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 4250 TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { 4251 bool DoXform = true; 4252 SmallVector<SDNode*, 4> SetCCs; 4253 if (!N0.hasOneUse()) 4254 DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); 4255 if (DoXform) { 4256 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4257 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, 4258 LN0->getChain(), 4259 LN0->getBasePtr(), LN0->getPointerInfo(), 4260 N0.getValueType(), 4261 LN0->isVolatile(), LN0->isNonTemporal(), 4262 LN0->getAlignment()); 4263 CombineTo(N, ExtLoad); 4264 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), 4265 N0.getValueType(), ExtLoad); 4266 CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); 4267 4268 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), 4269 ISD::ZERO_EXTEND); 4270 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
4271 } 4272 } 4273 4274 // fold (zext (and/or/xor (load x), cst)) -> 4275 // (and/or/xor (zextload x), (zext cst)) 4276 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || 4277 N0.getOpcode() == ISD::XOR) && 4278 isa<LoadSDNode>(N0.getOperand(0)) && 4279 N0.getOperand(1).getOpcode() == ISD::Constant && 4280 TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && 4281 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { 4282 LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); 4283 if (LN0->getExtensionType() != ISD::SEXTLOAD) { 4284 bool DoXform = true; 4285 SmallVector<SDNode*, 4> SetCCs; 4286 if (!N0.hasOneUse()) 4287 DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, 4288 SetCCs, TLI); 4289 if (DoXform) { 4290 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, 4291 LN0->getChain(), LN0->getBasePtr(), 4292 LN0->getPointerInfo(), 4293 LN0->getMemoryVT(), 4294 LN0->isVolatile(), 4295 LN0->isNonTemporal(), 4296 LN0->getAlignment()); 4297 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 4298 Mask = Mask.zext(VT.getSizeInBits()); 4299 SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, 4300 ExtLoad, DAG.getConstant(Mask, VT)); 4301 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, 4302 N0.getOperand(0).getDebugLoc(), 4303 N0.getOperand(0).getValueType(), ExtLoad); 4304 CombineTo(N, And); 4305 CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); 4306 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), 4307 ISD::ZERO_EXTEND); 4308 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
4309 } 4310 } 4311 } 4312 4313 // fold (zext (zextload x)) -> (zext (truncate (zextload x))) 4314 // fold (zext ( extload x)) -> (zext (truncate (zextload x))) 4315 if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && 4316 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { 4317 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4318 EVT MemVT = LN0->getMemoryVT(); 4319 if ((!LegalOperations && !LN0->isVolatile()) || 4320 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { 4321 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, 4322 LN0->getChain(), 4323 LN0->getBasePtr(), LN0->getPointerInfo(), 4324 MemVT, 4325 LN0->isVolatile(), LN0->isNonTemporal(), 4326 LN0->getAlignment()); 4327 CombineTo(N, ExtLoad); 4328 CombineTo(N0.getNode(), 4329 DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), 4330 ExtLoad), 4331 ExtLoad.getValue(1)); 4332 return SDValue(N, 0); // Return N so it doesn't get rechecked! 4333 } 4334 } 4335 4336 if (N0.getOpcode() == ISD::SETCC) { 4337 if (!LegalOperations && VT.isVector()) { 4338 // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. 4339 // Only do this before legalize for now. 4340 EVT N0VT = N0.getOperand(0).getValueType(); 4341 EVT EltVT = VT.getVectorElementType(); 4342 SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), 4343 DAG.getConstant(1, EltVT)); 4344 if (VT.getSizeInBits() == N0VT.getSizeInBits()) 4345 // We know that the # elements of the results is the same as the 4346 // # elements of the compare (and the # elements of the compare result 4347 // for that matter). Check to see that they are the same size. If so, 4348 // we know that the element size of the sext'd result matches the 4349 // element size of the compare operands. 
4350 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, 4351 DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), 4352 N0.getOperand(1), 4353 cast<CondCodeSDNode>(N0.getOperand(2))->get()), 4354 DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, 4355 &OneOps[0], OneOps.size())); 4356 4357 // If the desired elements are smaller or larger than the source 4358 // elements we can use a matching integer vector type and then 4359 // truncate/sign extend 4360 EVT MatchingElementType = 4361 EVT::getIntegerVT(*DAG.getContext(), 4362 N0VT.getScalarType().getSizeInBits()); 4363 EVT MatchingVectorType = 4364 EVT::getVectorVT(*DAG.getContext(), MatchingElementType, 4365 N0VT.getVectorNumElements()); 4366 SDValue VsetCC = 4367 DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), 4368 N0.getOperand(1), 4369 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 4370 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, 4371 DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT), 4372 DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, 4373 &OneOps[0], OneOps.size())); 4374 } 4375 4376 // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc 4377 SDValue SCC = 4378 SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), 4379 DAG.getConstant(1, VT), DAG.getConstant(0, VT), 4380 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 4381 if (SCC.getNode()) return SCC; 4382 } 4383 4384 // (zext (shl (zext x), cst)) -> (shl (zext x), cst) 4385 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && 4386 isa<ConstantSDNode>(N0.getOperand(1)) && 4387 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && 4388 N0.hasOneUse()) { 4389 SDValue ShAmt = N0.getOperand(1); 4390 unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 4391 if (N0.getOpcode() == ISD::SHL) { 4392 SDValue InnerZExt = N0.getOperand(0); 4393 // If the original shl may be shifting out bits, do not perform this 4394 // transformation. 
4395 unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - 4396 InnerZExt.getOperand(0).getValueType().getSizeInBits(); 4397 if (ShAmtVal > KnownZeroBits) 4398 return SDValue(); 4399 } 4400 4401 DebugLoc DL = N->getDebugLoc(); 4402 4403 // Ensure that the shift amount is wide enough for the shifted value. 4404 if (VT.getSizeInBits() >= 256) 4405 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); 4406 4407 return DAG.getNode(N0.getOpcode(), DL, VT, 4408 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), 4409 ShAmt); 4410 } 4411 4412 return SDValue(); 4413 } 4414 4415 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { 4416 SDValue N0 = N->getOperand(0); 4417 EVT VT = N->getValueType(0); 4418 4419 // fold (aext c1) -> c1 4420 if (isa<ConstantSDNode>(N0)) 4421 return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0); 4422 // fold (aext (aext x)) -> (aext x) 4423 // fold (aext (zext x)) -> (zext x) 4424 // fold (aext (sext x)) -> (sext x) 4425 if (N0.getOpcode() == ISD::ANY_EXTEND || 4426 N0.getOpcode() == ISD::ZERO_EXTEND || 4427 N0.getOpcode() == ISD::SIGN_EXTEND) 4428 return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); 4429 4430 // fold (aext (truncate (load x))) -> (aext (smaller load x)) 4431 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) 4432 if (N0.getOpcode() == ISD::TRUNCATE) { 4433 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 4434 if (NarrowLoad.getNode()) { 4435 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 4436 if (NarrowLoad.getNode() != N0.getNode()) { 4437 CombineTo(N0.getNode(), NarrowLoad); 4438 // CombineTo deleted the truncate, if needed, but not what's under it. 4439 AddToWorkList(oye); 4440 } 4441 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
4442 } 4443 } 4444 4445 // fold (aext (truncate x)) 4446 if (N0.getOpcode() == ISD::TRUNCATE) { 4447 SDValue TruncOp = N0.getOperand(0); 4448 if (TruncOp.getValueType() == VT) 4449 return TruncOp; // x iff x size == zext size. 4450 if (TruncOp.getValueType().bitsGT(VT)) 4451 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp); 4452 return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp); 4453 } 4454 4455 // Fold (aext (and (trunc x), cst)) -> (and x, cst) 4456 // if the trunc is not free. 4457 if (N0.getOpcode() == ISD::AND && 4458 N0.getOperand(0).getOpcode() == ISD::TRUNCATE && 4459 N0.getOperand(1).getOpcode() == ISD::Constant && 4460 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), 4461 N0.getValueType())) { 4462 SDValue X = N0.getOperand(0).getOperand(0); 4463 if (X.getValueType().bitsLT(VT)) { 4464 X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X); 4465 } else if (X.getValueType().bitsGT(VT)) { 4466 X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); 4467 } 4468 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 4469 Mask = Mask.zext(VT.getSizeInBits()); 4470 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, 4471 X, DAG.getConstant(Mask, VT)); 4472 } 4473 4474 // fold (aext (load x)) -> (aext (truncate (extload x))) 4475 // None of the supported targets knows how to perform load and any_ext 4476 // on vectors in one instruction. We only perform this transformation on 4477 // scalars. 
4478 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && 4479 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 4480 TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { 4481 bool DoXform = true; 4482 SmallVector<SDNode*, 4> SetCCs; 4483 if (!N0.hasOneUse()) 4484 DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); 4485 if (DoXform) { 4486 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4487 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, 4488 LN0->getChain(), 4489 LN0->getBasePtr(), LN0->getPointerInfo(), 4490 N0.getValueType(), 4491 LN0->isVolatile(), LN0->isNonTemporal(), 4492 LN0->getAlignment()); 4493 CombineTo(N, ExtLoad); 4494 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), 4495 N0.getValueType(), ExtLoad); 4496 CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); 4497 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), 4498 ISD::ANY_EXTEND); 4499 return SDValue(N, 0); // Return N so it doesn't get rechecked! 4500 } 4501 } 4502 4503 // fold (aext (zextload x)) -> (aext (truncate (zextload x))) 4504 // fold (aext (sextload x)) -> (aext (truncate (sextload x))) 4505 // fold (aext ( extload x)) -> (aext (truncate (extload x))) 4506 if (N0.getOpcode() == ISD::LOAD && 4507 !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 4508 N0.hasOneUse()) { 4509 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4510 EVT MemVT = LN0->getMemoryVT(); 4511 SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), 4512 VT, LN0->getChain(), LN0->getBasePtr(), 4513 LN0->getPointerInfo(), MemVT, 4514 LN0->isVolatile(), LN0->isNonTemporal(), 4515 LN0->getAlignment()); 4516 CombineTo(N, ExtLoad); 4517 CombineTo(N0.getNode(), 4518 DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), 4519 N0.getValueType(), ExtLoad), 4520 ExtLoad.getValue(1)); 4521 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
4522 } 4523 4524 if (N0.getOpcode() == ISD::SETCC) { 4525 // aext(setcc) -> sext_in_reg(vsetcc) for vectors. 4526 // Only do this before legalize for now. 4527 if (VT.isVector() && !LegalOperations) { 4528 EVT N0VT = N0.getOperand(0).getValueType(); 4529 // We know that the # elements of the results is the same as the 4530 // # elements of the compare (and the # elements of the compare result 4531 // for that matter). Check to see that they are the same size. If so, 4532 // we know that the element size of the sext'd result matches the 4533 // element size of the compare operands. 4534 if (VT.getSizeInBits() == N0VT.getSizeInBits()) 4535 return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), 4536 N0.getOperand(1), 4537 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 4538 // If the desired elements are smaller or larger than the source 4539 // elements we can use a matching integer vector type and then 4540 // truncate/sign extend 4541 else { 4542 EVT MatchingElementType = 4543 EVT::getIntegerVT(*DAG.getContext(), 4544 N0VT.getScalarType().getSizeInBits()); 4545 EVT MatchingVectorType = 4546 EVT::getVectorVT(*DAG.getContext(), MatchingElementType, 4547 N0VT.getVectorNumElements()); 4548 SDValue VsetCC = 4549 DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), 4550 N0.getOperand(1), 4551 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 4552 return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); 4553 } 4554 } 4555 4556 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc 4557 SDValue SCC = 4558 SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), 4559 DAG.getConstant(1, VT), DAG.getConstant(0, VT), 4560 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 4561 if (SCC.getNode()) 4562 return SCC; 4563 } 4564 4565 return SDValue(); 4566 } 4567 4568 /// GetDemandedBits - See if the specified operand can be simplified with the 4569 /// knowledge that only the bits specified by Mask are used. 
If so, return the 4570 /// simpler operand, otherwise return a null SDValue. 4571 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { 4572 switch (V.getOpcode()) { 4573 default: break; 4574 case ISD::Constant: { 4575 const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); 4576 assert(CV != 0 && "Const value should be ConstSDNode."); 4577 const APInt &CVal = CV->getAPIntValue(); 4578 APInt NewVal = CVal & Mask; 4579 if (NewVal != CVal) { 4580 return DAG.getConstant(NewVal, V.getValueType()); 4581 } 4582 break; 4583 } 4584 case ISD::OR: 4585 case ISD::XOR: 4586 // If the LHS or RHS don't contribute bits to the or, drop them. 4587 if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) 4588 return V.getOperand(1); 4589 if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) 4590 return V.getOperand(0); 4591 break; 4592 case ISD::SRL: 4593 // Only look at single-use SRLs. 4594 if (!V.getNode()->hasOneUse()) 4595 break; 4596 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { 4597 // See if we can recursively simplify the LHS. 4598 unsigned Amt = RHSC->getZExtValue(); 4599 4600 // Watch out for shift count overflow though. 4601 if (Amt >= Mask.getBitWidth()) break; 4602 APInt NewMask = Mask << Amt; 4603 SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); 4604 if (SimplifyLHS.getNode()) 4605 return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(), 4606 SimplifyLHS, V.getOperand(1)); 4607 } 4608 } 4609 return SDValue(); 4610 } 4611 4612 /// ReduceLoadWidth - If the result of a wider load is shifted to right of N 4613 /// bits and then truncated to a narrower type and where N is a multiple 4614 /// of number of bits of the narrower type, transform it to a narrower load 4615 /// from address + N / num of bits of new type. If the result is to be 4616 /// extended, also fold the extension to form a extending load. 
4617 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { 4618 unsigned Opc = N->getOpcode(); 4619 4620 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; 4621 SDValue N0 = N->getOperand(0); 4622 EVT VT = N->getValueType(0); 4623 EVT ExtVT = VT; 4624 4625 // This transformation isn't valid for vector loads. 4626 if (VT.isVector()) 4627 return SDValue(); 4628 4629 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then 4630 // extended to VT. 4631 if (Opc == ISD::SIGN_EXTEND_INREG) { 4632 ExtType = ISD::SEXTLOAD; 4633 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 4634 } else if (Opc == ISD::SRL) { 4635 // Another special-case: SRL is basically zero-extending a narrower value. 4636 ExtType = ISD::ZEXTLOAD; 4637 N0 = SDValue(N, 0); 4638 ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 4639 if (!N01) return SDValue(); 4640 ExtVT = EVT::getIntegerVT(*DAG.getContext(), 4641 VT.getSizeInBits() - N01->getZExtValue()); 4642 } 4643 if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT)) 4644 return SDValue(); 4645 4646 unsigned EVTBits = ExtVT.getSizeInBits(); 4647 4648 // Do not generate loads of non-round integer types since these can 4649 // be expensive (and would be wrong if the type is not byte sized). 4650 if (!ExtVT.isRound()) 4651 return SDValue(); 4652 4653 unsigned ShAmt = 0; 4654 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { 4655 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 4656 ShAmt = N01->getZExtValue(); 4657 // Is the shift amount a multiple of size of VT? 4658 if ((ShAmt & (EVTBits-1)) == 0) { 4659 N0 = N0.getOperand(0); 4660 // Is the load width a multiple of size of VT? 4661 if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) 4662 return SDValue(); 4663 } 4664 4665 // At this point, we must have a load or else we can't do the transform. 
4666 if (!isa<LoadSDNode>(N0)) return SDValue(); 4667 4668 // If the shift amount is larger than the input type then we're not 4669 // accessing any of the loaded bytes. If the load was a zextload/extload 4670 // then the result of the shift+trunc is zero/undef (handled elsewhere). 4671 // If the load was a sextload then the result is a splat of the sign bit 4672 // of the extended byte. This is not worth optimizing for. 4673 if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()) 4674 return SDValue(); 4675 } 4676 } 4677 4678 // If the load is shifted left (and the result isn't shifted back right), 4679 // we can fold the truncate through the shift. 4680 unsigned ShLeftAmt = 0; 4681 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && 4682 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { 4683 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 4684 ShLeftAmt = N01->getZExtValue(); 4685 N0 = N0.getOperand(0); 4686 } 4687 } 4688 4689 // If we haven't found a load, we can't narrow it. Don't transform one with 4690 // multiple uses, this would require adding a new load. 4691 if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() || 4692 // Don't change the width of a volatile load. 4693 cast<LoadSDNode>(N0)->isVolatile()) 4694 return SDValue(); 4695 4696 // Verify that we are actually reducing a load width here. 4697 if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits) 4698 return SDValue(); 4699 4700 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4701 EVT PtrType = N0.getOperand(1).getValueType(); 4702 4703 // For big endian targets, we need to adjust the offset to the pointer to 4704 // load the correct bytes. 
4705 if (TLI.isBigEndian()) { 4706 unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); 4707 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); 4708 ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; 4709 } 4710 4711 uint64_t PtrOff = ShAmt / 8; 4712 unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); 4713 SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), 4714 PtrType, LN0->getBasePtr(), 4715 DAG.getConstant(PtrOff, PtrType)); 4716 AddToWorkList(NewPtr.getNode()); 4717 4718 SDValue Load; 4719 if (ExtType == ISD::NON_EXTLOAD) 4720 Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, 4721 LN0->getPointerInfo().getWithOffset(PtrOff), 4722 LN0->isVolatile(), LN0->isNonTemporal(), 4723 LN0->isInvariant(), NewAlign); 4724 else 4725 Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, 4726 LN0->getPointerInfo().getWithOffset(PtrOff), 4727 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 4728 NewAlign); 4729 4730 // Replace the old load's chain with the new load's chain. 4731 WorkListRemover DeadNodes(*this); 4732 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1), 4733 &DeadNodes); 4734 4735 // Shift the result left, if we've swallowed a left shift. 4736 SDValue Result = Load; 4737 if (ShLeftAmt != 0) { 4738 EVT ShImmTy = getShiftAmountTy(Result.getValueType()); 4739 if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) 4740 ShImmTy = VT; 4741 Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, 4742 Result, DAG.getConstant(ShLeftAmt, ShImmTy)); 4743 } 4744 4745 // Return the new loaded value. 
4746 return Result; 4747 } 4748 4749 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { 4750 SDValue N0 = N->getOperand(0); 4751 SDValue N1 = N->getOperand(1); 4752 EVT VT = N->getValueType(0); 4753 EVT EVT = cast<VTSDNode>(N1)->getVT(); 4754 unsigned VTBits = VT.getScalarType().getSizeInBits(); 4755 unsigned EVTBits = EVT.getScalarType().getSizeInBits(); 4756 4757 // fold (sext_in_reg c1) -> c1 4758 if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF) 4759 return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1); 4760 4761 // If the input is already sign extended, just drop the extension. 4762 if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) 4763 return N0; 4764 4765 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 4766 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && 4767 EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) { 4768 return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, 4769 N0.getOperand(0), N1); 4770 } 4771 4772 // fold (sext_in_reg (sext x)) -> (sext x) 4773 // fold (sext_in_reg (aext x)) -> (sext x) 4774 // if x is small enough. 4775 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { 4776 SDValue N00 = N0.getOperand(0); 4777 if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && 4778 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) 4779 return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1); 4780 } 4781 4782 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. 4783 if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) 4784 return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT); 4785 4786 // fold operands of sext_in_reg based on knowledge that the top bits are not 4787 // demanded. 
4788 if (SimplifyDemandedBits(SDValue(N, 0))) 4789 return SDValue(N, 0); 4790 4791 // fold (sext_in_reg (load x)) -> (smaller sextload x) 4792 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) 4793 SDValue NarrowLoad = ReduceLoadWidth(N); 4794 if (NarrowLoad.getNode()) 4795 return NarrowLoad; 4796 4797 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) 4798 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. 4799 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. 4800 if (N0.getOpcode() == ISD::SRL) { 4801 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 4802 if (ShAmt->getZExtValue()+EVTBits <= VTBits) { 4803 // We can turn this into an SRA iff the input to the SRL is already sign 4804 // extended enough. 4805 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); 4806 if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) 4807 return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, 4808 N0.getOperand(0), N0.getOperand(1)); 4809 } 4810 } 4811 4812 // fold (sext_inreg (extload x)) -> (sextload x) 4813 if (ISD::isEXTLoad(N0.getNode()) && 4814 ISD::isUNINDEXEDLoad(N0.getNode()) && 4815 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 4816 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 4817 TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { 4818 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4819 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, 4820 LN0->getChain(), 4821 LN0->getBasePtr(), LN0->getPointerInfo(), 4822 EVT, 4823 LN0->isVolatile(), LN0->isNonTemporal(), 4824 LN0->getAlignment()); 4825 CombineTo(N, ExtLoad); 4826 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 4827 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
4828 } 4829 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use 4830 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 4831 N0.hasOneUse() && 4832 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 4833 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 4834 TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { 4835 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4836 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, 4837 LN0->getChain(), 4838 LN0->getBasePtr(), LN0->getPointerInfo(), 4839 EVT, 4840 LN0->isVolatile(), LN0->isNonTemporal(), 4841 LN0->getAlignment()); 4842 CombineTo(N, ExtLoad); 4843 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 4844 return SDValue(N, 0); // Return N so it doesn't get rechecked! 4845 } 4846 4847 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) 4848 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { 4849 SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), 4850 N0.getOperand(1), false); 4851 if (BSwap.getNode() != 0) 4852 return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, 4853 BSwap, N1); 4854 } 4855 4856 return SDValue(); 4857 } 4858 4859 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { 4860 SDValue N0 = N->getOperand(0); 4861 EVT VT = N->getValueType(0); 4862 4863 // noop truncate 4864 if (N0.getValueType() == N->getValueType(0)) 4865 return N0; 4866 // fold (truncate c1) -> c1 4867 if (isa<ConstantSDNode>(N0)) 4868 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0); 4869 // fold (truncate (truncate x)) -> (truncate x) 4870 if (N0.getOpcode() == ISD::TRUNCATE) 4871 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); 4872 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x 4873 if (N0.getOpcode() == ISD::ZERO_EXTEND || 4874 N0.getOpcode() == ISD::SIGN_EXTEND || 4875 N0.getOpcode() == ISD::ANY_EXTEND) { 4876 if (N0.getOperand(0).getValueType().bitsLT(VT)) 4877 // if the source is smaller than 
the dest, we still need an extend 4878 return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, 4879 N0.getOperand(0)); 4880 else if (N0.getOperand(0).getValueType().bitsGT(VT)) 4881 // if the source is larger than the dest, than we just need the truncate 4882 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); 4883 else 4884 // if the source and dest are the same type, we can drop both the extend 4885 // and the truncate. 4886 return N0.getOperand(0); 4887 } 4888 4889 // See if we can simplify the input to this truncate through knowledge that 4890 // only the low bits are being used. 4891 // For example "trunc (or (shl x, 8), y)" // -> trunc y 4892 // Currently we only perform this optimization on scalars because vectors 4893 // may have different active low bits. 4894 if (!VT.isVector()) { 4895 SDValue Shorter = 4896 GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), 4897 VT.getSizeInBits())); 4898 if (Shorter.getNode()) 4899 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); 4900 } 4901 // fold (truncate (load x)) -> (smaller load x) 4902 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) 4903 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { 4904 SDValue Reduced = ReduceLoadWidth(N); 4905 if (Reduced.getNode()) 4906 return Reduced; 4907 } 4908 4909 // Simplify the operands using demanded-bits information. 4910 if (!VT.isVector() && 4911 SimplifyDemandedBits(SDValue(N, 0))) 4912 return SDValue(N, 0); 4913 4914 return SDValue(); 4915 } 4916 4917 static SDNode *getBuildPairElt(SDNode *N, unsigned i) { 4918 SDValue Elt = N->getOperand(i); 4919 if (Elt.getOpcode() != ISD::MERGE_VALUES) 4920 return Elt.getNode(); 4921 return Elt.getOperand(Elt.getResNo()).getNode(); 4922 } 4923 4924 /// CombineConsecutiveLoads - build_pair (load, load) -> load 4925 /// if load locations are consecutive. 
4926 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { 4927 assert(N->getOpcode() == ISD::BUILD_PAIR); 4928 4929 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); 4930 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); 4931 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || 4932 LD1->getPointerInfo().getAddrSpace() != 4933 LD2->getPointerInfo().getAddrSpace()) 4934 return SDValue(); 4935 EVT LD1VT = LD1->getValueType(0); 4936 4937 if (ISD::isNON_EXTLoad(LD2) && 4938 LD2->hasOneUse() && 4939 // If both are volatile this would reduce the number of volatile loads. 4940 // If one is volatile it might be ok, but play conservative and bail out. 4941 !LD1->isVolatile() && 4942 !LD2->isVolatile() && 4943 DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { 4944 unsigned Align = LD1->getAlignment(); 4945 unsigned NewAlign = TLI.getTargetData()-> 4946 getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); 4947 4948 if (NewAlign <= Align && 4949 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) 4950 return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), 4951 LD1->getBasePtr(), LD1->getPointerInfo(), 4952 false, false, false, Align); 4953 } 4954 4955 return SDValue(); 4956 } 4957 4958 SDValue DAGCombiner::visitBITCAST(SDNode *N) { 4959 SDValue N0 = N->getOperand(0); 4960 EVT VT = N->getValueType(0); 4961 4962 // If the input is a BUILD_VECTOR with all constant elements, fold this now. 4963 // Only do this before legalize, since afterward the target may be depending 4964 // on the bitconvert. 4965 // First check to see if this is all constant. 
4966 if (!LegalTypes && 4967 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && 4968 VT.isVector()) { 4969 bool isSimple = true; 4970 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) 4971 if (N0.getOperand(i).getOpcode() != ISD::UNDEF && 4972 N0.getOperand(i).getOpcode() != ISD::Constant && 4973 N0.getOperand(i).getOpcode() != ISD::ConstantFP) { 4974 isSimple = false; 4975 break; 4976 } 4977 4978 EVT DestEltVT = N->getValueType(0).getVectorElementType(); 4979 assert(!DestEltVT.isVector() && 4980 "Element type of vector ValueType must not be vector!"); 4981 if (isSimple) 4982 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); 4983 } 4984 4985 // If the input is a constant, let getNode fold it. 4986 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { 4987 SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0); 4988 if (Res.getNode() != N) { 4989 if (!LegalOperations || 4990 TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) 4991 return Res; 4992 4993 // Folding it resulted in an illegal node, and it's too late to 4994 // do that. Clean up the old node and forego the transformation. 4995 // Ideally this won't happen very often, because instcombine 4996 // and the earlier dagcombine runs (where illegal nodes are 4997 // permitted) should have folded most of them already. 4998 DAG.DeleteNode(Res.getNode()); 4999 } 5000 } 5001 5002 // (conv (conv x, t1), t2) -> (conv x, t2) 5003 if (N0.getOpcode() == ISD::BITCAST) 5004 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, 5005 N0.getOperand(0)); 5006 5007 // fold (conv (load x)) -> (load (conv*)x) 5008 // If the resultant load doesn't need a higher alignment than the original! 5009 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 5010 // Do not change the width of a volatile load. 
5011 !cast<LoadSDNode>(N0)->isVolatile() && 5012 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { 5013 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 5014 unsigned Align = TLI.getTargetData()-> 5015 getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); 5016 unsigned OrigAlign = LN0->getAlignment(); 5017 5018 if (Align <= OrigAlign) { 5019 SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), 5020 LN0->getBasePtr(), LN0->getPointerInfo(), 5021 LN0->isVolatile(), LN0->isNonTemporal(), 5022 LN0->isInvariant(), OrigAlign); 5023 AddToWorkList(N); 5024 CombineTo(N0.getNode(), 5025 DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), 5026 N0.getValueType(), Load), 5027 Load.getValue(1)); 5028 return Load; 5029 } 5030 } 5031 5032 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 5033 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 5034 // This often reduces constant pool loads. 5035 if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && 5036 N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { 5037 SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, 5038 N0.getOperand(0)); 5039 AddToWorkList(NewConv.getNode()); 5040 5041 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 5042 if (N0.getOpcode() == ISD::FNEG) 5043 return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, 5044 NewConv, DAG.getConstant(SignBit, VT)); 5045 assert(N0.getOpcode() == ISD::FABS); 5046 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, 5047 NewConv, DAG.getConstant(~SignBit, VT)); 5048 } 5049 5050 // fold (bitconvert (fcopysign cst, x)) -> 5051 // (or (and (bitconvert x), sign), (and cst, (not sign))) 5052 // Note that we don't handle (copysign x, cst) because this can always be 5053 // folded to an fneg or fabs. 
5054 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && 5055 isa<ConstantFPSDNode>(N0.getOperand(0)) && 5056 VT.isInteger() && !VT.isVector()) { 5057 unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); 5058 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); 5059 if (isTypeLegal(IntXVT)) { 5060 SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), 5061 IntXVT, N0.getOperand(1)); 5062 AddToWorkList(X.getNode()); 5063 5064 // If X has a different width than the result/lhs, sext it or truncate it. 5065 unsigned VTWidth = VT.getSizeInBits(); 5066 if (OrigXWidth < VTWidth) { 5067 X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X); 5068 AddToWorkList(X.getNode()); 5069 } else if (OrigXWidth > VTWidth) { 5070 // To get the sign bit in the right place, we have to shift it right 5071 // before truncating. 5072 X = DAG.getNode(ISD::SRL, X.getDebugLoc(), 5073 X.getValueType(), X, 5074 DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); 5075 AddToWorkList(X.getNode()); 5076 X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); 5077 AddToWorkList(X.getNode()); 5078 } 5079 5080 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 5081 X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT, 5082 X, DAG.getConstant(SignBit, VT)); 5083 AddToWorkList(X.getNode()); 5084 5085 SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), 5086 VT, N0.getOperand(0)); 5087 Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, 5088 Cst, DAG.getConstant(~SignBit, VT)); 5089 AddToWorkList(Cst.getNode()); 5090 5091 return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst); 5092 } 5093 } 5094 5095 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 
5096 if (N0.getOpcode() == ISD::BUILD_PAIR) { 5097 SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); 5098 if (CombineLD.getNode()) 5099 return CombineLD; 5100 } 5101 5102 return SDValue(); 5103 } 5104 5105 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { 5106 EVT VT = N->getValueType(0); 5107 return CombineConsecutiveLoads(N, VT); 5108 } 5109 5110 /// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector 5111 /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the 5112 /// destination element value type. 5113 SDValue DAGCombiner:: 5114 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { 5115 EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); 5116 5117 // If this is already the right type, we're done. 5118 if (SrcEltVT == DstEltVT) return SDValue(BV, 0); 5119 5120 unsigned SrcBitSize = SrcEltVT.getSizeInBits(); 5121 unsigned DstBitSize = DstEltVT.getSizeInBits(); 5122 5123 // If this is a conversion of N elements of one type to N elements of another 5124 // type, convert each element. This handles FP<->INT cases. 5125 if (SrcBitSize == DstBitSize) { 5126 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, 5127 BV->getValueType(0).getVectorNumElements()); 5128 5129 // Due to the FP element handling below calling this routine recursively, 5130 // we can end up with a scalar-to-vector node here. 5131 if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) 5132 return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, 5133 DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), 5134 DstEltVT, BV->getOperand(0))); 5135 5136 SmallVector<SDValue, 8> Ops; 5137 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { 5138 SDValue Op = BV->getOperand(i); 5139 // If the vector element type is not legal, the BUILD_VECTOR operands 5140 // are promoted and implicitly truncated. Make that explicit here. 
5141 if (Op.getValueType() != SrcEltVT) 5142 Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); 5143 Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), 5144 DstEltVT, Op)); 5145 AddToWorkList(Ops.back().getNode()); 5146 } 5147 return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, 5148 &Ops[0], Ops.size()); 5149 } 5150 5151 // Otherwise, we're growing or shrinking the elements. To avoid having to 5152 // handle annoying details of growing/shrinking FP values, we convert them to 5153 // int first. 5154 if (SrcEltVT.isFloatingPoint()) { 5155 // Convert the input float vector to a int vector where the elements are the 5156 // same sizes. 5157 assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); 5158 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); 5159 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); 5160 SrcEltVT = IntVT; 5161 } 5162 5163 // Now we know the input is an integer vector. If the output is a FP type, 5164 // convert to integer first, then to FP of the right size. 5165 if (DstEltVT.isFloatingPoint()) { 5166 assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); 5167 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); 5168 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); 5169 5170 // Next, convert to FP elements of the same size. 5171 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); 5172 } 5173 5174 // Okay, we know the src/dst types are both integers of differing types. 5175 // Handling growing first. 
5176 assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); 5177 if (SrcBitSize < DstBitSize) { 5178 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; 5179 5180 SmallVector<SDValue, 8> Ops; 5181 for (unsigned i = 0, e = BV->getNumOperands(); i != e; 5182 i += NumInputsPerOutput) { 5183 bool isLE = TLI.isLittleEndian(); 5184 APInt NewBits = APInt(DstBitSize, 0); 5185 bool EltIsUndef = true; 5186 for (unsigned j = 0; j != NumInputsPerOutput; ++j) { 5187 // Shift the previously computed bits over. 5188 NewBits <<= SrcBitSize; 5189 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); 5190 if (Op.getOpcode() == ISD::UNDEF) continue; 5191 EltIsUndef = false; 5192 5193 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). 5194 zextOrTrunc(SrcBitSize).zext(DstBitSize); 5195 } 5196 5197 if (EltIsUndef) 5198 Ops.push_back(DAG.getUNDEF(DstEltVT)); 5199 else 5200 Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); 5201 } 5202 5203 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); 5204 return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, 5205 &Ops[0], Ops.size()); 5206 } 5207 5208 // Finally, this must be the case where we are shrinking elements: each input 5209 // turns into multiple outputs. 
5210 bool isS2V = ISD::isScalarToVector(BV); 5211 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; 5212 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, 5213 NumOutputsPerInput*BV->getNumOperands()); 5214 SmallVector<SDValue, 8> Ops; 5215 5216 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { 5217 if (BV->getOperand(i).getOpcode() == ISD::UNDEF) { 5218 for (unsigned j = 0; j != NumOutputsPerInput; ++j) 5219 Ops.push_back(DAG.getUNDEF(DstEltVT)); 5220 continue; 5221 } 5222 5223 APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))-> 5224 getAPIntValue().zextOrTrunc(SrcBitSize); 5225 5226 for (unsigned j = 0; j != NumOutputsPerInput; ++j) { 5227 APInt ThisVal = OpVal.trunc(DstBitSize); 5228 Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); 5229 if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) 5230 // Simply turn this into a SCALAR_TO_VECTOR of the new type. 5231 return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, 5232 Ops[0]); 5233 OpVal = OpVal.lshr(DstBitSize); 5234 } 5235 5236 // For big endian targets, swap the order of the pieces of each element. 
5237 if (TLI.isBigEndian()) 5238 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); 5239 } 5240 5241 return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, 5242 &Ops[0], Ops.size()); 5243 } 5244 5245 SDValue DAGCombiner::visitFADD(SDNode *N) { 5246 SDValue N0 = N->getOperand(0); 5247 SDValue N1 = N->getOperand(1); 5248 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5249 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 5250 EVT VT = N->getValueType(0); 5251 5252 // fold vector ops 5253 if (VT.isVector()) { 5254 SDValue FoldedVOp = SimplifyVBinOp(N); 5255 if (FoldedVOp.getNode()) return FoldedVOp; 5256 } 5257 5258 // fold (fadd c1, c2) -> (fadd c1, c2) 5259 if (N0CFP && N1CFP && VT != MVT::ppcf128) 5260 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1); 5261 // canonicalize constant to RHS 5262 if (N0CFP && !N1CFP) 5263 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); 5264 // fold (fadd A, 0) -> A 5265 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 5266 N1CFP->getValueAPF().isZero()) 5267 return N0; 5268 // fold (fadd A, (fneg B)) -> (fsub A, B) 5269 if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options) == 2) 5270 return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, 5271 GetNegatedExpression(N1, DAG, LegalOperations)); 5272 // fold (fadd (fneg A), B) -> (fsub B, A) 5273 if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options) == 2) 5274 return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, 5275 GetNegatedExpression(N0, DAG, LegalOperations)); 5276 5277 // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) 5278 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 5279 N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && 5280 isa<ConstantFPSDNode>(N0.getOperand(1))) 5281 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0), 5282 DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5283 N0.getOperand(1), N1)); 5284 5285 return SDValue(); 5286 } 
5287 5288 SDValue DAGCombiner::visitFSUB(SDNode *N) { 5289 SDValue N0 = N->getOperand(0); 5290 SDValue N1 = N->getOperand(1); 5291 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5292 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 5293 EVT VT = N->getValueType(0); 5294 5295 // fold vector ops 5296 if (VT.isVector()) { 5297 SDValue FoldedVOp = SimplifyVBinOp(N); 5298 if (FoldedVOp.getNode()) return FoldedVOp; 5299 } 5300 5301 // fold (fsub c1, c2) -> c1-c2 5302 if (N0CFP && N1CFP && VT != MVT::ppcf128) 5303 return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); 5304 // fold (fsub A, 0) -> A 5305 if (DAG.getTarget().Options.UnsafeFPMath && 5306 N1CFP && N1CFP->getValueAPF().isZero()) 5307 return N0; 5308 // fold (fsub 0, B) -> -B 5309 if (DAG.getTarget().Options.UnsafeFPMath && 5310 N0CFP && N0CFP->getValueAPF().isZero()) { 5311 if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options)) 5312 return GetNegatedExpression(N1, DAG, LegalOperations); 5313 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 5314 return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); 5315 } 5316 // fold (fsub A, (fneg B)) -> (fadd A, B) 5317 if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options)) 5318 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, 5319 GetNegatedExpression(N1, DAG, LegalOperations)); 5320 5321 return SDValue(); 5322 } 5323 5324 SDValue DAGCombiner::visitFMUL(SDNode *N) { 5325 SDValue N0 = N->getOperand(0); 5326 SDValue N1 = N->getOperand(1); 5327 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5328 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 5329 EVT VT = N->getValueType(0); 5330 5331 // fold vector ops 5332 if (VT.isVector()) { 5333 SDValue FoldedVOp = SimplifyVBinOp(N); 5334 if (FoldedVOp.getNode()) return FoldedVOp; 5335 } 5336 5337 // fold (fmul c1, c2) -> c1*c2 5338 if (N0CFP && N1CFP && VT != MVT::ppcf128) 5339 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1); 
5340 // canonicalize constant to RHS 5341 if (N0CFP && !N1CFP) 5342 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); 5343 // fold (fmul A, 0) -> 0 5344 if (DAG.getTarget().Options.UnsafeFPMath && 5345 N1CFP && N1CFP->getValueAPF().isZero()) 5346 return N1; 5347 // fold (fmul A, 0) -> 0, vector edition. 5348 if (DAG.getTarget().Options.UnsafeFPMath && 5349 ISD::isBuildVectorAllZeros(N1.getNode())) 5350 return N1; 5351 // fold (fmul X, 2.0) -> (fadd X, X) 5352 if (N1CFP && N1CFP->isExactlyValue(+2.0)) 5353 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); 5354 // fold (fmul X, -1.0) -> (fneg X) 5355 if (N1CFP && N1CFP->isExactlyValue(-1.0)) 5356 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 5357 return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); 5358 5359 // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) 5360 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, 5361 &DAG.getTarget().Options)) { 5362 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, 5363 &DAG.getTarget().Options)) { 5364 // Both can be negated for free, check to see if at least one is cheaper 5365 // negated. 
5366 if (LHSNeg == 2 || RHSNeg == 2) 5367 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5368 GetNegatedExpression(N0, DAG, LegalOperations), 5369 GetNegatedExpression(N1, DAG, LegalOperations)); 5370 } 5371 } 5372 5373 // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) 5374 if (DAG.getTarget().Options.UnsafeFPMath && 5375 N1CFP && N0.getOpcode() == ISD::FMUL && 5376 N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) 5377 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), 5378 DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5379 N0.getOperand(1), N1)); 5380 5381 return SDValue(); 5382 } 5383 5384 SDValue DAGCombiner::visitFDIV(SDNode *N) { 5385 SDValue N0 = N->getOperand(0); 5386 SDValue N1 = N->getOperand(1); 5387 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5388 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 5389 EVT VT = N->getValueType(0); 5390 5391 // fold vector ops 5392 if (VT.isVector()) { 5393 SDValue FoldedVOp = SimplifyVBinOp(N); 5394 if (FoldedVOp.getNode()) return FoldedVOp; 5395 } 5396 5397 // fold (fdiv c1, c2) -> c1/c2 5398 if (N0CFP && N1CFP && VT != MVT::ppcf128) 5399 return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); 5400 5401 5402 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) 5403 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, 5404 &DAG.getTarget().Options)) { 5405 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, 5406 &DAG.getTarget().Options)) { 5407 // Both can be negated for free, check to see if at least one is cheaper 5408 // negated. 
5409 if (LHSNeg == 2 || RHSNeg == 2) 5410 return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, 5411 GetNegatedExpression(N0, DAG, LegalOperations), 5412 GetNegatedExpression(N1, DAG, LegalOperations)); 5413 } 5414 } 5415 5416 return SDValue(); 5417 } 5418 5419 SDValue DAGCombiner::visitFREM(SDNode *N) { 5420 SDValue N0 = N->getOperand(0); 5421 SDValue N1 = N->getOperand(1); 5422 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5423 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 5424 EVT VT = N->getValueType(0); 5425 5426 // fold (frem c1, c2) -> fmod(c1,c2) 5427 if (N0CFP && N1CFP && VT != MVT::ppcf128) 5428 return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1); 5429 5430 return SDValue(); 5431 } 5432 5433 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { 5434 SDValue N0 = N->getOperand(0); 5435 SDValue N1 = N->getOperand(1); 5436 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5437 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 5438 EVT VT = N->getValueType(0); 5439 5440 if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold 5441 return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); 5442 5443 if (N1CFP) { 5444 const APFloat& V = N1CFP->getValueAPF(); 5445 // copysign(x, c1) -> fabs(x) iff ispos(c1) 5446 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) 5447 if (!V.isNegative()) { 5448 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) 5449 return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); 5450 } else { 5451 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 5452 return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, 5453 DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0)); 5454 } 5455 } 5456 5457 // copysign(fabs(x), y) -> copysign(x, y) 5458 // copysign(fneg(x), y) -> copysign(x, y) 5459 // copysign(copysign(x,z), y) -> copysign(x, y) 5460 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || 5461 N0.getOpcode() == ISD::FCOPYSIGN) 5462 return DAG.getNode(ISD::FCOPYSIGN, 
N->getDebugLoc(), VT, 5463 N0.getOperand(0), N1); 5464 5465 // copysign(x, abs(y)) -> abs(x) 5466 if (N1.getOpcode() == ISD::FABS) 5467 return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); 5468 5469 // copysign(x, copysign(y,z)) -> copysign(x, z) 5470 if (N1.getOpcode() == ISD::FCOPYSIGN) 5471 return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, 5472 N0, N1.getOperand(1)); 5473 5474 // copysign(x, fp_extend(y)) -> copysign(x, y) 5475 // copysign(x, fp_round(y)) -> copysign(x, y) 5476 if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) 5477 return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, 5478 N0, N1.getOperand(0)); 5479 5480 return SDValue(); 5481 } 5482 5483 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { 5484 SDValue N0 = N->getOperand(0); 5485 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 5486 EVT VT = N->getValueType(0); 5487 EVT OpVT = N0.getValueType(); 5488 5489 // fold (sint_to_fp c1) -> c1fp 5490 if (N0C && OpVT != MVT::ppcf128 && 5491 // ...but only if the target supports immediate floating-point values 5492 (!LegalOperations || 5493 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 5494 return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); 5495 5496 // If the input is a legal type, and SINT_TO_FP is not legal on this target, 5497 // but UINT_TO_FP is legal on this target, try to convert. 5498 if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && 5499 TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { 5500 // If the sign bit is known to be zero, we can change this to UINT_TO_FP. 
5501 if (DAG.SignBitIsZero(N0)) 5502 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); 5503 } 5504 5505 return SDValue(); 5506 } 5507 5508 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { 5509 SDValue N0 = N->getOperand(0); 5510 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 5511 EVT VT = N->getValueType(0); 5512 EVT OpVT = N0.getValueType(); 5513 5514 // fold (uint_to_fp c1) -> c1fp 5515 if (N0C && OpVT != MVT::ppcf128 && 5516 // ...but only if the target supports immediate floating-point values 5517 (!LegalOperations || 5518 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 5519 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); 5520 5521 // If the input is a legal type, and UINT_TO_FP is not legal on this target, 5522 // but SINT_TO_FP is legal on this target, try to convert. 5523 if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) && 5524 TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { 5525 // If the sign bit is known to be zero, we can change this to SINT_TO_FP. 
5526 if (DAG.SignBitIsZero(N0)) 5527 return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); 5528 } 5529 5530 return SDValue(); 5531 } 5532 5533 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { 5534 SDValue N0 = N->getOperand(0); 5535 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5536 EVT VT = N->getValueType(0); 5537 5538 // fold (fp_to_sint c1fp) -> c1 5539 if (N0CFP) 5540 return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0); 5541 5542 return SDValue(); 5543 } 5544 5545 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { 5546 SDValue N0 = N->getOperand(0); 5547 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5548 EVT VT = N->getValueType(0); 5549 5550 // fold (fp_to_uint c1fp) -> c1 5551 if (N0CFP && VT != MVT::ppcf128) 5552 return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0); 5553 5554 return SDValue(); 5555 } 5556 5557 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { 5558 SDValue N0 = N->getOperand(0); 5559 SDValue N1 = N->getOperand(1); 5560 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5561 EVT VT = N->getValueType(0); 5562 5563 // fold (fp_round c1fp) -> c1fp 5564 if (N0CFP && N0.getValueType() != MVT::ppcf128) 5565 return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1); 5566 5567 // fold (fp_round (fp_extend x)) -> x 5568 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) 5569 return N0.getOperand(0); 5570 5571 // fold (fp_round (fp_round x)) -> (fp_round x) 5572 if (N0.getOpcode() == ISD::FP_ROUND) { 5573 // This is a value preserving truncation if both round's are. 
5574 bool IsTrunc = N->getConstantOperandVal(1) == 1 && 5575 N0.getNode()->getConstantOperandVal(1) == 1; 5576 return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0), 5577 DAG.getIntPtrConstant(IsTrunc)); 5578 } 5579 5580 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) 5581 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { 5582 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT, 5583 N0.getOperand(0), N1); 5584 AddToWorkList(Tmp.getNode()); 5585 return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, 5586 Tmp, N0.getOperand(1)); 5587 } 5588 5589 return SDValue(); 5590 } 5591 5592 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { 5593 SDValue N0 = N->getOperand(0); 5594 EVT VT = N->getValueType(0); 5595 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 5596 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5597 5598 // fold (fp_round_inreg c1fp) -> c1fp 5599 if (N0CFP && isTypeLegal(EVT)) { 5600 SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); 5601 return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round); 5602 } 5603 5604 return SDValue(); 5605 } 5606 5607 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { 5608 SDValue N0 = N->getOperand(0); 5609 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5610 EVT VT = N->getValueType(0); 5611 5612 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. 5613 if (N->hasOneUse() && 5614 N->use_begin()->getOpcode() == ISD::FP_ROUND) 5615 return SDValue(); 5616 5617 // fold (fp_extend c1fp) -> c1fp 5618 if (N0CFP && VT != MVT::ppcf128) 5619 return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); 5620 5621 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the 5622 // value of X. 
5623 if (N0.getOpcode() == ISD::FP_ROUND 5624 && N0.getNode()->getConstantOperandVal(1) == 1) { 5625 SDValue In = N0.getOperand(0); 5626 if (In.getValueType() == VT) return In; 5627 if (VT.bitsLT(In.getValueType())) 5628 return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, 5629 In, N0.getOperand(1)); 5630 return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In); 5631 } 5632 5633 // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) 5634 if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && 5635 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 5636 TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { 5637 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 5638 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, 5639 LN0->getChain(), 5640 LN0->getBasePtr(), LN0->getPointerInfo(), 5641 N0.getValueType(), 5642 LN0->isVolatile(), LN0->isNonTemporal(), 5643 LN0->getAlignment()); 5644 CombineTo(N, ExtLoad); 5645 CombineTo(N0.getNode(), 5646 DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), 5647 N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), 5648 ExtLoad.getValue(1)); 5649 return SDValue(N, 0); // Return N so it doesn't get rechecked! 5650 } 5651 5652 return SDValue(); 5653 } 5654 5655 SDValue DAGCombiner::visitFNEG(SDNode *N) { 5656 SDValue N0 = N->getOperand(0); 5657 EVT VT = N->getValueType(0); 5658 5659 if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options)) 5660 return GetNegatedExpression(N0, DAG, LegalOperations); 5661 5662 // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading 5663 // constant pool values. 
5664 if (N0.getOpcode() == ISD::BITCAST && 5665 !VT.isVector() && 5666 N0.getNode()->hasOneUse() && 5667 N0.getOperand(0).getValueType().isInteger()) { 5668 SDValue Int = N0.getOperand(0); 5669 EVT IntVT = Int.getValueType(); 5670 if (IntVT.isInteger() && !IntVT.isVector()) { 5671 Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, 5672 DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); 5673 AddToWorkList(Int.getNode()); 5674 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), 5675 VT, Int); 5676 } 5677 } 5678 5679 return SDValue(); 5680 } 5681 5682 SDValue DAGCombiner::visitFABS(SDNode *N) { 5683 SDValue N0 = N->getOperand(0); 5684 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5685 EVT VT = N->getValueType(0); 5686 5687 // fold (fabs c1) -> fabs(c1) 5688 if (N0CFP && VT != MVT::ppcf128) 5689 return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); 5690 // fold (fabs (fabs x)) -> (fabs x) 5691 if (N0.getOpcode() == ISD::FABS) 5692 return N->getOperand(0); 5693 // fold (fabs (fneg x)) -> (fabs x) 5694 // fold (fabs (fcopysign x, y)) -> (fabs x) 5695 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) 5696 return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0)); 5697 5698 // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading 5699 // constant pool values. 
5700 if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && 5701 N0.getOperand(0).getValueType().isInteger() && 5702 !N0.getOperand(0).getValueType().isVector()) { 5703 SDValue Int = N0.getOperand(0); 5704 EVT IntVT = Int.getValueType(); 5705 if (IntVT.isInteger() && !IntVT.isVector()) { 5706 Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, 5707 DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); 5708 AddToWorkList(Int.getNode()); 5709 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), 5710 N->getValueType(0), Int); 5711 } 5712 } 5713 5714 return SDValue(); 5715 } 5716 5717 SDValue DAGCombiner::visitBRCOND(SDNode *N) { 5718 SDValue Chain = N->getOperand(0); 5719 SDValue N1 = N->getOperand(1); 5720 SDValue N2 = N->getOperand(2); 5721 5722 // If N is a constant we could fold this into a fallthrough or unconditional 5723 // branch. However that doesn't happen very often in normal code, because 5724 // Instcombine/SimplifyCFG should have handled the available opportunities. 5725 // If we did this folding here, it would be necessary to update the 5726 // MachineBasicBlock CFG, which is awkward. 5727 5728 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal 5729 // on the target. 5730 if (N1.getOpcode() == ISD::SETCC && 5731 TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) { 5732 return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, 5733 Chain, N1.getOperand(2), 5734 N1.getOperand(0), N1.getOperand(1), N2); 5735 } 5736 5737 if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || 5738 ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && 5739 (N1.getOperand(0).hasOneUse() && 5740 N1.getOperand(0).getOpcode() == ISD::SRL))) { 5741 SDNode *Trunc = 0; 5742 if (N1.getOpcode() == ISD::TRUNCATE) { 5743 // Look pass the truncate. 5744 Trunc = N1.getNode(); 5745 N1 = N1.getOperand(0); 5746 } 5747 5748 // Match this pattern so that we can generate simpler code: 5749 // 5750 // %a = ... 
5751 // %b = and i32 %a, 2 5752 // %c = srl i32 %b, 1 5753 // brcond i32 %c ... 5754 // 5755 // into 5756 // 5757 // %a = ... 5758 // %b = and i32 %a, 2 5759 // %c = setcc eq %b, 0 5760 // brcond %c ... 5761 // 5762 // This applies only when the AND constant value has one bit set and the 5763 // SRL constant is equal to the log2 of the AND constant. The back-end is 5764 // smart enough to convert the result into a TEST/JMP sequence. 5765 SDValue Op0 = N1.getOperand(0); 5766 SDValue Op1 = N1.getOperand(1); 5767 5768 if (Op0.getOpcode() == ISD::AND && 5769 Op1.getOpcode() == ISD::Constant) { 5770 SDValue AndOp1 = Op0.getOperand(1); 5771 5772 if (AndOp1.getOpcode() == ISD::Constant) { 5773 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); 5774 5775 if (AndConst.isPowerOf2() && 5776 cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { 5777 SDValue SetCC = 5778 DAG.getSetCC(N->getDebugLoc(), 5779 TLI.getSetCCResultType(Op0.getValueType()), 5780 Op0, DAG.getConstant(0, Op0.getValueType()), 5781 ISD::SETNE); 5782 5783 SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(), 5784 MVT::Other, Chain, SetCC, N2); 5785 // Don't add the new BRCond into the worklist or else SimplifySelectCC 5786 // will convert it back to (X & C1) >> C2. 5787 CombineTo(N, NewBRCond, false); 5788 // Truncate is dead. 5789 if (Trunc) { 5790 removeFromWorkList(Trunc); 5791 DAG.DeleteNode(Trunc); 5792 } 5793 // Replace the uses of SRL with SETCC 5794 WorkListRemover DeadNodes(*this); 5795 DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); 5796 removeFromWorkList(N1.getNode()); 5797 DAG.DeleteNode(N1.getNode()); 5798 return SDValue(N, 0); // Return N so it doesn't get rechecked! 5799 } 5800 } 5801 } 5802 5803 if (Trunc) 5804 // Restore N1 if the above transformation doesn't match. 
5805 N1 = N->getOperand(1); 5806 } 5807 5808 // Transform br(xor(x, y)) -> br(x != y) 5809 // Transform br(xor(xor(x,y), 1)) -> br (x == y) 5810 if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { 5811 SDNode *TheXor = N1.getNode(); 5812 SDValue Op0 = TheXor->getOperand(0); 5813 SDValue Op1 = TheXor->getOperand(1); 5814 if (Op0.getOpcode() == Op1.getOpcode()) { 5815 // Avoid missing important xor optimizations. 5816 SDValue Tmp = visitXOR(TheXor); 5817 if (Tmp.getNode() && Tmp.getNode() != TheXor) { 5818 DEBUG(dbgs() << "\nReplacing.8 "; 5819 TheXor->dump(&DAG); 5820 dbgs() << "\nWith: "; 5821 Tmp.getNode()->dump(&DAG); 5822 dbgs() << '\n'); 5823 WorkListRemover DeadNodes(*this); 5824 DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes); 5825 removeFromWorkList(TheXor); 5826 DAG.DeleteNode(TheXor); 5827 return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), 5828 MVT::Other, Chain, Tmp, N2); 5829 } 5830 } 5831 5832 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { 5833 bool Equal = false; 5834 if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0)) 5835 if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && 5836 Op0.getOpcode() == ISD::XOR) { 5837 TheXor = Op0.getNode(); 5838 Equal = true; 5839 } 5840 5841 EVT SetCCVT = N1.getValueType(); 5842 if (LegalTypes) 5843 SetCCVT = TLI.getSetCCResultType(SetCCVT); 5844 SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), 5845 SetCCVT, 5846 Op0, Op1, 5847 Equal ? ISD::SETEQ : ISD::SETNE); 5848 // Replace the uses of XOR with SETCC 5849 WorkListRemover DeadNodes(*this); 5850 DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); 5851 removeFromWorkList(N1.getNode()); 5852 DAG.DeleteNode(N1.getNode()); 5853 return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), 5854 MVT::Other, Chain, SetCC, N2); 5855 } 5856 } 5857 5858 return SDValue(); 5859 } 5860 5861 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. 
5862 // 5863 SDValue DAGCombiner::visitBR_CC(SDNode *N) { 5864 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); 5865 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); 5866 5867 // If N is a constant we could fold this into a fallthrough or unconditional 5868 // branch. However that doesn't happen very often in normal code, because 5869 // Instcombine/SimplifyCFG should have handled the available opportunities. 5870 // If we did this folding here, it would be necessary to update the 5871 // MachineBasicBlock CFG, which is awkward. 5872 5873 // Use SimplifySetCC to simplify SETCC's. 5874 SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), 5875 CondLHS, CondRHS, CC->get(), N->getDebugLoc(), 5876 false); 5877 if (Simp.getNode()) AddToWorkList(Simp.getNode()); 5878 5879 // fold to a simpler setcc 5880 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) 5881 return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, 5882 N->getOperand(0), Simp.getOperand(2), 5883 Simp.getOperand(0), Simp.getOperand(1), 5884 N->getOperand(4)); 5885 5886 return SDValue(); 5887 } 5888 5889 /// CombineToPreIndexedLoadStore - Try turning a load / store into a 5890 /// pre-indexed load / store when the base pointer is an add or subtract 5891 /// and it has other uses besides the load / store. After the 5892 /// transformation, the new indexed load / store has effectively folded 5893 /// the add / subtract in and all of its other uses are redirected to the 5894 /// new load / store. 
///
/// Returns true when N has been replaced by a pre-indexed load / store (N and
/// the old address node are deleted in that case); returns false when the
/// combine was not performed.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Indexed nodes are only formed once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Classify N and fetch its address.  Already-indexed nodes, and memory types
  // for which the target supports neither PRE_INC nor PRE_DEC, are rejected.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out. There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;
  // Don't create an indexed load / store with zero offset.
  if (isa<ConstantSDNode>(Offset) &&
      cast<ConstantSDNode>(Offset)->isNullValue())
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1. Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  // Register base pointers are rejected by the same test.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Now check for #3 and #4.
  // RealUse becomes true as soon as some user of Ptr (other than N) is
  // anything but a load / store that uses Ptr as its base pointer.
  bool RealUse = false;

  // Caches for hasPredecessorHelper
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;

  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
         E = Ptr.getNode()->use_end(); I != E; ++I) {
    SDNode *Use = *I;
    if (Use == N)
      continue;
    // #3: folding Ptr into N would make N depend on its own predecessor.
    if (N->hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    if (!((Use->getOpcode() == ISD::LOAD &&
           cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
          (Use->getOpcode() == ISD::STORE &&
           cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
      RealUse = true;
  }

  // #4: every other user is just another memory access through Ptr.
  if (!RealUse)
    return false;

  // All checks passed: build the indexed node from N.
  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorkListRemover DeadNodes(*this);
  // Redirect N's results.  For a load, result 0 of N maps to result 0 of the
  // new node and result 1 of N maps to result 2; for a store, the single
  // result of N maps to result 1 of the new node.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
                                  &DeadNodes);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
                                  &DeadNodes);
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
                                  &DeadNodes);
  }

  // Finally, since the node is now dead, remove it from the graph.
  DAG.DeleteNode(N);

  // Replace the uses of Ptr with uses of the updated base value (result 1 of
  // an indexed load, result 0 of an indexed store).
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
                                &DeadNodes);
  removeFromWorkList(Ptr.getNode());
  DAG.DeleteNode(Ptr.getNode());

  return true;
}

/// CombineToPostIndexedLoadStore - Try to combine a load / store with a
/// add / sub of the base pointer node into a post-indexed load / store.
/// The transformation folded the add / subtract into the new indexed
/// load / store effectively and all of its uses are redirected to the
/// new load / store.
///
/// Returns true when N has been replaced by a post-indexed load / store (N
/// and the folded add / sub are deleted in that case); returns false when no
/// candidate add / sub could be folded.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Indexed nodes are only formed once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Classify N and fetch its address.  Already-indexed nodes, and memory types
  // for which the target supports neither POST_INC nor POST_DEC, are rejected.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If N is the only user of the address there is no add / sub to fold.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look at every other user of the address; only ADD / SUB users are
  // candidates for folding.
  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
         E = Ptr.getNode()->use_end(); I != E; ++I) {
    SDNode *Op = *I;
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isa<ConstantSDNode>(Offset) &&
          cast<ConstantSDNode>(Offset)->isNullValue())
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr.
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // Frame-index and register base pointers are never post-indexed.
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
             EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
        SDNode *Use = *II;
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          // RealUse becomes true if this add / sub feeds anything other than
          // a load / store that uses it as its base pointer.
          bool RealUse = false;
          for (SDNode::use_iterator III = Use->use_begin(),
                 EEE = Use->use_end(); III != EEE; ++III) {
            SDNode *UseUse = *III;
            if (!((UseUse->getOpcode() == ISD::LOAD &&
                   cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
                  (UseUse->getOpcode() == ISD::STORE &&
                   cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      // This candidate failed check #1; move on to the next user of Ptr.
      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorkListRemover DeadNodes(*this);
        // Redirect N's results.  For a load, result 0 of N maps to result 0
        // of the new node and result 1 of N maps to result 2; for a store,
        // the single result of N maps to result 1 of the new node.
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
                                        &DeadNodes);
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
                                        &DeadNodes);
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
                                        &DeadNodes);
        }

        // Finally, since the node is now dead, remove it from the graph.
        DAG.DeleteNode(N);

        // Replace the uses of Op (the folded add / sub) with uses of the
        // updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0),
                                      &DeadNodes);
        removeFromWorkList(Op);
        DAG.DeleteNode(Op);
        return true;
      }
    }
  }

  return false;
}

/// visitLOAD - Combine a load node.  In order, this tries: removing a
/// non-volatile load whose loaded value (and, for indexed loads, updated
/// address) is unused; forwarding the value of a store the load is chained
/// directly to; raising the recorded alignment; re-chaining the load past
/// non-aliasing memory nodes (only when CombinerAA is set); and forming a
/// pre- / post-indexed load.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    // Result 1 being the chain (MVT::Other) identifies an unindexed load.
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (N->hasNUsesOfValue(0, 0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3 = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);

        // Only delete N if the replacement above did not give it new users
        // (see the example in the comment above).
        if (N->use_empty()) {
          removeFromWorkList(N);
          DAG.DeleteNode(N);
        }

        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
      if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorkListRemover DeadNodes(*this);
        // Results 0 and 1 have no users; they are replaced with undef and the
        // chain result (2) is replaced with the incoming chain.
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
                                      DAG.getUNDEF(N->getValueType(1)),
                                      &DeadNodes);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
        removeFromWorkList(N);
        DAG.DeleteNode(N);
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      // Same address and same value type: forward operand 1 of the store and
      // use the store itself as the load's chain result.
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      // Rebuild the load with the larger alignment; every other property is
      // copied from LD.
      if (Align > LD->getAlignment())
        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getPointerInfo(),
                              LD->getMemoryVT(),
                              LD->isVolatile(), LD->isNonTemporal(), Align);
    }
  }

  if (CombinerAA) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
                               BetterChain, Ptr, LD->getPointerInfo(),
                               LD->isVolatile(), LD->isNonTemporal(),
                               LD->isInvariant(), LD->getAlignment());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getPointerInfo(),
                                  LD->getMemoryVT(),
                                  LD->isVolatile(),
                                  LD->isNonTemporal(),
                                  LD->getAlignment());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorkList(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  return SDValue();
}

/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
/// load is having specific bytes cleared out.
If so, return the byte size 6284 /// being masked out and the shift amount. 6285 static std::pair<unsigned, unsigned> 6286 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { 6287 std::pair<unsigned, unsigned> Result(0, 0); 6288 6289 // Check for the structure we're looking for. 6290 if (V->getOpcode() != ISD::AND || 6291 !isa<ConstantSDNode>(V->getOperand(1)) || 6292 !ISD::isNormalLoad(V->getOperand(0).getNode())) 6293 return Result; 6294 6295 // Check the chain and pointer. 6296 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); 6297 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. 6298 6299 // The store should be chained directly to the load or be an operand of a 6300 // tokenfactor. 6301 if (LD == Chain.getNode()) 6302 ; // ok. 6303 else if (Chain->getOpcode() != ISD::TokenFactor) 6304 return Result; // Fail. 6305 else { 6306 bool isOk = false; 6307 for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) 6308 if (Chain->getOperand(i).getNode() == LD) { 6309 isOk = true; 6310 break; 6311 } 6312 if (!isOk) return Result; 6313 } 6314 6315 // This only handles simple types. 6316 if (V.getValueType() != MVT::i16 && 6317 V.getValueType() != MVT::i32 && 6318 V.getValueType() != MVT::i64) 6319 return Result; 6320 6321 // Check the constant mask. Invert it so that the bits being masked out are 6322 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits 6323 // follow the sign bit for uniformity. 6324 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); 6325 unsigned NotMaskLZ = CountLeadingZeros_64(NotMask); 6326 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. 6327 unsigned NotMaskTZ = CountTrailingZeros_64(NotMask); 6328 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. 6329 if (NotMaskLZ == 64) return Result; // All zero mask. 6330 6331 // See if we have a continuous run of bits. 
If so, we have 0*1+0* 6332 if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) 6333 return Result; 6334 6335 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. 6336 if (V.getValueType() != MVT::i64 && NotMaskLZ) 6337 NotMaskLZ -= 64-V.getValueSizeInBits(); 6338 6339 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; 6340 switch (MaskedBytes) { 6341 case 1: 6342 case 2: 6343 case 4: break; 6344 default: return Result; // All one mask, or 5-byte mask. 6345 } 6346 6347 // Verify that the first bit starts at a multiple of mask so that the access 6348 // is aligned the same as the access width. 6349 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; 6350 6351 Result.first = MaskedBytes; 6352 Result.second = NotMaskTZ/8; 6353 return Result; 6354 } 6355 6356 6357 /// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that 6358 /// provides a value as specified by MaskInfo. If so, replace the specified 6359 /// store with a narrower store of truncated IVal. 6360 static SDNode * 6361 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, 6362 SDValue IVal, StoreSDNode *St, 6363 DAGCombiner *DC) { 6364 unsigned NumBytes = MaskInfo.first; 6365 unsigned ByteShift = MaskInfo.second; 6366 SelectionDAG &DAG = DC->getDAG(); 6367 6368 // Check to see if IVal is all zeros in the part being masked in by the 'or' 6369 // that uses this. If not, this is not a replacement. 6370 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), 6371 ByteShift*8, (ByteShift+NumBytes)*8); 6372 if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; 6373 6374 // Check that it is legal on the target to do this. It is legal if the new 6375 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type 6376 // legalization. 6377 MVT VT = MVT::getIntegerVT(NumBytes*8); 6378 if (!DC->isTypeLegal(VT)) 6379 return 0; 6380 6381 // Okay, we can do this! 
Replace the 'St' store with a store of IVal that is 6382 // shifted by ByteShift and truncated down to NumBytes. 6383 if (ByteShift) 6384 IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal, 6385 DAG.getConstant(ByteShift*8, 6386 DC->getShiftAmountTy(IVal.getValueType()))); 6387 6388 // Figure out the offset for the store and the alignment of the access. 6389 unsigned StOffset; 6390 unsigned NewAlign = St->getAlignment(); 6391 6392 if (DAG.getTargetLoweringInfo().isLittleEndian()) 6393 StOffset = ByteShift; 6394 else 6395 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; 6396 6397 SDValue Ptr = St->getBasePtr(); 6398 if (StOffset) { 6399 Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(), 6400 Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); 6401 NewAlign = MinAlign(NewAlign, StOffset); 6402 } 6403 6404 // Truncate down to the new size. 6405 IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal); 6406 6407 ++OpsNarrowed; 6408 return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr, 6409 St->getPointerInfo().getWithOffset(StOffset), 6410 false, false, NewAlign).getNode(); 6411 } 6412 6413 6414 /// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is 6415 /// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some 6416 /// of the loaded bits, try narrowing the load and store if it would end up 6417 /// being a win for performance or code size. 
///
/// N must be a store node.  Returns the replacement (narrower) store, or an
/// empty SDValue when nothing was changed.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  // Volatile stores are never narrowed.
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Only plain (non-truncating) scalar stores whose value has no other user
  // are considered.
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes. If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // The remaining transform needs "op X, constant" with op in {or, xor, and}.
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  // X must be a normal load with a single use whose chain result feeds this
  // store directly.
  SDValue N0 = Value.getOperand(0);
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    // Load and store must use the same pointer and the same address space.
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // An 'and' changes the bits where its constant is 0, 'or' / 'xor' the bits
    // where it is 1.  Inverting for 'and' makes the set bits of Imm always
    // mean "bits the op may change".
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    // Nothing to narrow if the op changes no bits, or may change all of them.
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    // ShAmt / MSB are the positions of the lowest / highest changed bit.
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // Widen the candidate type until the target reports the op as legal or
    // custom and the narrowing as profitable, or until it is no narrower than
    // the original.
    while (NewBW < BitWidth &&
           !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
             TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    // Mask selects the NewBW-wide window starting at ShAmt.  The transform is
    // only done when every changed bit lies inside that window.
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      // Undo the inversion applied to 'and' constants above.
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (TLI.isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // Give up if the narrow access would be less aligned than the ABI
      // alignment of the narrow type.
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrow load / op / store at Ptr + PtrOff.
      // NOTE(review): the new load copies LD->isVolatile(); only the store's
      // volatility is tested at the top of this function -- confirm that a
      // volatile load cannot reach this point.
      SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, Ptr.getValueType()));
      SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
                                  LD->getChain(), NewPtr,
                                  LD->getPointerInfo().getWithOffset(PtrOff),
                                  LD->isVolatile(), LD->isNonTemporal(),
                                  LD->isInvariant(), NewAlign);
      SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
                                   DAG.getConstant(NewImm, NewVT));
      SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
                                   NewVal, NewPtr,
                                   ST->getPointerInfo().getWithOffset(PtrOff),
                                   false, false, NewAlign);

      AddToWorkList(NewPtr.getNode());
      AddToWorkList(NewLD.getNode());
      AddToWorkList(NewVal.getNode());
      WorkListRemover DeadNodes(*this);
      // Move users of the old load's chain result over to the narrow load's
      // chain result.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
                                    &DeadNodes);
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}

/// TransformFPLoadStorePair - For a given floating point load / store pair,
/// if the load value isn't used by any other operations, then consider
/// transforming the pair to integer load / store operations if the target
/// deems the transformation profitable.
6541 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { 6542 StoreSDNode *ST = cast<StoreSDNode>(N); 6543 SDValue Chain = ST->getChain(); 6544 SDValue Value = ST->getValue(); 6545 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && 6546 Value.hasOneUse() && 6547 Chain == SDValue(Value.getNode(), 1)) { 6548 LoadSDNode *LD = cast<LoadSDNode>(Value); 6549 EVT VT = LD->getMemoryVT(); 6550 if (!VT.isFloatingPoint() || 6551 VT != ST->getMemoryVT() || 6552 LD->isNonTemporal() || 6553 ST->isNonTemporal() || 6554 LD->getPointerInfo().getAddrSpace() != 0 || 6555 ST->getPointerInfo().getAddrSpace() != 0) 6556 return SDValue(); 6557 6558 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); 6559 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || 6560 !TLI.isOperationLegal(ISD::STORE, IntVT) || 6561 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || 6562 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) 6563 return SDValue(); 6564 6565 unsigned LDAlign = LD->getAlignment(); 6566 unsigned STAlign = ST->getAlignment(); 6567 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); 6568 unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy); 6569 if (LDAlign < ABIAlign || STAlign < ABIAlign) 6570 return SDValue(); 6571 6572 SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), 6573 LD->getChain(), LD->getBasePtr(), 6574 LD->getPointerInfo(), 6575 false, false, false, LDAlign); 6576 6577 SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), 6578 NewLD, ST->getBasePtr(), 6579 ST->getPointerInfo(), 6580 false, false, STAlign); 6581 6582 AddToWorkList(NewLD.getNode()); 6583 AddToWorkList(NewST.getNode()); 6584 WorkListRemover DeadNodes(*this); 6585 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1), 6586 &DeadNodes); 6587 ++LdStFP2Int; 6588 return NewST; 6589 } 6590 6591 return SDValue(); 6592 } 6593 6594 SDValue DAGCombiner::visitSTORE(SDNode *N) { 6595 StoreSDNode *ST = cast<StoreSDNode>(N); 
6596 SDValue Chain = ST->getChain(); 6597 SDValue Value = ST->getValue(); 6598 SDValue Ptr = ST->getBasePtr(); 6599 6600 // If this is a store of a bit convert, store the input value if the 6601 // resultant store does not need a higher alignment than the original. 6602 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && 6603 ST->isUnindexed()) { 6604 unsigned OrigAlign = ST->getAlignment(); 6605 EVT SVT = Value.getOperand(0).getValueType(); 6606 unsigned Align = TLI.getTargetData()-> 6607 getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); 6608 if (Align <= OrigAlign && 6609 ((!LegalOperations && !ST->isVolatile()) || 6610 TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) 6611 return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), 6612 Ptr, ST->getPointerInfo(), ST->isVolatile(), 6613 ST->isNonTemporal(), OrigAlign); 6614 } 6615 6616 // Turn 'store undef, Ptr' -> nothing. 6617 if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) 6618 return Chain; 6619 6620 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' 6621 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { 6622 // NOTE: If the original store is volatile, this transform must not increase 6623 // the number of stores. For example, on x86-32 an f64 can be stored in one 6624 // processor operation but an i64 (which is not legal) requires two. So the 6625 // transform should not be done in this case. 6626 if (Value.getOpcode() != ISD::TargetConstantFP) { 6627 SDValue Tmp; 6628 switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { 6629 default: llvm_unreachable("Unknown FP type"); 6630 case MVT::f80: // We don't do this for these yet. 6631 case MVT::f128: 6632 case MVT::ppcf128: 6633 break; 6634 case MVT::f32: 6635 if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || 6636 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 6637 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 
6638 bitcastToAPInt().getZExtValue(), MVT::i32); 6639 return DAG.getStore(Chain, N->getDebugLoc(), Tmp, 6640 Ptr, ST->getPointerInfo(), ST->isVolatile(), 6641 ST->isNonTemporal(), ST->getAlignment()); 6642 } 6643 break; 6644 case MVT::f64: 6645 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && 6646 !ST->isVolatile()) || 6647 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { 6648 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 6649 getZExtValue(), MVT::i64); 6650 return DAG.getStore(Chain, N->getDebugLoc(), Tmp, 6651 Ptr, ST->getPointerInfo(), ST->isVolatile(), 6652 ST->isNonTemporal(), ST->getAlignment()); 6653 } 6654 6655 if (!ST->isVolatile() && 6656 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 6657 // Many FP stores are not made apparent until after legalize, e.g. for 6658 // argument passing. Since this is so common, custom legalize the 6659 // 64-bit integer store into two 32-bit stores. 6660 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); 6661 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); 6662 SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); 6663 if (TLI.isBigEndian()) std::swap(Lo, Hi); 6664 6665 unsigned Alignment = ST->getAlignment(); 6666 bool isVolatile = ST->isVolatile(); 6667 bool isNonTemporal = ST->isNonTemporal(); 6668 6669 SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, 6670 Ptr, ST->getPointerInfo(), 6671 isVolatile, isNonTemporal, 6672 ST->getAlignment()); 6673 Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, 6674 DAG.getConstant(4, Ptr.getValueType())); 6675 Alignment = MinAlign(Alignment, 4U); 6676 SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, 6677 Ptr, ST->getPointerInfo().getWithOffset(4), 6678 isVolatile, isNonTemporal, 6679 Alignment); 6680 return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, 6681 St0, St1); 6682 } 6683 6684 break; 6685 } 6686 } 6687 } 6688 6689 // Try to infer better alignment information than the store 
already has. 6690 if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { 6691 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { 6692 if (Align > ST->getAlignment()) 6693 return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, 6694 Ptr, ST->getPointerInfo(), ST->getMemoryVT(), 6695 ST->isVolatile(), ST->isNonTemporal(), Align); 6696 } 6697 } 6698 6699 // Try transforming a pair floating point load / store ops to integer 6700 // load / store ops. 6701 SDValue NewST = TransformFPLoadStorePair(N); 6702 if (NewST.getNode()) 6703 return NewST; 6704 6705 if (CombinerAA) { 6706 // Walk up chain skipping non-aliasing memory nodes. 6707 SDValue BetterChain = FindBetterChain(N, Chain); 6708 6709 // If there is a better chain. 6710 if (Chain != BetterChain) { 6711 SDValue ReplStore; 6712 6713 // Replace the chain to avoid dependency. 6714 if (ST->isTruncatingStore()) { 6715 ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, 6716 ST->getPointerInfo(), 6717 ST->getMemoryVT(), ST->isVolatile(), 6718 ST->isNonTemporal(), ST->getAlignment()); 6719 } else { 6720 ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, 6721 ST->getPointerInfo(), 6722 ST->isVolatile(), ST->isNonTemporal(), 6723 ST->getAlignment()); 6724 } 6725 6726 // Create token to keep both nodes around. 6727 SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), 6728 MVT::Other, Chain, ReplStore); 6729 6730 // Make sure the new and old chains are cleaned up. 6731 AddToWorkList(Token.getNode()); 6732 6733 // Don't add users to work list. 6734 return CombineTo(N, Token, false); 6735 } 6736 } 6737 6738 // Try transforming N to an indexed store. 6739 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) 6740 return SDValue(N, 0); 6741 6742 // FIXME: is there such a thing as a truncating indexed store? 
6743 if (ST->isTruncatingStore() && ST->isUnindexed() && 6744 Value.getValueType().isInteger()) { 6745 // See if we can simplify the input to this truncstore with knowledge that 6746 // only the low bits are being used. For example: 6747 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" 6748 SDValue Shorter = 6749 GetDemandedBits(Value, 6750 APInt::getLowBitsSet( 6751 Value.getValueType().getScalarType().getSizeInBits(), 6752 ST->getMemoryVT().getScalarType().getSizeInBits())); 6753 AddToWorkList(Value.getNode()); 6754 if (Shorter.getNode()) 6755 return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, 6756 Ptr, ST->getPointerInfo(), ST->getMemoryVT(), 6757 ST->isVolatile(), ST->isNonTemporal(), 6758 ST->getAlignment()); 6759 6760 // Otherwise, see if we can simplify the operation with 6761 // SimplifyDemandedBits, which only works if the value has a single use. 6762 if (SimplifyDemandedBits(Value, 6763 APInt::getLowBitsSet( 6764 Value.getValueType().getScalarType().getSizeInBits(), 6765 ST->getMemoryVT().getScalarType().getSizeInBits()))) 6766 return SDValue(N, 0); 6767 } 6768 6769 // If this is a load followed by a store to the same location, then the store 6770 // is dead/noop. 6771 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { 6772 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && 6773 ST->isUnindexed() && !ST->isVolatile() && 6774 // There can't be any side effects between the load and store, such as 6775 // a call or store. 6776 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { 6777 // The store is dead, remove it. 6778 return Chain; 6779 } 6780 } 6781 6782 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a 6783 // truncating store. We can do this even if this is already a truncstore. 
6784 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) 6785 && Value.getNode()->hasOneUse() && ST->isUnindexed() && 6786 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), 6787 ST->getMemoryVT())) { 6788 return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), 6789 Ptr, ST->getPointerInfo(), ST->getMemoryVT(), 6790 ST->isVolatile(), ST->isNonTemporal(), 6791 ST->getAlignment()); 6792 } 6793 6794 return ReduceLoadOpStoreWidth(N); 6795 } 6796 6797 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { 6798 SDValue InVec = N->getOperand(0); 6799 SDValue InVal = N->getOperand(1); 6800 SDValue EltNo = N->getOperand(2); 6801 DebugLoc dl = N->getDebugLoc(); 6802 6803 // If the inserted element is an UNDEF, just use the input vector. 6804 if (InVal.getOpcode() == ISD::UNDEF) 6805 return InVec; 6806 6807 EVT VT = InVec.getValueType(); 6808 6809 // If we can't generate a legal BUILD_VECTOR, exit 6810 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) 6811 return SDValue(); 6812 6813 // Check that we know which element is being inserted 6814 if (!isa<ConstantSDNode>(EltNo)) 6815 return SDValue(); 6816 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 6817 6818 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially 6819 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the 6820 // vector elements. 6821 SmallVector<SDValue, 8> Ops; 6822 if (InVec.getOpcode() == ISD::BUILD_VECTOR) { 6823 Ops.append(InVec.getNode()->op_begin(), 6824 InVec.getNode()->op_end()); 6825 } else if (InVec.getOpcode() == ISD::UNDEF) { 6826 unsigned NElts = VT.getVectorNumElements(); 6827 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); 6828 } else { 6829 return SDValue(); 6830 } 6831 6832 // Insert the element 6833 if (Elt < Ops.size()) { 6834 // All the operands of BUILD_VECTOR must have the same type; 6835 // we enforce that here. 
6836 EVT OpVT = Ops[0].getValueType(); 6837 if (InVal.getValueType() != OpVT) 6838 InVal = OpVT.bitsGT(InVal.getValueType()) ? 6839 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) : 6840 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal); 6841 Ops[Elt] = InVal; 6842 } 6843 6844 // Return the new vector 6845 return DAG.getNode(ISD::BUILD_VECTOR, dl, 6846 VT, &Ops[0], Ops.size()); 6847 } 6848 6849 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { 6850 // (vextract (scalar_to_vector val, 0) -> val 6851 SDValue InVec = N->getOperand(0); 6852 6853 if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { 6854 // Check if the result type doesn't match the inserted element type. A 6855 // SCALAR_TO_VECTOR may truncate the inserted element and the 6856 // EXTRACT_VECTOR_ELT may widen the extracted vector. 6857 SDValue InOp = InVec.getOperand(0); 6858 EVT NVT = N->getValueType(0); 6859 if (InOp.getValueType() != NVT) { 6860 assert(InOp.getValueType().isInteger() && NVT.isInteger()); 6861 return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); 6862 } 6863 return InOp; 6864 } 6865 6866 // Perform only after legalization to ensure build_vector / vector_shuffle 6867 // optimizations have already been done. 
6868 if (!LegalOperations) return SDValue(); 6869 6870 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) 6871 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) 6872 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) 6873 SDValue EltNo = N->getOperand(1); 6874 6875 if (isa<ConstantSDNode>(EltNo)) { 6876 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 6877 bool NewLoad = false; 6878 bool BCNumEltsChanged = false; 6879 EVT VT = InVec.getValueType(); 6880 EVT ExtVT = VT.getVectorElementType(); 6881 EVT LVT = ExtVT; 6882 6883 if (InVec.getOpcode() == ISD::BITCAST) { 6884 EVT BCVT = InVec.getOperand(0).getValueType(); 6885 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) 6886 return SDValue(); 6887 if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) 6888 BCNumEltsChanged = true; 6889 InVec = InVec.getOperand(0); 6890 ExtVT = BCVT.getVectorElementType(); 6891 NewLoad = true; 6892 } 6893 6894 LoadSDNode *LN0 = NULL; 6895 const ShuffleVectorSDNode *SVN = NULL; 6896 if (ISD::isNormalLoad(InVec.getNode())) { 6897 LN0 = cast<LoadSDNode>(InVec); 6898 } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && 6899 InVec.getOperand(0).getValueType() == ExtVT && 6900 ISD::isNormalLoad(InVec.getOperand(0).getNode())) { 6901 LN0 = cast<LoadSDNode>(InVec.getOperand(0)); 6902 } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { 6903 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) 6904 // => 6905 // (load $addr+1*size) 6906 6907 // If the bit convert changed the number of elements, it is unsafe 6908 // to examine the mask. 6909 if (BCNumEltsChanged) 6910 return SDValue(); 6911 6912 // Select the input vector, guarding against out of range extract vector. 6913 unsigned NumElems = VT.getVectorNumElements(); 6914 int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); 6915 InVec = (Idx < (int)NumElems) ? 
InVec.getOperand(0) : InVec.getOperand(1); 6916 6917 if (InVec.getOpcode() == ISD::BITCAST) 6918 InVec = InVec.getOperand(0); 6919 if (ISD::isNormalLoad(InVec.getNode())) { 6920 LN0 = cast<LoadSDNode>(InVec); 6921 Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; 6922 } 6923 } 6924 6925 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) 6926 return SDValue(); 6927 6928 // If Idx was -1 above, Elt is going to be -1, so just return undef. 6929 if (Elt == -1) 6930 return DAG.getUNDEF(LVT); 6931 6932 unsigned Align = LN0->getAlignment(); 6933 if (NewLoad) { 6934 // Check the resultant load doesn't need a higher alignment than the 6935 // original load. 6936 unsigned NewAlign = 6937 TLI.getTargetData() 6938 ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); 6939 6940 if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) 6941 return SDValue(); 6942 6943 Align = NewAlign; 6944 } 6945 6946 SDValue NewPtr = LN0->getBasePtr(); 6947 unsigned PtrOff = 0; 6948 6949 if (Elt) { 6950 PtrOff = LVT.getSizeInBits() * Elt / 8; 6951 EVT PtrType = NewPtr.getValueType(); 6952 if (TLI.isBigEndian()) 6953 PtrOff = VT.getSizeInBits() / 8 - PtrOff; 6954 NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, 6955 DAG.getConstant(PtrOff, PtrType)); 6956 } 6957 6958 // The replacement we need to do here is a little tricky: we need to 6959 // replace an extractelement of a load with a load. 6960 // Use ReplaceAllUsesOfValuesWith to do the replacement. 
6961 SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, 6962 LN0->getPointerInfo().getWithOffset(PtrOff), 6963 LN0->isVolatile(), LN0->isNonTemporal(), 6964 LN0->isInvariant(), Align); 6965 WorkListRemover DeadNodes(*this); 6966 SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; 6967 SDValue To[] = { Load.getValue(0), Load.getValue(1) }; 6968 DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes); 6969 // Since we're explcitly calling ReplaceAllUses, add the new node to the 6970 // worklist explicitly as well. 6971 AddToWorkList(Load.getNode()); 6972 // Make sure to revisit this node to clean it up; it will usually be dead. 6973 AddToWorkList(N); 6974 return SDValue(N, 0); 6975 } 6976 6977 return SDValue(); 6978 } 6979 6980 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { 6981 unsigned NumInScalars = N->getNumOperands(); 6982 DebugLoc dl = N->getDebugLoc(); 6983 EVT VT = N->getValueType(0); 6984 // Check to see if this is a BUILD_VECTOR of a bunch of values 6985 // which come from any_extend or zero_extend nodes. If so, we can create 6986 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR 6987 // optimizations. We do not handle sign-extend because we can't fill the sign 6988 // using shuffles. 6989 EVT SourceType = MVT::Other; 6990 bool allAnyExt = true; 6991 for (unsigned i = 0; i < NumInScalars; ++i) { 6992 SDValue In = N->getOperand(i); 6993 // Ignore undef inputs. 6994 if (In.getOpcode() == ISD::UNDEF) continue; 6995 6996 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; 6997 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; 6998 6999 // Abort if the element is not an extension. 7000 if (!ZeroExt && !AnyExt) { 7001 SourceType = MVT::Other; 7002 break; 7003 } 7004 7005 // The input is a ZeroExt or AnyExt. Check the original type. 7006 EVT InTy = In.getOperand(0).getValueType(); 7007 7008 // Check that all of the widened source types are the same. 7009 if (SourceType == MVT::Other) 7010 // First time. 
7011 SourceType = InTy; 7012 else if (InTy != SourceType) { 7013 // Multiple income types. Abort. 7014 SourceType = MVT::Other; 7015 break; 7016 } 7017 7018 // Check if all of the extends are ANY_EXTENDs. 7019 allAnyExt &= AnyExt; 7020 } 7021 7022 7023 // In order to have valid types, all of the inputs must be extended from the 7024 // same source type and all of the inputs must be any or zero extend. 7025 // Scalar sizes must be a power of two. 7026 EVT OutScalarTy = N->getValueType(0).getScalarType(); 7027 bool validTypes = SourceType != MVT::Other && 7028 isPowerOf2_32(OutScalarTy.getSizeInBits()) && 7029 isPowerOf2_32(SourceType.getSizeInBits()); 7030 7031 // We perform this optimization post type-legalization because 7032 // the type-legalizer often scalarizes integer-promoted vectors. 7033 // Performing this optimization before may create bit-casts which 7034 // will be type-legalized to complex code sequences. 7035 // We perform this optimization only before the operation legalizer because we 7036 // may introduce illegal operations. 7037 if (LegalTypes && !LegalOperations && validTypes) { 7038 bool isLE = TLI.isLittleEndian(); 7039 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); 7040 assert(ElemRatio > 1 && "Invalid element size ratio"); 7041 SDValue Filler = allAnyExt ? DAG.getUNDEF(SourceType): 7042 DAG.getConstant(0, SourceType); 7043 7044 unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); 7045 SmallVector<SDValue, 8> Ops(NewBVElems, Filler); 7046 7047 // Populate the new build_vector 7048 for (unsigned i=0; i < N->getNumOperands(); ++i) { 7049 SDValue Cast = N->getOperand(i); 7050 assert((Cast.getOpcode() == ISD::ANY_EXTEND || 7051 Cast.getOpcode() == ISD::ZERO_EXTEND || 7052 Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); 7053 SDValue In; 7054 if (Cast.getOpcode() == ISD::UNDEF) 7055 In = DAG.getUNDEF(SourceType); 7056 else 7057 In = Cast->getOperand(0); 7058 unsigned Index = isLE ? 
(i * ElemRatio) : 7059 (i * ElemRatio + (ElemRatio - 1)); 7060 7061 assert(Index < Ops.size() && "Invalid index"); 7062 Ops[Index] = In; 7063 } 7064 7065 // The type of the new BUILD_VECTOR node. 7066 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); 7067 assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() && 7068 "Invalid vector size"); 7069 // Check if the new vector type is legal. 7070 if (!isTypeLegal(VecVT)) return SDValue(); 7071 7072 // Make the new BUILD_VECTOR. 7073 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 7074 VecVT, &Ops[0], Ops.size()); 7075 7076 // Bitcast to the desired type. 7077 return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); 7078 } 7079 7080 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT 7081 // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from 7082 // at most two distinct vectors, turn this into a shuffle node. 7083 SDValue VecIn1, VecIn2; 7084 for (unsigned i = 0; i != NumInScalars; ++i) { 7085 // Ignore undef inputs. 7086 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 7087 7088 // If this input is something other than a EXTRACT_VECTOR_ELT with a 7089 // constant index, bail out. 7090 if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || 7091 !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) { 7092 VecIn1 = VecIn2 = SDValue(0, 0); 7093 break; 7094 } 7095 7096 // If the input vector type disagrees with the result of the build_vector, 7097 // we can't make a shuffle. 7098 SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); 7099 if (ExtractedFromVec.getValueType() != VT) { 7100 VecIn1 = VecIn2 = SDValue(0, 0); 7101 break; 7102 } 7103 7104 // Otherwise, remember this. We allow up to two distinct input vectors. 
7105 if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) 7106 continue; 7107 7108 if (VecIn1.getNode() == 0) { 7109 VecIn1 = ExtractedFromVec; 7110 } else if (VecIn2.getNode() == 0) { 7111 VecIn2 = ExtractedFromVec; 7112 } else { 7113 // Too many inputs. 7114 VecIn1 = VecIn2 = SDValue(0, 0); 7115 break; 7116 } 7117 } 7118 7119 // If everything is good, we can make a shuffle operation. 7120 if (VecIn1.getNode()) { 7121 SmallVector<int, 8> Mask; 7122 for (unsigned i = 0; i != NumInScalars; ++i) { 7123 if (N->getOperand(i).getOpcode() == ISD::UNDEF) { 7124 Mask.push_back(-1); 7125 continue; 7126 } 7127 7128 // If extracting from the first vector, just use the index directly. 7129 SDValue Extract = N->getOperand(i); 7130 SDValue ExtVal = Extract.getOperand(1); 7131 if (Extract.getOperand(0) == VecIn1) { 7132 unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 7133 if (ExtIndex > VT.getVectorNumElements()) 7134 return SDValue(); 7135 7136 Mask.push_back(ExtIndex); 7137 continue; 7138 } 7139 7140 // Otherwise, use InIdx + VecSize 7141 unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 7142 Mask.push_back(Idx+NumInScalars); 7143 } 7144 7145 // Add count and size info. 7146 if (!isTypeLegal(VT)) 7147 return SDValue(); 7148 7149 // Return the new VECTOR_SHUFFLE node. 7150 SDValue Ops[2]; 7151 Ops[0] = VecIn1; 7152 Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); 7153 return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); 7154 } 7155 7156 return SDValue(); 7157 } 7158 7159 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { 7160 // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of 7161 // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector 7162 // inputs come from at most two distinct vectors, turn this into a shuffle 7163 // node. 7164 7165 // If we only have one input vector, we don't need to do any concatenation. 
7166 if (N->getNumOperands() == 1) 7167 return N->getOperand(0); 7168 7169 return SDValue(); 7170 } 7171 7172 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { 7173 EVT NVT = N->getValueType(0); 7174 SDValue V = N->getOperand(0); 7175 7176 if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { 7177 // Handle only simple case where vector being inserted and vector 7178 // being extracted are of same type, and are half size of larger vectors. 7179 EVT BigVT = V->getOperand(0).getValueType(); 7180 EVT SmallVT = V->getOperand(1).getValueType(); 7181 if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) 7182 return SDValue(); 7183 7184 // Only handle cases where both indexes are constants with the same type. 7185 ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); 7186 ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); 7187 7188 if (InsIdx && ExtIdx && 7189 InsIdx->getValueType(0).getSizeInBits() <= 64 && 7190 ExtIdx->getValueType(0).getSizeInBits() <= 64) { 7191 // Combine: 7192 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) 7193 // Into: 7194 // indices are equal => V1 7195 // otherwise => (extract_subvec V1, ExtIdx) 7196 if (InsIdx->getZExtValue() == ExtIdx->getZExtValue()) 7197 return V->getOperand(1); 7198 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, 7199 V->getOperand(0), N->getOperand(1)); 7200 } 7201 } 7202 7203 return SDValue(); 7204 } 7205 7206 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { 7207 EVT VT = N->getValueType(0); 7208 unsigned NumElts = VT.getVectorNumElements(); 7209 7210 SDValue N0 = N->getOperand(0); 7211 7212 assert(N0.getValueType().getVectorNumElements() == NumElts && 7213 "Vector shuffle must be normalized in DAG"); 7214 7215 // FIXME: implement canonicalizations from DAG.getVectorShuffle() 7216 7217 // If it is a splat, check if the argument vector is another splat or a 7218 // build_vector with all scalar elements the same. 
7219 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 7220 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { 7221 SDNode *V = N0.getNode(); 7222 7223 // If this is a bit convert that changes the element type of the vector but 7224 // not the number of vector elements, look through it. Be careful not to 7225 // look though conversions that change things like v4f32 to v2f64. 7226 if (V->getOpcode() == ISD::BITCAST) { 7227 SDValue ConvInput = V->getOperand(0); 7228 if (ConvInput.getValueType().isVector() && 7229 ConvInput.getValueType().getVectorNumElements() == NumElts) 7230 V = ConvInput.getNode(); 7231 } 7232 7233 if (V->getOpcode() == ISD::BUILD_VECTOR) { 7234 assert(V->getNumOperands() == NumElts && 7235 "BUILD_VECTOR has wrong number of operands"); 7236 SDValue Base; 7237 bool AllSame = true; 7238 for (unsigned i = 0; i != NumElts; ++i) { 7239 if (V->getOperand(i).getOpcode() != ISD::UNDEF) { 7240 Base = V->getOperand(i); 7241 break; 7242 } 7243 } 7244 // Splat of <u, u, u, u>, return <u, u, u, u> 7245 if (!Base.getNode()) 7246 return N0; 7247 for (unsigned i = 0; i != NumElts; ++i) { 7248 if (V->getOperand(i) != Base) { 7249 AllSame = false; 7250 break; 7251 } 7252 } 7253 // Splat of <x, x, x, x>, return <x, x, x, x> 7254 if (AllSame) 7255 return N0; 7256 } 7257 } 7258 return SDValue(); 7259 } 7260 7261 SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) { 7262 if (!TLI.getShouldFoldAtomicFences()) 7263 return SDValue(); 7264 7265 SDValue atomic = N->getOperand(0); 7266 switch (atomic.getOpcode()) { 7267 case ISD::ATOMIC_CMP_SWAP: 7268 case ISD::ATOMIC_SWAP: 7269 case ISD::ATOMIC_LOAD_ADD: 7270 case ISD::ATOMIC_LOAD_SUB: 7271 case ISD::ATOMIC_LOAD_AND: 7272 case ISD::ATOMIC_LOAD_OR: 7273 case ISD::ATOMIC_LOAD_XOR: 7274 case ISD::ATOMIC_LOAD_NAND: 7275 case ISD::ATOMIC_LOAD_MIN: 7276 case ISD::ATOMIC_LOAD_MAX: 7277 case ISD::ATOMIC_LOAD_UMIN: 7278 case ISD::ATOMIC_LOAD_UMAX: 7279 break; 7280 default: 7281 return SDValue(); 7282 } 7283 7284 SDValue 
fence = atomic.getOperand(0); 7285 if (fence.getOpcode() != ISD::MEMBARRIER) 7286 return SDValue(); 7287 7288 switch (atomic.getOpcode()) { 7289 case ISD::ATOMIC_CMP_SWAP: 7290 return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), 7291 fence.getOperand(0), 7292 atomic.getOperand(1), atomic.getOperand(2), 7293 atomic.getOperand(3)), atomic.getResNo()); 7294 case ISD::ATOMIC_SWAP: 7295 case ISD::ATOMIC_LOAD_ADD: 7296 case ISD::ATOMIC_LOAD_SUB: 7297 case ISD::ATOMIC_LOAD_AND: 7298 case ISD::ATOMIC_LOAD_OR: 7299 case ISD::ATOMIC_LOAD_XOR: 7300 case ISD::ATOMIC_LOAD_NAND: 7301 case ISD::ATOMIC_LOAD_MIN: 7302 case ISD::ATOMIC_LOAD_MAX: 7303 case ISD::ATOMIC_LOAD_UMIN: 7304 case ISD::ATOMIC_LOAD_UMAX: 7305 return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), 7306 fence.getOperand(0), 7307 atomic.getOperand(1), atomic.getOperand(2)), 7308 atomic.getResNo()); 7309 default: 7310 return SDValue(); 7311 } 7312 } 7313 7314 /// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform 7315 /// an AND to a vector_shuffle with the destination vector and a zero vector. 7316 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. 
==> 7317 /// vector_shuffle V, Zero, <0, 4, 2, 4> 7318 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { 7319 EVT VT = N->getValueType(0); 7320 DebugLoc dl = N->getDebugLoc(); 7321 SDValue LHS = N->getOperand(0); 7322 SDValue RHS = N->getOperand(1); 7323 if (N->getOpcode() == ISD::AND) { 7324 if (RHS.getOpcode() == ISD::BITCAST) 7325 RHS = RHS.getOperand(0); 7326 if (RHS.getOpcode() == ISD::BUILD_VECTOR) { 7327 SmallVector<int, 8> Indices; 7328 unsigned NumElts = RHS.getNumOperands(); 7329 for (unsigned i = 0; i != NumElts; ++i) { 7330 SDValue Elt = RHS.getOperand(i); 7331 if (!isa<ConstantSDNode>(Elt)) 7332 return SDValue(); 7333 else if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) 7334 Indices.push_back(i); 7335 else if (cast<ConstantSDNode>(Elt)->isNullValue()) 7336 Indices.push_back(NumElts); 7337 else 7338 return SDValue(); 7339 } 7340 7341 // Let's see if the target supports this vector_shuffle. 7342 EVT RVT = RHS.getValueType(); 7343 if (!TLI.isVectorClearMaskLegal(Indices, RVT)) 7344 return SDValue(); 7345 7346 // Return the new VECTOR_SHUFFLE node. 7347 EVT EltVT = RVT.getVectorElementType(); 7348 SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), 7349 DAG.getConstant(0, EltVT)); 7350 SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 7351 RVT, &ZeroOps[0], ZeroOps.size()); 7352 LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); 7353 SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); 7354 return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); 7355 } 7356 } 7357 7358 return SDValue(); 7359 } 7360 7361 /// SimplifyVBinOp - Visit a binary vector operation, like ADD. 7362 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { 7363 // After legalize, the target may be depending on adds and other 7364 // binary ops to provide legal ways to construct constants or other 7365 // things. Simplifying them may result in a loss of legality. 
7366 if (LegalOperations) return SDValue(); 7367 7368 assert(N->getValueType(0).isVector() && 7369 "SimplifyVBinOp only works on vectors!"); 7370 7371 SDValue LHS = N->getOperand(0); 7372 SDValue RHS = N->getOperand(1); 7373 SDValue Shuffle = XformToShuffleWithZero(N); 7374 if (Shuffle.getNode()) return Shuffle; 7375 7376 // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold 7377 // this operation. 7378 if (LHS.getOpcode() == ISD::BUILD_VECTOR && 7379 RHS.getOpcode() == ISD::BUILD_VECTOR) { 7380 SmallVector<SDValue, 8> Ops; 7381 for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { 7382 SDValue LHSOp = LHS.getOperand(i); 7383 SDValue RHSOp = RHS.getOperand(i); 7384 // If these two elements can't be folded, bail out. 7385 if ((LHSOp.getOpcode() != ISD::UNDEF && 7386 LHSOp.getOpcode() != ISD::Constant && 7387 LHSOp.getOpcode() != ISD::ConstantFP) || 7388 (RHSOp.getOpcode() != ISD::UNDEF && 7389 RHSOp.getOpcode() != ISD::Constant && 7390 RHSOp.getOpcode() != ISD::ConstantFP)) 7391 break; 7392 7393 // Can't fold divide by zero. 7394 if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || 7395 N->getOpcode() == ISD::FDIV) { 7396 if ((RHSOp.getOpcode() == ISD::Constant && 7397 cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) || 7398 (RHSOp.getOpcode() == ISD::ConstantFP && 7399 cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero())) 7400 break; 7401 } 7402 7403 EVT VT = LHSOp.getValueType(); 7404 EVT RVT = RHSOp.getValueType(); 7405 if (RVT != VT) { 7406 // Integer BUILD_VECTOR operands may have types larger than the element 7407 // size (e.g., when the element type is not legal). Prior to type 7408 // legalization, the types may not match between the two BUILD_VECTORS. 7409 // Truncate one of the operands to make them match. 
7410 if (RVT.getSizeInBits() > VT.getSizeInBits()) { 7411 RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp); 7412 } else { 7413 LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp); 7414 VT = RVT; 7415 } 7416 } 7417 SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT, 7418 LHSOp, RHSOp); 7419 if (FoldOp.getOpcode() != ISD::UNDEF && 7420 FoldOp.getOpcode() != ISD::Constant && 7421 FoldOp.getOpcode() != ISD::ConstantFP) 7422 break; 7423 Ops.push_back(FoldOp); 7424 AddToWorkList(FoldOp.getNode()); 7425 } 7426 7427 if (Ops.size() == LHS.getNumOperands()) 7428 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 7429 LHS.getValueType(), &Ops[0], Ops.size()); 7430 } 7431 7432 return SDValue(); 7433 } 7434 7435 SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, 7436 SDValue N1, SDValue N2){ 7437 assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); 7438 7439 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, 7440 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 7441 7442 // If we got a simplified select_cc node back from SimplifySelectCC, then 7443 // break it down into a new SETCC node, and a new SELECT node, and then return 7444 // the SELECT node, since we were called with a SELECT node. 7445 if (SCC.getNode()) { 7446 // Check to see if we got a select_cc back (to turn into setcc/select). 7447 // Otherwise, just return whatever node we got back, like fabs. 
7448 if (SCC.getOpcode() == ISD::SELECT_CC) { 7449 SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(), 7450 N0.getValueType(), 7451 SCC.getOperand(0), SCC.getOperand(1), 7452 SCC.getOperand(4)); 7453 AddToWorkList(SETCC.getNode()); 7454 return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(), 7455 SCC.getOperand(2), SCC.getOperand(3), SETCC); 7456 } 7457 7458 return SCC; 7459 } 7460 return SDValue(); 7461 } 7462 7463 /// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS 7464 /// are the two values being selected between, see if we can simplify the 7465 /// select. Callers of this should assume that TheSelect is deleted if this 7466 /// returns true. As such, they should return the appropriate thing (e.g. the 7467 /// node) back to the top-level of the DAG combiner loop to avoid it being 7468 /// looked at. 7469 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, 7470 SDValue RHS) { 7471 7472 // Cannot simplify select with vector condition 7473 if (TheSelect->getOperand(0).getValueType().isVector()) return false; 7474 7475 // If this is a select from two identical things, try to pull the operation 7476 // through the select. 7477 if (LHS.getOpcode() != RHS.getOpcode() || 7478 !LHS.hasOneUse() || !RHS.hasOneUse()) 7479 return false; 7480 7481 // If this is a load and the token chain is identical, replace the select 7482 // of two loads with a load through a select of the address to load from. 7483 // This triggers in things like "select bool X, 10.0, 123.0" after the FP 7484 // constants have been dropped into the constant pool. 7485 if (LHS.getOpcode() == ISD::LOAD) { 7486 LoadSDNode *LLD = cast<LoadSDNode>(LHS); 7487 LoadSDNode *RLD = cast<LoadSDNode>(RHS); 7488 7489 // Token chains must be identical. 7490 if (LHS.getOperand(0) != RHS.getOperand(0) || 7491 // Do not let this transformation reduce the number of volatile loads. 
7492 LLD->isVolatile() || RLD->isVolatile() || 7493 // If this is an EXTLOAD, the VT's must match. 7494 LLD->getMemoryVT() != RLD->getMemoryVT() || 7495 // If this is an EXTLOAD, the kind of extension must match. 7496 (LLD->getExtensionType() != RLD->getExtensionType() && 7497 // The only exception is if one of the extensions is anyext. 7498 LLD->getExtensionType() != ISD::EXTLOAD && 7499 RLD->getExtensionType() != ISD::EXTLOAD) || 7500 // FIXME: this discards src value information. This is 7501 // over-conservative. It would be beneficial to be able to remember 7502 // both potential memory locations. Since we are discarding 7503 // src value info, don't do the transformation if the memory 7504 // locations are not in the default address space. 7505 LLD->getPointerInfo().getAddrSpace() != 0 || 7506 RLD->getPointerInfo().getAddrSpace() != 0) 7507 return false; 7508 7509 // Check that the select condition doesn't reach either load. If so, 7510 // folding this will induce a cycle into the DAG. If not, this is safe to 7511 // xform, so create a select of the addresses. 
7512 SDValue Addr; 7513 if (TheSelect->getOpcode() == ISD::SELECT) { 7514 SDNode *CondNode = TheSelect->getOperand(0).getNode(); 7515 if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || 7516 (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) 7517 return false; 7518 Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), 7519 LLD->getBasePtr().getValueType(), 7520 TheSelect->getOperand(0), LLD->getBasePtr(), 7521 RLD->getBasePtr()); 7522 } else { // Otherwise SELECT_CC 7523 SDNode *CondLHS = TheSelect->getOperand(0).getNode(); 7524 SDNode *CondRHS = TheSelect->getOperand(1).getNode(); 7525 7526 if ((LLD->hasAnyUseOfValue(1) && 7527 (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || 7528 (LLD->hasAnyUseOfValue(1) && 7529 (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS)))) 7530 return false; 7531 7532 Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), 7533 LLD->getBasePtr().getValueType(), 7534 TheSelect->getOperand(0), 7535 TheSelect->getOperand(1), 7536 LLD->getBasePtr(), RLD->getBasePtr(), 7537 TheSelect->getOperand(4)); 7538 } 7539 7540 SDValue Load; 7541 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { 7542 Load = DAG.getLoad(TheSelect->getValueType(0), 7543 TheSelect->getDebugLoc(), 7544 // FIXME: Discards pointer info. 7545 LLD->getChain(), Addr, MachinePointerInfo(), 7546 LLD->isVolatile(), LLD->isNonTemporal(), 7547 LLD->isInvariant(), LLD->getAlignment()); 7548 } else { 7549 Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? 7550 RLD->getExtensionType() : LLD->getExtensionType(), 7551 TheSelect->getDebugLoc(), 7552 TheSelect->getValueType(0), 7553 // FIXME: Discards pointer info. 7554 LLD->getChain(), Addr, MachinePointerInfo(), 7555 LLD->getMemoryVT(), LLD->isVolatile(), 7556 LLD->isNonTemporal(), LLD->getAlignment()); 7557 } 7558 7559 // Users of the select now use the result of the load. 
7560 CombineTo(TheSelect, Load); 7561 7562 // Users of the old loads now use the new load's chain. We know the 7563 // old-load value is dead now. 7564 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); 7565 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); 7566 return true; 7567 } 7568 7569 return false; 7570 } 7571 7572 /// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 7573 /// where 'cond' is the comparison specified by CC. 7574 SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, 7575 SDValue N2, SDValue N3, 7576 ISD::CondCode CC, bool NotExtCompare) { 7577 // (x ? y : y) -> y. 7578 if (N2 == N3) return N2; 7579 7580 EVT VT = N2.getValueType(); 7581 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 7582 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); 7583 ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); 7584 7585 // Determine if the condition we're dealing with is constant 7586 SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), 7587 N0, N1, CC, DL, false); 7588 if (SCC.getNode()) AddToWorkList(SCC.getNode()); 7589 ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode()); 7590 7591 // fold select_cc true, x, y -> x 7592 if (SCCC && !SCCC->isNullValue()) 7593 return N2; 7594 // fold select_cc false, x, y -> y 7595 if (SCCC && SCCC->isNullValue()) 7596 return N3; 7597 7598 // Check to see if we can simplify the select into an fabs node 7599 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { 7600 // Allow either -0.0 or 0.0 7601 if (CFP->getValueAPF().isZero()) { 7602 // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs 7603 if ((CC == ISD::SETGE || CC == ISD::SETGT) && 7604 N0 == N2 && N3.getOpcode() == ISD::FNEG && 7605 N2 == N3.getOperand(0)) 7606 return DAG.getNode(ISD::FABS, DL, VT, N0); 7607 7608 // select (setl[te] X, +/-0.0), fneg(X), X -> fabs 7609 if ((CC == ISD::SETLT || CC == ISD::SETLE) && 7610 N0 
== N3 && N2.getOpcode() == ISD::FNEG && 7611 N2.getOperand(0) == N3) 7612 return DAG.getNode(ISD::FABS, DL, VT, N3); 7613 } 7614 } 7615 7616 // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" 7617 // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 7618 // in it. This is a win when the constant is not otherwise available because 7619 // it replaces two constant pool loads with one. We only do this if the FP 7620 // type is known to be legal, because if it isn't, then we are before legalize 7621 // types an we want the other legalization to happen first (e.g. to avoid 7622 // messing with soft float) and if the ConstantFP is not legal, because if 7623 // it is legal, we may not need to store the FP constant in a constant pool. 7624 if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2)) 7625 if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) { 7626 if (TLI.isTypeLegal(N2.getValueType()) && 7627 (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != 7628 TargetLowering::Legal) && 7629 // If both constants have multiple uses, then we won't need to do an 7630 // extra load, they are likely around in registers for other users. 7631 (TV->hasOneUse() || FV->hasOneUse())) { 7632 Constant *Elts[] = { 7633 const_cast<ConstantFP*>(FV->getConstantFPValue()), 7634 const_cast<ConstantFP*>(TV->getConstantFPValue()) 7635 }; 7636 Type *FPTy = Elts[0]->getType(); 7637 const TargetData &TD = *TLI.getTargetData(); 7638 7639 // Create a ConstantArray of the two constants. 7640 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); 7641 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), 7642 TD.getPrefTypeAlignment(FPTy)); 7643 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); 7644 7645 // Get the offsets to the 0 and 1 element of the array so that we can 7646 // select between them. 
7647 SDValue Zero = DAG.getIntPtrConstant(0); 7648 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); 7649 SDValue One = DAG.getIntPtrConstant(EltSize); 7650 7651 SDValue Cond = DAG.getSetCC(DL, 7652 TLI.getSetCCResultType(N0.getValueType()), 7653 N0, N1, CC); 7654 AddToWorkList(Cond.getNode()); 7655 SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(), 7656 Cond, One, Zero); 7657 AddToWorkList(CstOffset.getNode()); 7658 CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, 7659 CstOffset); 7660 AddToWorkList(CPIdx.getNode()); 7661 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, 7662 MachinePointerInfo::getConstantPool(), false, 7663 false, false, Alignment); 7664 7665 } 7666 } 7667 7668 // Check to see if we can perform the "gzip trick", transforming 7669 // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) 7670 if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && 7671 (N1C->isNullValue() || // (a < 0) ? b : 0 7672 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 7673 EVT XType = N0.getValueType(); 7674 EVT AType = N2.getValueType(); 7675 if (XType.bitsGE(AType)) { 7676 // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a 7677 // single-bit constant. 
7678 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) { 7679 unsigned ShCtV = N2C->getAPIntValue().logBase2(); 7680 ShCtV = XType.getSizeInBits()-ShCtV-1; 7681 SDValue ShCt = DAG.getConstant(ShCtV, 7682 getShiftAmountTy(N0.getValueType())); 7683 SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), 7684 XType, N0, ShCt); 7685 AddToWorkList(Shift.getNode()); 7686 7687 if (XType.bitsGT(AType)) { 7688 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 7689 AddToWorkList(Shift.getNode()); 7690 } 7691 7692 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 7693 } 7694 7695 SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), 7696 XType, N0, 7697 DAG.getConstant(XType.getSizeInBits()-1, 7698 getShiftAmountTy(N0.getValueType()))); 7699 AddToWorkList(Shift.getNode()); 7700 7701 if (XType.bitsGT(AType)) { 7702 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 7703 AddToWorkList(Shift.getNode()); 7704 } 7705 7706 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 7707 } 7708 } 7709 7710 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) 7711 // where y is has a single bit set. 7712 // A plaintext description would be, we can turn the SELECT_CC into an AND 7713 // when the condition can be materialized as an all-ones register. Any 7714 // single bit-test can be materialized as an all-ones register with 7715 // shift-left and shift-right-arith. 7716 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && 7717 N0->getValueType(0) == VT && 7718 N1C && N1C->isNullValue() && 7719 N2C && N2C->isNullValue()) { 7720 SDValue AndLHS = N0->getOperand(0); 7721 ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 7722 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { 7723 // Shift the tested bit over the sign bit. 
7724 APInt AndMask = ConstAndRHS->getAPIntValue(); 7725 SDValue ShlAmt = 7726 DAG.getConstant(AndMask.countLeadingZeros(), 7727 getShiftAmountTy(AndLHS.getValueType())); 7728 SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt); 7729 7730 // Now arithmetic right shift it all the way over, so the result is either 7731 // all-ones, or zero. 7732 SDValue ShrAmt = 7733 DAG.getConstant(AndMask.getBitWidth()-1, 7734 getShiftAmountTy(Shl.getValueType())); 7735 SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt); 7736 7737 return DAG.getNode(ISD::AND, DL, VT, Shr, N3); 7738 } 7739 } 7740 7741 // fold select C, 16, 0 -> shl C, 4 7742 if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && 7743 TLI.getBooleanContents(N0.getValueType().isVector()) == 7744 TargetLowering::ZeroOrOneBooleanContent) { 7745 7746 // If the caller doesn't want us to simplify this into a zext of a compare, 7747 // don't do it. 7748 if (NotExtCompare && N2C->getAPIntValue() == 1) 7749 return SDValue(); 7750 7751 // Get a SetCC of the condition 7752 // FIXME: Should probably make sure that setcc is legal if we ever have a 7753 // target where it isn't. 
7754 SDValue Temp, SCC; 7755 // cast from setcc result type to select result type 7756 if (LegalTypes) { 7757 SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), 7758 N0, N1, CC); 7759 if (N2.getValueType().bitsLT(SCC.getValueType())) 7760 Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType()); 7761 else 7762 Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), 7763 N2.getValueType(), SCC); 7764 } else { 7765 SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); 7766 Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), 7767 N2.getValueType(), SCC); 7768 } 7769 7770 AddToWorkList(SCC.getNode()); 7771 AddToWorkList(Temp.getNode()); 7772 7773 if (N2C->getAPIntValue() == 1) 7774 return Temp; 7775 7776 // shl setcc result by log2 n2c 7777 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, 7778 DAG.getConstant(N2C->getAPIntValue().logBase2(), 7779 getShiftAmountTy(Temp.getValueType()))); 7780 } 7781 7782 // Check to see if this is the equivalent of setcc 7783 // FIXME: Turn all of these into setcc if setcc if setcc is legal 7784 // otherwise, go ahead with the folds. 
7785 if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { 7786 EVT XType = N0.getValueType(); 7787 if (!LegalOperations || 7788 TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { 7789 SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); 7790 if (Res.getValueType() != VT) 7791 Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); 7792 return Res; 7793 } 7794 7795 // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) 7796 if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && 7797 (!LegalOperations || 7798 TLI.isOperationLegal(ISD::CTLZ, XType))) { 7799 SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0); 7800 return DAG.getNode(ISD::SRL, DL, XType, Ctlz, 7801 DAG.getConstant(Log2_32(XType.getSizeInBits()), 7802 getShiftAmountTy(Ctlz.getValueType()))); 7803 } 7804 // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) 7805 if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { 7806 SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(), 7807 XType, DAG.getConstant(0, XType), N0); 7808 SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType); 7809 return DAG.getNode(ISD::SRL, DL, XType, 7810 DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), 7811 DAG.getConstant(XType.getSizeInBits()-1, 7812 getShiftAmountTy(XType))); 7813 } 7814 // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) 7815 if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { 7816 SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, 7817 DAG.getConstant(XType.getSizeInBits()-1, 7818 getShiftAmountTy(N0.getValueType()))); 7819 return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); 7820 } 7821 } 7822 7823 // Check to see if this is an integer abs. 
7824 // select_cc setg[te] X, 0, X, -X -> 7825 // select_cc setgt X, -1, X, -X -> 7826 // select_cc setl[te] X, 0, -X, X -> 7827 // select_cc setlt X, 1, -X, X -> 7828 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 7829 if (N1C) { 7830 ConstantSDNode *SubC = NULL; 7831 if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || 7832 (N1C->isAllOnesValue() && CC == ISD::SETGT)) && 7833 N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) 7834 SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); 7835 else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || 7836 (N1C->isOne() && CC == ISD::SETLT)) && 7837 N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) 7838 SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); 7839 7840 EVT XType = N0.getValueType(); 7841 if (SubC && SubC->isNullValue() && XType.isInteger()) { 7842 SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, 7843 N0, 7844 DAG.getConstant(XType.getSizeInBits()-1, 7845 getShiftAmountTy(N0.getValueType()))); 7846 SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), 7847 XType, N0, Shift); 7848 AddToWorkList(Shift.getNode()); 7849 AddToWorkList(Add.getNode()); 7850 return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); 7851 } 7852 } 7853 7854 return SDValue(); 7855 } 7856 7857 /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. 7858 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, 7859 SDValue N1, ISD::CondCode Cond, 7860 DebugLoc DL, bool foldBooleans) { 7861 TargetLowering::DAGCombinerInfo 7862 DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); 7863 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); 7864 } 7865 7866 /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, 7867 /// return a DAG expression to select that will generate the same value by 7868 /// multiplying by a magic number. 
See: 7869 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> 7870 SDValue DAGCombiner::BuildSDIV(SDNode *N) { 7871 std::vector<SDNode*> Built; 7872 SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built); 7873 7874 for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); 7875 ii != ee; ++ii) 7876 AddToWorkList(*ii); 7877 return S; 7878 } 7879 7880 /// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, 7881 /// return a DAG expression to select that will generate the same value by 7882 /// multiplying by a magic number. See: 7883 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> 7884 SDValue DAGCombiner::BuildUDIV(SDNode *N) { 7885 std::vector<SDNode*> Built; 7886 SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built); 7887 7888 for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); 7889 ii != ee; ++ii) 7890 AddToWorkList(*ii); 7891 return S; 7892 } 7893 7894 /// FindBaseOffset - Return true if base is a frame index, which is known not 7895 // to alias with anything but itself. Provides base object and offset as 7896 // results. 7897 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, 7898 const GlobalValue *&GV, void *&CV) { 7899 // Assume it is a primitive operation. 7900 Base = Ptr; Offset = 0; GV = 0; CV = 0; 7901 7902 // If it's an adding a simple constant then integrate the offset. 7903 if (Base.getOpcode() == ISD::ADD) { 7904 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) { 7905 Base = Base.getOperand(0); 7906 Offset += C->getZExtValue(); 7907 } 7908 } 7909 7910 // Return the underlying GlobalValue, and update the Offset. Return false 7911 // for GlobalAddressSDNode since the same GlobalAddress may be represented 7912 // by multiple nodes with different offsets. 
7913 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) { 7914 GV = G->getGlobal(); 7915 Offset += G->getOffset(); 7916 return false; 7917 } 7918 7919 // Return the underlying Constant value, and update the Offset. Return false 7920 // for ConstantSDNodes since the same constant pool entry may be represented 7921 // by multiple nodes with different offsets. 7922 if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) { 7923 CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal() 7924 : (void *)C->getConstVal(); 7925 Offset += C->getOffset(); 7926 return false; 7927 } 7928 // If it's any of the following then it can't alias with anything but itself. 7929 return isa<FrameIndexSDNode>(Base); 7930 } 7931 7932 /// isAlias - Return true if there is any possibility that the two addresses 7933 /// overlap. 7934 bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, 7935 const Value *SrcValue1, int SrcValueOffset1, 7936 unsigned SrcValueAlign1, 7937 const MDNode *TBAAInfo1, 7938 SDValue Ptr2, int64_t Size2, 7939 const Value *SrcValue2, int SrcValueOffset2, 7940 unsigned SrcValueAlign2, 7941 const MDNode *TBAAInfo2) const { 7942 // If they are the same then they must be aliases. 7943 if (Ptr1 == Ptr2) return true; 7944 7945 // Gather base node and offset information. 7946 SDValue Base1, Base2; 7947 int64_t Offset1, Offset2; 7948 const GlobalValue *GV1, *GV2; 7949 void *CV1, *CV2; 7950 bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); 7951 bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); 7952 7953 // If they have a same base address then check to see if they overlap. 7954 if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) 7955 return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); 7956 7957 // It is possible for different frame indices to alias each other, mostly 7958 // when tail call optimization reuses return address slots for arguments. 
7959 // To catch this case, look up the actual index of frame indices to compute 7960 // the real alias relationship. 7961 if (isFrameIndex1 && isFrameIndex2) { 7962 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 7963 Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); 7964 Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); 7965 return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); 7966 } 7967 7968 // Otherwise, if we know what the bases are, and they aren't identical, then 7969 // we know they cannot alias. 7970 if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) 7971 return false; 7972 7973 // If we know required SrcValue1 and SrcValue2 have relatively large alignment 7974 // compared to the size and offset of the access, we may be able to prove they 7975 // do not alias. This check is conservative for now to catch cases created by 7976 // splitting vector types. 7977 if ((SrcValueAlign1 == SrcValueAlign2) && 7978 (SrcValueOffset1 != SrcValueOffset2) && 7979 (Size1 == Size2) && (SrcValueAlign1 > Size1)) { 7980 int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1; 7981 int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; 7982 7983 // There is no overlap between these relatively aligned accesses of similar 7984 // size, return no alias. 7985 if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1) 7986 return false; 7987 } 7988 7989 if (CombinerGlobalAA) { 7990 // Use alias analysis information. 
7991 int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); 7992 int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; 7993 int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; 7994 AliasAnalysis::AliasResult AAResult = 7995 AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1), 7996 AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2)); 7997 if (AAResult == AliasAnalysis::NoAlias) 7998 return false; 7999 } 8000 8001 // Otherwise we have to assume they alias. 8002 return true; 8003 } 8004 8005 /// FindAliasInfo - Extracts the relevant alias information from the memory 8006 /// node. Returns true if the operand was a load. 8007 bool DAGCombiner::FindAliasInfo(SDNode *N, 8008 SDValue &Ptr, int64_t &Size, 8009 const Value *&SrcValue, 8010 int &SrcValueOffset, 8011 unsigned &SrcValueAlign, 8012 const MDNode *&TBAAInfo) const { 8013 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 8014 Ptr = LD->getBasePtr(); 8015 Size = LD->getMemoryVT().getSizeInBits() >> 3; 8016 SrcValue = LD->getSrcValue(); 8017 SrcValueOffset = LD->getSrcValueOffset(); 8018 SrcValueAlign = LD->getOriginalAlignment(); 8019 TBAAInfo = LD->getTBAAInfo(); 8020 return true; 8021 } 8022 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 8023 Ptr = ST->getBasePtr(); 8024 Size = ST->getMemoryVT().getSizeInBits() >> 3; 8025 SrcValue = ST->getSrcValue(); 8026 SrcValueOffset = ST->getSrcValueOffset(); 8027 SrcValueAlign = ST->getOriginalAlignment(); 8028 TBAAInfo = ST->getTBAAInfo(); 8029 return false; 8030 } 8031 llvm_unreachable("FindAliasInfo expected a memory operand"); 8032 } 8033 8034 /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, 8035 /// looking for aliasing nodes and adding them to the Aliases vector. 8036 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, 8037 SmallVector<SDValue, 8> &Aliases) { 8038 SmallVector<SDValue, 8> Chains; // List of chains to visit. 8039 SmallPtrSet<SDNode *, 16> Visited; // Visited node set. 
8040 8041 // Get alias information for node. 8042 SDValue Ptr; 8043 int64_t Size; 8044 const Value *SrcValue; 8045 int SrcValueOffset; 8046 unsigned SrcValueAlign; 8047 const MDNode *SrcTBAAInfo; 8048 bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, 8049 SrcValueAlign, SrcTBAAInfo); 8050 8051 // Starting off. 8052 Chains.push_back(OriginalChain); 8053 unsigned Depth = 0; 8054 8055 // Look at each chain and determine if it is an alias. If so, add it to the 8056 // aliases list. If not, then continue up the chain looking for the next 8057 // candidate. 8058 while (!Chains.empty()) { 8059 SDValue Chain = Chains.back(); 8060 Chains.pop_back(); 8061 8062 // For TokenFactor nodes, look at each operand and only continue up the 8063 // chain until we find two aliases. If we've seen two aliases, assume we'll 8064 // find more and revert to original chain since the xform is unlikely to be 8065 // profitable. 8066 // 8067 // FIXME: The depth check could be made to return the last non-aliasing 8068 // chain we found before we hit a tokenfactor rather than the original 8069 // chain. 8070 if (Depth > 6 || Aliases.size() == 2) { 8071 Aliases.clear(); 8072 Aliases.push_back(OriginalChain); 8073 break; 8074 } 8075 8076 // Don't bother if we've been before. 8077 if (!Visited.insert(Chain.getNode())) 8078 continue; 8079 8080 switch (Chain.getOpcode()) { 8081 case ISD::EntryToken: 8082 // Entry token is ideal chain operand, but handled in FindBetterChain. 8083 break; 8084 8085 case ISD::LOAD: 8086 case ISD::STORE: { 8087 // Get alias information for Chain. 8088 SDValue OpPtr; 8089 int64_t OpSize; 8090 const Value *OpSrcValue; 8091 int OpSrcValueOffset; 8092 unsigned OpSrcValueAlign; 8093 const MDNode *OpSrcTBAAInfo; 8094 bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, 8095 OpSrcValue, OpSrcValueOffset, 8096 OpSrcValueAlign, 8097 OpSrcTBAAInfo); 8098 8099 // If chain is alias then stop here. 
8100 if (!(IsLoad && IsOpLoad) && 8101 isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign, 8102 SrcTBAAInfo, 8103 OpPtr, OpSize, OpSrcValue, OpSrcValueOffset, 8104 OpSrcValueAlign, OpSrcTBAAInfo)) { 8105 Aliases.push_back(Chain); 8106 } else { 8107 // Look further up the chain. 8108 Chains.push_back(Chain.getOperand(0)); 8109 ++Depth; 8110 } 8111 break; 8112 } 8113 8114 case ISD::TokenFactor: 8115 // We have to check each of the operands of the token factor for "small" 8116 // token factors, so we queue them up. Adding the operands to the queue 8117 // (stack) in reverse order maintains the original order and increases the 8118 // likelihood that getNode will find a matching token factor (CSE.) 8119 if (Chain.getNumOperands() > 16) { 8120 Aliases.push_back(Chain); 8121 break; 8122 } 8123 for (unsigned n = Chain.getNumOperands(); n;) 8124 Chains.push_back(Chain.getOperand(--n)); 8125 ++Depth; 8126 break; 8127 8128 default: 8129 // For all other instructions we will just have to take what we can get. 8130 Aliases.push_back(Chain); 8131 break; 8132 } 8133 } 8134 } 8135 8136 /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking 8137 /// for a better chain (aliasing node.) 8138 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { 8139 SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor. 8140 8141 // Accumulate all the aliases to this node. 8142 GatherAllAliases(N, OldChain, Aliases); 8143 8144 // If no operands then chain to entry token. 8145 if (Aliases.size() == 0) 8146 return DAG.getEntryNode(); 8147 8148 // If a single operand then chain to it. We don't need to revisit it. 8149 if (Aliases.size() == 1) 8150 return Aliases[0]; 8151 8152 // Construct a custom tailored token factor. 8153 return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, 8154 &Aliases[0], Aliases.size()); 8155 } 8156 8157 // SelectionDAG::Combine - This is the entry point for the file. 
8158 // 8159 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, 8160 CodeGenOpt::Level OptLevel) { 8161 /// run - This is the main entry point to this class. 8162 /// 8163 DAGCombiner(*this, AA, OptLevel).Run(Level); 8164 } 8165