1 //===- InstCombineAddSub.cpp ----------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the visit functions for add, fadd, sub, and fsub. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "InstCombine.h" 15 #include "llvm/Analysis/InstructionSimplify.h" 16 #include "llvm/IR/DataLayout.h" 17 #include "llvm/Support/GetElementPtrTypeIterator.h" 18 #include "llvm/Support/PatternMatch.h" 19 using namespace llvm; 20 using namespace PatternMatch; 21 22 namespace { 23 24 /// Class representing coefficient of floating-point addend. 25 /// This class needs to be highly efficient, which is especially true for 26 /// the constructor. As of I write this comment, the cost of the default 27 /// constructor is merely 4-byte-store-zero (Assuming compiler is able to 28 /// perform write-merging). 29 /// 30 class FAddendCoef { 31 public: 32 // The constructor has to initialize a APFloat, which is uncessary for 33 // most addends which have coefficient either 1 or -1. So, the constructor 34 // is expensive. In order to avoid the cost of the constructor, we should 35 // reuse some instances whenever possible. The pre-created instances 36 // FAddCombine::Add[0-5] embodies this idea. 37 // 38 FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {} 39 ~FAddendCoef(); 40 41 void set(short C) { 42 assert(!insaneIntVal(C) && "Insane coefficient"); 43 IsFp = false; IntVal = C; 44 } 45 46 void set(const APFloat& C); 47 48 void negate(); 49 50 bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } 51 Value *getValue(Type *) const; 52 53 // If possible, don't define operator+/operator- etc because these 54 // operators inevitably call FAddendCoef's constructor which is not cheap. 55 void operator=(const FAddendCoef &A); 56 void operator+=(const FAddendCoef &A); 57 void operator-=(const FAddendCoef &A); 58 void operator*=(const FAddendCoef &S); 59 60 bool isOne() const { return isInt() && IntVal == 1; } 61 bool isTwo() const { return isInt() && IntVal == 2; } 62 bool isMinusOne() const { return isInt() && IntVal == -1; } 63 bool isMinusTwo() const { return isInt() && IntVal == -2; } 64 65 private: 66 bool insaneIntVal(int V) { return V > 4 || V < -4; } 67 APFloat *getFpValPtr(void) 68 { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); } 69 70 const APFloat &getFpVal(void) const { 71 assert(IsFp && BufHasFpVal && "Incorret state"); 72 return *reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); 73 } 74 75 APFloat &getFpVal(void) 76 { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); } 77 78 bool isInt() const { return !IsFp; } 79 80 private: 81 82 bool IsFp; 83 84 // True iff FpValBuf contains an instance of APFloat. 85 bool BufHasFpVal; 86 87 // The integer coefficient of an individual addend is either 1 or -1, 88 // and we try to simplify at most 4 addends from neighboring at most 89 // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt 90 // is overkill of this end. 91 short IntVal; 92 93 AlignedCharArrayUnion<APFloat> FpValBuf; 94 }; 95 96 /// FAddend is used to represent floating-point addend. An addend is 97 /// represented as <C, V>, where the V is a symbolic value, and C is a 98 /// constant coefficient. A constant addend is represented as <C, 0>. 99 /// 100 class FAddend { 101 public: 102 FAddend() { Val = 0; } 103 104 Value *getSymVal (void) const { return Val; } 105 const FAddendCoef &getCoef(void) const { return Coeff; } 106 107 bool isConstant() const { return Val == 0; } 108 bool isZero() const { return Coeff.isZero(); } 109 110 void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; } 111 void set(const APFloat& Coefficient, Value *V) 112 { Coeff.set(Coefficient); Val = V; } 113 void set(const ConstantFP* Coefficient, Value *V) 114 { Coeff.set(Coefficient->getValueAPF()); Val = V; } 115 116 void negate() { Coeff.negate(); } 117 118 /// Drill down the U-D chain one step to find the definition of V, and 119 /// try to break the definition into one or two addends. 120 static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1); 121 122 /// Similar to FAddend::drillDownOneStep() except that the value being 123 /// splitted is the addend itself. 124 unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const; 125 126 void operator+=(const FAddend &T) { 127 assert((Val == T.Val) && "Symbolic-values disagree"); 128 Coeff += T.Coeff; 129 } 130 131 private: 132 void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; } 133 134 // This addend has the value of "Coeff * Val". 135 Value *Val; 136 FAddendCoef Coeff; 137 }; 138 139 /// FAddCombine is the class for optimizing an unsafe fadd/fsub along 140 /// with its neighboring at most two instructions. 141 /// 142 class FAddCombine { 143 public: 144 FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} 145 Value *simplify(Instruction *FAdd); 146 147 private: 148 typedef SmallVector<const FAddend*, 4> AddendVect; 149 150 Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); 151 152 /// Convert given addend to a Value 153 Value *createAddendVal(const FAddend &A, bool& NeedNeg); 154 155 /// Return the number of instructions needed to emit the N-ary addition. 156 unsigned calcInstrNumber(const AddendVect& Vect); 157 Value *createFSub(Value *Opnd0, Value *Opnd1); 158 Value *createFAdd(Value *Opnd0, Value *Opnd1); 159 Value *createFMul(Value *Opnd0, Value *Opnd1); 160 Value *createFNeg(Value *V); 161 Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); 162 void createInstPostProc(Instruction *NewInst); 163 164 InstCombiner::BuilderTy *Builder; 165 Instruction *Instr; 166 167 private: 168 // Debugging stuff are clustered here. 169 #ifndef NDEBUG 170 unsigned CreateInstrNum; 171 void initCreateInstNum() { CreateInstrNum = 0; } 172 void incCreateInstNum() { CreateInstrNum++; } 173 #else 174 void initCreateInstNum() {} 175 void incCreateInstNum() {} 176 #endif 177 }; 178 } 179 180 //===----------------------------------------------------------------------===// 181 // 182 // Implementation of 183 // {FAddendCoef, FAddend, FAddition, FAddCombine}. 184 // 185 //===----------------------------------------------------------------------===// 186 FAddendCoef::~FAddendCoef() { 187 if (BufHasFpVal) 188 getFpValPtr()->~APFloat(); 189 } 190 191 void FAddendCoef::set(const APFloat& C) { 192 APFloat *P = getFpValPtr(); 193 194 if (isInt()) { 195 // As the buffer is meanless byte stream, we cannot call 196 // APFloat::operator=(). 197 new(P) APFloat(C); 198 } else 199 *P = C; 200 201 IsFp = BufHasFpVal = true; 202 } 203 204 void FAddendCoef::operator=(const FAddendCoef& That) { 205 if (That.isInt()) 206 set(That.IntVal); 207 else 208 set(That.getFpVal()); 209 } 210 211 void FAddendCoef::operator+=(const FAddendCoef &That) { 212 enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; 213 if (isInt() == That.isInt()) { 214 if (isInt()) 215 IntVal += That.IntVal; 216 else 217 getFpVal().add(That.getFpVal(), RndMode); 218 return; 219 } 220 221 if (isInt()) { 222 const APFloat &T = That.getFpVal(); 223 set(T); 224 getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode); 225 return; 226 } 227 228 APFloat &T = getFpVal(); 229 T.add(APFloat(T.getSemantics(), That.IntVal), RndMode); 230 } 231 232 void FAddendCoef::operator-=(const FAddendCoef &That) { 233 enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; 234 if (isInt() == That.isInt()) { 235 if (isInt()) 236 IntVal -= That.IntVal; 237 else 238 getFpVal().subtract(That.getFpVal(), RndMode); 239 return; 240 } 241 242 if (isInt()) { 243 const APFloat &T = That.getFpVal(); 244 set(T); 245 getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode); 246 return; 247 } 248 249 APFloat &T = getFpVal(); 250 T.subtract(APFloat(T.getSemantics(), IntVal), RndMode); 251 } 252 253 void FAddendCoef::operator*=(const FAddendCoef &That) { 254 if (That.isOne()) 255 return; 256 257 if (That.isMinusOne()) { 258 negate(); 259 return; 260 } 261 262 if (isInt() && That.isInt()) { 263 int Res = IntVal * (int)That.IntVal; 264 assert(!insaneIntVal(Res) && "Insane int value"); 265 IntVal = Res; 266 return; 267 } 268 269 const fltSemantics &Semantic = 270 isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics(); 271 272 if (isInt()) 273 set(APFloat(Semantic, IntVal)); 274 APFloat &F0 = getFpVal(); 275 276 if (That.isInt()) 277 F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven); 278 else 279 F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven); 280 281 return; 282 } 283 284 void FAddendCoef::negate() { 285 if (isInt()) 286 IntVal = 0 - IntVal; 287 else 288 getFpVal().changeSign(); 289 } 290 291 Value *FAddendCoef::getValue(Type *Ty) const { 292 return isInt() ? 293 ConstantFP::get(Ty, float(IntVal)) : 294 ConstantFP::get(Ty->getContext(), getFpVal()); 295 } 296 297 // The definition of <Val> Addends 298 // ========================================= 299 // A + B <1, A>, <1,B> 300 // A - B <1, A>, <1,B> 301 // 0 - B <-1, B> 302 // C * A, <C, A> 303 // A + C <1, A> <C, NULL> 304 // 0 +/- 0 <0, NULL> (corner case) 305 // 306 // Legend: A and B are not constant, C is constant 307 // 308 unsigned FAddend::drillValueDownOneStep 309 (Value *Val, FAddend &Addend0, FAddend &Addend1) { 310 Instruction *I = 0; 311 if (Val == 0 || !(I = dyn_cast<Instruction>(Val))) 312 return 0; 313 314 unsigned Opcode = I->getOpcode(); 315 316 if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) { 317 ConstantFP *C0, *C1; 318 Value *Opnd0 = I->getOperand(0); 319 Value *Opnd1 = I->getOperand(1); 320 if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero()) 321 Opnd0 = 0; 322 323 if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero()) 324 Opnd1 = 0; 325 326 if (Opnd0) { 327 if (!C0) 328 Addend0.set(1, Opnd0); 329 else 330 Addend0.set(C0, 0); 331 } 332 333 if (Opnd1) { 334 FAddend &Addend = Opnd0 ? Addend1 : Addend0; 335 if (!C1) 336 Addend.set(1, Opnd1); 337 else 338 Addend.set(C1, 0); 339 if (Opcode == Instruction::FSub) 340 Addend.negate(); 341 } 342 343 if (Opnd0 || Opnd1) 344 return Opnd0 && Opnd1 ? 2 : 1; 345 346 // Both operands are zero. Weird! 347 Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0); 348 return 1; 349 } 350 351 if (I->getOpcode() == Instruction::FMul) { 352 Value *V0 = I->getOperand(0); 353 Value *V1 = I->getOperand(1); 354 if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) { 355 Addend0.set(C, V1); 356 return 1; 357 } 358 359 if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) { 360 Addend0.set(C, V0); 361 return 1; 362 } 363 } 364 365 return 0; 366 } 367 368 // Try to break *this* addend into two addends. e.g. Suppose this addend is 369 // <2.3, V>, and V = X + Y, by calling this function, we obtain two addends, 370 // i.e. <2.3, X> and <2.3, Y>. 371 // 372 unsigned FAddend::drillAddendDownOneStep 373 (FAddend &Addend0, FAddend &Addend1) const { 374 if (isConstant()) 375 return 0; 376 377 unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1); 378 if (!BreakNum || Coeff.isOne()) 379 return BreakNum; 380 381 Addend0.Scale(Coeff); 382 383 if (BreakNum == 2) 384 Addend1.Scale(Coeff); 385 386 return BreakNum; 387 } 388 389 Value *FAddCombine::simplify(Instruction *I) { 390 assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); 391 392 // Currently we are not able to handle vector type. 393 if (I->getType()->isVectorTy()) 394 return 0; 395 396 assert((I->getOpcode() == Instruction::FAdd || 397 I->getOpcode() == Instruction::FSub) && "Expect add/sub"); 398 399 // Save the instruction before calling other member-functions. 400 Instr = I; 401 402 FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1; 403 404 unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1); 405 406 // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1. 407 unsigned Opnd0_ExpNum = 0; 408 unsigned Opnd1_ExpNum = 0; 409 410 if (!Opnd0.isConstant()) 411 Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1); 412 413 // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1. 414 if (OpndNum == 2 && !Opnd1.isConstant()) 415 Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1); 416 417 // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1 418 if (Opnd0_ExpNum && Opnd1_ExpNum) { 419 AddendVect AllOpnds; 420 AllOpnds.push_back(&Opnd0_0); 421 AllOpnds.push_back(&Opnd1_0); 422 if (Opnd0_ExpNum == 2) 423 AllOpnds.push_back(&Opnd0_1); 424 if (Opnd1_ExpNum == 2) 425 AllOpnds.push_back(&Opnd1_1); 426 427 // Compute instruction quota. We should save at least one instruction. 428 unsigned InstQuota = 0; 429 430 Value *V0 = I->getOperand(0); 431 Value *V1 = I->getOperand(1); 432 InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && 433 (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1; 434 435 if (Value *R = simplifyFAdd(AllOpnds, InstQuota)) 436 return R; 437 } 438 439 if (OpndNum != 2) { 440 // The input instruction is : "I=0.0 +/- V". If the "V" were able to be 441 // splitted into two addends, say "V = X - Y", the instruction would have 442 // been optimized into "I = Y - X" in the previous steps. 443 // 444 const FAddendCoef &CE = Opnd0.getCoef(); 445 return CE.isOne() ? Opnd0.getSymVal() : 0; 446 } 447 448 // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1] 449 if (Opnd1_ExpNum) { 450 AddendVect AllOpnds; 451 AllOpnds.push_back(&Opnd0); 452 AllOpnds.push_back(&Opnd1_0); 453 if (Opnd1_ExpNum == 2) 454 AllOpnds.push_back(&Opnd1_1); 455 456 if (Value *R = simplifyFAdd(AllOpnds, 1)) 457 return R; 458 } 459 460 // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1] 461 if (Opnd0_ExpNum) { 462 AddendVect AllOpnds; 463 AllOpnds.push_back(&Opnd1); 464 AllOpnds.push_back(&Opnd0_0); 465 if (Opnd0_ExpNum == 2) 466 AllOpnds.push_back(&Opnd0_1); 467 468 if (Value *R = simplifyFAdd(AllOpnds, 1)) 469 return R; 470 } 471 472 return 0; 473 } 474 475 Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { 476 477 unsigned AddendNum = Addends.size(); 478 assert(AddendNum <= 4 && "Too many addends"); 479 480 // For saving intermediate results; 481 unsigned NextTmpIdx = 0; 482 FAddend TmpResult[3]; 483 484 // Points to the constant addend of the resulting simplified expression. 485 // If the resulting expr has constant-addend, this constant-addend is 486 // desirable to reside at the top of the resulting expression tree. Placing 487 // constant close to supper-expr(s) will potentially reveal some optimization 488 // opportunities in super-expr(s). 489 // 490 const FAddend *ConstAdd = 0; 491 492 // Simplified addends are placed <SimpVect>. 493 AddendVect SimpVect; 494 495 // The outer loop works on one symbolic-value at a time. Suppose the input 496 // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... 497 // The symbolic-values will be processed in this order: x, y, z. 498 // 499 for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) { 500 501 const FAddend *ThisAddend = Addends[SymIdx]; 502 if (!ThisAddend) { 503 // This addend was processed before. 504 continue; 505 } 506 507 Value *Val = ThisAddend->getSymVal(); 508 unsigned StartIdx = SimpVect.size(); 509 SimpVect.push_back(ThisAddend); 510 511 // The inner loop collects addends sharing same symbolic-value, and these 512 // addends will be later on folded into a single addend. Following above 513 // example, if the symbolic value "y" is being processed, the inner loop 514 // will collect two addends "<b1,y>" and "<b2,Y>". These two addends will 515 // be later on folded into "<b1+b2, y>". 516 // 517 for (unsigned SameSymIdx = SymIdx + 1; 518 SameSymIdx < AddendNum; SameSymIdx++) { 519 const FAddend *T = Addends[SameSymIdx]; 520 if (T && T->getSymVal() == Val) { 521 // Set null such that next iteration of the outer loop will not process 522 // this addend again. 523 Addends[SameSymIdx] = 0; 524 SimpVect.push_back(T); 525 } 526 } 527 528 // If multiple addends share same symbolic value, fold them together. 529 if (StartIdx + 1 != SimpVect.size()) { 530 FAddend &R = TmpResult[NextTmpIdx ++]; 531 R = *SimpVect[StartIdx]; 532 for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++) 533 R += *SimpVect[Idx]; 534 535 // Pop all addends being folded and push the resulting folded addend. 536 SimpVect.resize(StartIdx); 537 if (Val != 0) { 538 if (!R.isZero()) { 539 SimpVect.push_back(&R); 540 } 541 } else { 542 // Don't push constant addend at this time. It will be the last element 543 // of <SimpVect>. 544 ConstAdd = &R; 545 } 546 } 547 } 548 549 assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && 550 "out-of-bound access"); 551 552 if (ConstAdd) 553 SimpVect.push_back(ConstAdd); 554 555 Value *Result; 556 if (!SimpVect.empty()) 557 Result = createNaryFAdd(SimpVect, InstrQuota); 558 else { 559 // The addition is folded to 0.0. 560 Result = ConstantFP::get(Instr->getType(), 0.0); 561 } 562 563 return Result; 564 } 565 566 Value *FAddCombine::createNaryFAdd 567 (const AddendVect &Opnds, unsigned InstrQuota) { 568 assert(!Opnds.empty() && "Expect at least one addend"); 569 570 // Step 1: Check if the # of instructions needed exceeds the quota. 571 // 572 unsigned InstrNeeded = calcInstrNumber(Opnds); 573 if (InstrNeeded > InstrQuota) 574 return 0; 575 576 initCreateInstNum(); 577 578 // step 2: Emit the N-ary addition. 579 // Note that at most three instructions are involved in Fadd-InstCombine: the 580 // addition in question, and at most two neighboring instructions. 581 // The resulting optimized addition should have at least one less instruction 582 // than the original addition expression tree. This implies that the resulting 583 // N-ary addition has at most two instructions, and we don't need to worry 584 // about tree-height when constructing the N-ary addition. 585 586 Value *LastVal = 0; 587 bool LastValNeedNeg = false; 588 589 // Iterate the addends, creating fadd/fsub using adjacent two addends. 590 for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); 591 I != E; I++) { 592 bool NeedNeg; 593 Value *V = createAddendVal(**I, NeedNeg); 594 if (!LastVal) { 595 LastVal = V; 596 LastValNeedNeg = NeedNeg; 597 continue; 598 } 599 600 if (LastValNeedNeg == NeedNeg) { 601 LastVal = createFAdd(LastVal, V); 602 continue; 603 } 604 605 if (LastValNeedNeg) 606 LastVal = createFSub(V, LastVal); 607 else 608 LastVal = createFSub(LastVal, V); 609 610 LastValNeedNeg = false; 611 } 612 613 if (LastValNeedNeg) { 614 LastVal = createFNeg(LastVal); 615 } 616 617 #ifndef NDEBUG 618 assert(CreateInstrNum == InstrNeeded && 619 "Inconsistent in instruction numbers"); 620 #endif 621 622 return LastVal; 623 } 624 625 Value *FAddCombine::createFSub 626 (Value *Opnd0, Value *Opnd1) { 627 Value *V = Builder->CreateFSub(Opnd0, Opnd1); 628 createInstPostProc(cast<Instruction>(V)); 629 return V; 630 } 631 632 Value *FAddCombine::createFNeg(Value *V) { 633 Value *Zero = cast<Value>(ConstantFP::get(V->getType(), 0.0)); 634 return createFSub(Zero, V); 635 } 636 637 Value *FAddCombine::createFAdd 638 (Value *Opnd0, Value *Opnd1) { 639 Value *V = Builder->CreateFAdd(Opnd0, Opnd1); 640 createInstPostProc(cast<Instruction>(V)); 641 return V; 642 } 643 644 Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { 645 Value *V = Builder->CreateFMul(Opnd0, Opnd1); 646 createInstPostProc(cast<Instruction>(V)); 647 return V; 648 } 649 650 void FAddCombine::createInstPostProc(Instruction *NewInstr) { 651 NewInstr->setDebugLoc(Instr->getDebugLoc()); 652 653 // Keep track of the number of instruction created. 654 incCreateInstNum(); 655 656 // Propagate fast-math flags 657 NewInstr->setFastMathFlags(Instr->getFastMathFlags()); 658 } 659 660 // Return the number of instruction needed to emit the N-ary addition. 661 // NOTE: Keep this function in sync with createAddendVal(). 662 unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { 663 unsigned OpndNum = Opnds.size(); 664 unsigned InstrNeeded = OpndNum - 1; 665 666 // The number of addends in the form of "(-1)*x". 667 unsigned NegOpndNum = 0; 668 669 // Adjust the number of instructions needed to emit the N-ary add. 670 for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); 671 I != E; I++) { 672 const FAddend *Opnd = *I; 673 if (Opnd->isConstant()) 674 continue; 675 676 const FAddendCoef &CE = Opnd->getCoef(); 677 if (CE.isMinusOne() || CE.isMinusTwo()) 678 NegOpndNum++; 679 680 // Let the addend be "c * x". If "c == +/-1", the value of the addend 681 // is immediately available; otherwise, it needs exactly one instruction 682 // to evaluate the value. 683 if (!CE.isMinusOne() && !CE.isOne()) 684 InstrNeeded++; 685 } 686 if (NegOpndNum == OpndNum) 687 InstrNeeded++; 688 return InstrNeeded; 689 } 690 691 // Input Addend Value NeedNeg(output) 692 // ================================================================ 693 // Constant C C false 694 // <+/-1, V> V coefficient is -1 695 // <2/-2, V> "fadd V, V" coefficient is -2 696 // <C, V> "fmul V, C" false 697 // 698 // NOTE: Keep this function in sync with FAddCombine::calcInstrNumber. 699 Value *FAddCombine::createAddendVal 700 (const FAddend &Opnd, bool &NeedNeg) { 701 const FAddendCoef &Coeff = Opnd.getCoef(); 702 703 if (Opnd.isConstant()) { 704 NeedNeg = false; 705 return Coeff.getValue(Instr->getType()); 706 } 707 708 Value *OpndVal = Opnd.getSymVal(); 709 710 if (Coeff.isMinusOne() || Coeff.isOne()) { 711 NeedNeg = Coeff.isMinusOne(); 712 return OpndVal; 713 } 714 715 if (Coeff.isTwo() || Coeff.isMinusTwo()) { 716 NeedNeg = Coeff.isMinusTwo(); 717 return createFAdd(OpndVal, OpndVal); 718 } 719 720 NeedNeg = false; 721 return createFMul(OpndVal, Coeff.getValue(Instr->getType())); 722 } 723 724 /// AddOne - Add one to a ConstantInt. 725 static Constant *AddOne(Constant *C) { 726 return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); 727 } 728 729 /// SubOne - Subtract one from a ConstantInt. 730 static Constant *SubOne(ConstantInt *C) { 731 return ConstantInt::get(C->getContext(), C->getValue()-1); 732 } 733 734 735 // dyn_castFoldableMul - If this value is a multiply that can be folded into 736 // other computations (because it has a constant operand), return the 737 // non-constant operand of the multiply, and set CST to point to the multiplier. 738 // Otherwise, return null. 739 // 740 static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { 741 if (!V->hasOneUse() || !V->getType()->isIntegerTy()) 742 return 0; 743 744 Instruction *I = dyn_cast<Instruction>(V); 745 if (I == 0) return 0; 746 747 if (I->getOpcode() == Instruction::Mul) 748 if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) 749 return I->getOperand(0); 750 if (I->getOpcode() == Instruction::Shl) 751 if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) { 752 // The multiplier is really 1 << CST. 753 uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); 754 uint32_t CSTVal = CST->getLimitedValue(BitWidth); 755 CST = ConstantInt::get(V->getType()->getContext(), 756 APInt(BitWidth, 1).shl(CSTVal)); 757 return I->getOperand(0); 758 } 759 return 0; 760 } 761 762 763 /// WillNotOverflowSignedAdd - Return true if we can prove that: 764 /// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) 765 /// This basically requires proving that the add in the original type would not 766 /// overflow to change the sign bit or have a carry out. 767 bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { 768 // There are different heuristics we can use for this. Here are some simple 769 // ones. 770 771 // Add has the property that adding any two 2's complement numbers can only 772 // have one carry bit which can change a sign. As such, if LHS and RHS each 773 // have at least two sign bits, we know that the addition of the two values 774 // will sign extend fine. 775 if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) 776 return true; 777 778 779 // If one of the operands only has one non-zero bit, and if the other operand 780 // has a known-zero bit in a more significant place than it (not including the 781 // sign bit) the ripple may go up to and fill the zero, but won't change the 782 // sign. For example, (X & ~4) + 1. 783 784 // TODO: Implement. 785 786 return false; 787 } 788 789 Instruction *InstCombiner::visitAdd(BinaryOperator &I) { 790 bool Changed = SimplifyAssociativeOrCommutative(I); 791 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 792 793 if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), 794 I.hasNoUnsignedWrap(), TD)) 795 return ReplaceInstUsesWith(I, V); 796 797 // (A*B)+(A*C) -> A*(B+C) etc 798 if (Value *V = SimplifyUsingDistributiveLaws(I)) 799 return ReplaceInstUsesWith(I, V); 800 801 if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 802 // X + (signbit) --> X ^ signbit 803 const APInt &Val = CI->getValue(); 804 if (Val.isSignBit()) 805 return BinaryOperator::CreateXor(LHS, RHS); 806 807 // See if SimplifyDemandedBits can simplify this. This handles stuff like 808 // (X & 254)+1 -> (X&254)|1 809 if (SimplifyDemandedInstructionBits(I)) 810 return &I; 811 812 // zext(bool) + C -> bool ? C + 1 : C 813 if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) 814 if (ZI->getSrcTy()->isIntegerTy(1)) 815 return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); 816 817 Value *XorLHS = 0; ConstantInt *XorRHS = 0; 818 if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { 819 uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); 820 const APInt &RHSVal = CI->getValue(); 821 unsigned ExtendAmt = 0; 822 // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. 823 // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. 824 if (XorRHS->getValue() == -RHSVal) { 825 if (RHSVal.isPowerOf2()) 826 ExtendAmt = TySizeBits - RHSVal.logBase2() - 1; 827 else if (XorRHS->getValue().isPowerOf2()) 828 ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; 829 } 830 831 if (ExtendAmt) { 832 APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); 833 if (!MaskedValueIsZero(XorLHS, Mask)) 834 ExtendAmt = 0; 835 } 836 837 if (ExtendAmt) { 838 Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); 839 Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); 840 return BinaryOperator::CreateAShr(NewShl, ShAmt); 841 } 842 843 // If this is a xor that was canonicalized from a sub, turn it back into 844 // a sub and fuse this add with it. 845 if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) { 846 IntegerType *IT = cast<IntegerType>(I.getType()); 847 APInt LHSKnownOne(IT->getBitWidth(), 0); 848 APInt LHSKnownZero(IT->getBitWidth(), 0); 849 ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne); 850 if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue()) 851 return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), 852 XorLHS); 853 } 854 } 855 } 856 857 if (isa<Constant>(RHS) && isa<PHINode>(LHS)) 858 if (Instruction *NV = FoldOpIntoPhi(I)) 859 return NV; 860 861 if (I.getType()->isIntegerTy(1)) 862 return BinaryOperator::CreateXor(LHS, RHS); 863 864 // X + X --> X << 1 865 if (LHS == RHS) { 866 BinaryOperator *New = 867 BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); 868 New->setHasNoSignedWrap(I.hasNoSignedWrap()); 869 New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); 870 return New; 871 } 872 873 // -A + B --> B - A 874 // -A + -B --> -(A + B) 875 if (Value *LHSV = dyn_castNegVal(LHS)) { 876 if (!isa<Constant>(RHS)) 877 if (Value *RHSV = dyn_castNegVal(RHS)) { 878 Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); 879 return BinaryOperator::CreateNeg(NewAdd); 880 } 881 882 return BinaryOperator::CreateSub(RHS, LHSV); 883 } 884 885 // A + -B --> A - B 886 if (!isa<Constant>(RHS)) 887 if (Value *V = dyn_castNegVal(RHS)) 888 return BinaryOperator::CreateSub(LHS, V); 889 890 891 ConstantInt *C2; 892 if (Value *X = dyn_castFoldableMul(LHS, C2)) { 893 if (X == RHS) // X*C + X --> X * (C+1) 894 return BinaryOperator::CreateMul(RHS, AddOne(C2)); 895 896 // X*C1 + X*C2 --> X * (C1+C2) 897 ConstantInt *C1; 898 if (X == dyn_castFoldableMul(RHS, C1)) 899 return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); 900 } 901 902 // X + X*C --> X * (C+1) 903 if (dyn_castFoldableMul(RHS, C2) == LHS) 904 return BinaryOperator::CreateMul(LHS, AddOne(C2)); 905 906 // A+B --> A|B iff A and B have no bits set in common. 907 if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { 908 APInt LHSKnownOne(IT->getBitWidth(), 0); 909 APInt LHSKnownZero(IT->getBitWidth(), 0); 910 ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); 911 if (LHSKnownZero != 0) { 912 APInt RHSKnownOne(IT->getBitWidth(), 0); 913 APInt RHSKnownZero(IT->getBitWidth(), 0); 914 ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); 915 916 // No bits in common -> bitwise or. 917 if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) 918 return BinaryOperator::CreateOr(LHS, RHS); 919 } 920 } 921 922 // W*X + Y*Z --> W * (X+Z) iff W == Y 923 { 924 Value *W, *X, *Y, *Z; 925 if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && 926 match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { 927 if (W != Y) { 928 if (W == Z) { 929 std::swap(Y, Z); 930 } else if (Y == X) { 931 std::swap(W, X); 932 } else if (X == Z) { 933 std::swap(Y, Z); 934 std::swap(W, X); 935 } 936 } 937 938 if (W == Y) { 939 Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); 940 return BinaryOperator::CreateMul(W, NewAdd); 941 } 942 } 943 } 944 945 if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) { 946 Value *X = 0; 947 if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X 948 return BinaryOperator::CreateSub(SubOne(CRHS), X); 949 950 // (X & FF00) + xx00 -> (X+xx00) & FF00 951 if (LHS->hasOneUse() && 952 match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) && 953 CRHS->getValue() == (CRHS->getValue() & C2->getValue())) { 954 // See if all bits from the first bit set in the Add RHS up are included 955 // in the mask. First, get the rightmost bit. 956 const APInt &AddRHSV = CRHS->getValue(); 957 958 // Form a mask of all bits from the lowest bit added through the top. 959 APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); 960 961 // See if the and mask includes all of these bits. 962 APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); 963 964 if (AddRHSHighBits == AddRHSHighBitsAnd) { 965 // Okay, the xform is safe. Insert the new add pronto. 966 Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); 967 return BinaryOperator::CreateAnd(NewAdd, C2); 968 } 969 } 970 971 // Try to fold constant add into select arguments. 972 if (SelectInst *SI = dyn_cast<SelectInst>(LHS)) 973 if (Instruction *R = FoldOpIntoSelect(I, SI)) 974 return R; 975 } 976 977 // add (select X 0 (sub n A)) A --> select X A n 978 { 979 SelectInst *SI = dyn_cast<SelectInst>(LHS); 980 Value *A = RHS; 981 if (!SI) { 982 SI = dyn_cast<SelectInst>(RHS); 983 A = LHS; 984 } 985 if (SI && SI->hasOneUse()) { 986 Value *TV = SI->getTrueValue(); 987 Value *FV = SI->getFalseValue(); 988 Value *N; 989 990 // Can we fold the add into the argument of the select? 991 // We check both true and false select arguments for a matching subtract. 992 if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) 993 // Fold the add into the true select value. 994 return SelectInst::Create(SI->getCondition(), N, A); 995 996 if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) 997 // Fold the add into the false select value. 998 return SelectInst::Create(SI->getCondition(), A, N); 999 } 1000 } 1001 1002 // Check for (add (sext x), y), see if we can merge this into an 1003 // integer add followed by a sext. 1004 if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) { 1005 // (add (sext x), cst) --> (sext (add x, cst')) 1006 if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { 1007 Constant *CI = 1008 ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); 1009 if (LHSConv->hasOneUse() && 1010 ConstantExpr::getSExt(CI, I.getType()) == RHSC && 1011 WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { 1012 // Insert the new, smaller add. 1013 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 1014 CI, "addconv"); 1015 return new SExtInst(NewAdd, I.getType()); 1016 } 1017 } 1018 1019 // (add (sext x), (sext y)) --> (sext (add int x, y)) 1020 if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) { 1021 // Only do this if x/y have the same type, if at last one of them has a 1022 // single use (so we don't increase the number of sexts), and if the 1023 // integer add will not overflow. 1024 if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& 1025 (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && 1026 WillNotOverflowSignedAdd(LHSConv->getOperand(0), 1027 RHSConv->getOperand(0))) { 1028 // Insert the new integer add. 1029 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 1030 RHSConv->getOperand(0), "addconv"); 1031 return new SExtInst(NewAdd, I.getType()); 1032 } 1033 } 1034 } 1035 1036 // Check for (x & y) + (x ^ y) 1037 { 1038 Value *A = 0, *B = 0; 1039 if (match(RHS, m_Xor(m_Value(A), m_Value(B))) && 1040 (match(LHS, m_And(m_Specific(A), m_Specific(B))) || 1041 match(LHS, m_And(m_Specific(B), m_Specific(A))))) 1042 return BinaryOperator::CreateOr(A, B); 1043 1044 if (match(LHS, m_Xor(m_Value(A), m_Value(B))) && 1045 (match(RHS, m_And(m_Specific(A), m_Specific(B))) || 1046 match(RHS, m_And(m_Specific(B), m_Specific(A))))) 1047 return BinaryOperator::CreateOr(A, B); 1048 } 1049 1050 return Changed ? &I : 0; 1051 } 1052 1053 Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { 1054 bool Changed = SimplifyAssociativeOrCommutative(I); 1055 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 1056 1057 if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), TD)) 1058 return ReplaceInstUsesWith(I, V); 1059 1060 if (isa<Constant>(RHS) && isa<PHINode>(LHS)) 1061 if (Instruction *NV = FoldOpIntoPhi(I)) 1062 return NV; 1063 1064 // -A + B --> B - A 1065 // -A + -B --> -(A + B) 1066 if (Value *LHSV = dyn_castFNegVal(LHS)) 1067 return BinaryOperator::CreateFSub(RHS, LHSV); 1068 1069 // A + -B --> A - B 1070 if (!isa<Constant>(RHS)) 1071 if (Value *V = dyn_castFNegVal(RHS)) 1072 return BinaryOperator::CreateFSub(LHS, V); 1073 1074 // Check for (fadd double (sitofp x), y), see if we can merge this into an 1075 // integer add followed by a promotion. 1076 if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { 1077 // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) 1078 // ... if the constant fits in the integer value. This is useful for things 1079 // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer 1080 // requires a constant pool load, and generally allows the add to be better 1081 // instcombined. 1082 if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) { 1083 Constant *CI = 1084 ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); 1085 if (LHSConv->hasOneUse() && 1086 ConstantExpr::getSIToFP(CI, I.getType()) == CFP && 1087 WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { 1088 // Insert the new integer add. 1089 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 1090 CI, "addconv"); 1091 return new SIToFPInst(NewAdd, I.getType()); 1092 } 1093 } 1094 1095 // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) 1096 if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) { 1097 // Only do this if x/y have the same type, if at last one of them has a 1098 // single use (so we don't increase the number of int->fp conversions), 1099 // and if the integer add will not overflow. 1100 if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& 1101 (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && 1102 WillNotOverflowSignedAdd(LHSConv->getOperand(0), 1103 RHSConv->getOperand(0))) { 1104 // Insert the new integer add. 1105 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 1106 RHSConv->getOperand(0),"addconv"); 1107 return new SIToFPInst(NewAdd, I.getType()); 1108 } 1109 } 1110 } 1111 1112 if (I.hasUnsafeAlgebra()) { 1113 if (Value *V = FAddCombine(Builder).simplify(&I)) 1114 return ReplaceInstUsesWith(I, V); 1115 } 1116 1117 return Changed ? &I : 0; 1118 } 1119 1120 1121 /// Optimize pointer differences into the same array into a size. Consider: 1122 /// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer 1123 /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. 1124 /// 1125 Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, 1126 Type *Ty) { 1127 assert(TD && "Must have target data info for this"); 1128 1129 // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize 1130 // this. 1131 bool Swapped = false; 1132 GEPOperator *GEP1 = 0, *GEP2 = 0; 1133 1134 // For now we require one side to be the base pointer "A" or a constant 1135 // GEP derived from it. 1136 if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { 1137 // (gep X, ...) - X 1138 if (LHSGEP->getOperand(0) == RHS) { 1139 GEP1 = LHSGEP; 1140 Swapped = false; 1141 } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { 1142 // (gep X, ...) - (gep X, ...) 1143 if (LHSGEP->getOperand(0)->stripPointerCasts() == 1144 RHSGEP->getOperand(0)->stripPointerCasts()) { 1145 GEP2 = RHSGEP; 1146 GEP1 = LHSGEP; 1147 Swapped = false; 1148 } 1149 } 1150 } 1151 1152 if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { 1153 // X - (gep X, ...) 1154 if (RHSGEP->getOperand(0) == LHS) { 1155 GEP1 = RHSGEP; 1156 Swapped = true; 1157 } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { 1158 // (gep X, ...) - (gep X, ...) 1159 if (RHSGEP->getOperand(0)->stripPointerCasts() == 1160 LHSGEP->getOperand(0)->stripPointerCasts()) { 1161 GEP2 = LHSGEP; 1162 GEP1 = RHSGEP; 1163 Swapped = true; 1164 } 1165 } 1166 } 1167 1168 // Avoid duplicating the arithmetic if GEP2 has non-constant indices and 1169 // multiple users. 1170 if (GEP1 == 0 || 1171 (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) 1172 return 0; 1173 1174 // Emit the offset of the GEP and an intptr_t. 1175 Value *Result = EmitGEPOffset(GEP1); 1176 1177 // If we had a constant expression GEP on the other side offsetting the 1178 // pointer, subtract it from the offset we have. 1179 if (GEP2) { 1180 Value *Offset = EmitGEPOffset(GEP2); 1181 Result = Builder->CreateSub(Result, Offset); 1182 } 1183 1184 // If we have p - gep(p, ...) then we have to negate the result. 1185 if (Swapped) 1186 Result = Builder->CreateNeg(Result, "diff.neg"); 1187 1188 return Builder->CreateIntCast(Result, Ty, true); 1189 } 1190 1191 1192 Instruction *InstCombiner::visitSub(BinaryOperator &I) { 1193 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1194 1195 if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), 1196 I.hasNoUnsignedWrap(), TD)) 1197 return ReplaceInstUsesWith(I, V); 1198 1199 // (A*B)-(A*C) -> A*(B-C) etc 1200 if (Value *V = SimplifyUsingDistributiveLaws(I)) 1201 return ReplaceInstUsesWith(I, V); 1202 1203 // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. 1204 if (Value *V = dyn_castNegVal(Op1)) { 1205 BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); 1206 Res->setHasNoSignedWrap(I.hasNoSignedWrap()); 1207 Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); 1208 return Res; 1209 } 1210 1211 if (I.getType()->isIntegerTy(1)) 1212 return BinaryOperator::CreateXor(Op0, Op1); 1213 1214 // Replace (-1 - A) with (~A). 1215 if (match(Op0, m_AllOnes())) 1216 return BinaryOperator::CreateNot(Op1); 1217 1218 if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { 1219 // C - ~X == X + (1+C) 1220 Value *X = 0; 1221 if (match(Op1, m_Not(m_Value(X)))) 1222 return BinaryOperator::CreateAdd(X, AddOne(C)); 1223 1224 // -(X >>u 31) -> (X >>s 31) 1225 // -(X >>s 31) -> (X >>u 31) 1226 if (C->isZero()) { 1227 Value *X; ConstantInt *CI; 1228 if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) && 1229 // Verify we are shifting out everything but the sign bit. 1230 CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) 1231 return BinaryOperator::CreateAShr(X, CI); 1232 1233 if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) && 1234 // Verify we are shifting out everything but the sign bit. 1235 CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) 1236 return BinaryOperator::CreateLShr(X, CI); 1237 } 1238 1239 // Try to fold constant sub into select arguments. 1240 if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) 1241 if (Instruction *R = FoldOpIntoSelect(I, SI)) 1242 return R; 1243 1244 // C-(X+C2) --> (C-C2)-X 1245 ConstantInt *C2; 1246 if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2)))) 1247 return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X); 1248 1249 if (SimplifyDemandedInstructionBits(I)) 1250 return &I; 1251 } 1252 1253 1254 { Value *Y; 1255 // X-(X+Y) == -Y X-(Y+X) == -Y 1256 if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) || 1257 match(Op1, m_Add(m_Value(Y), m_Specific(Op0)))) 1258 return BinaryOperator::CreateNeg(Y); 1259 1260 // (X-Y)-X == -Y 1261 if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y)))) 1262 return BinaryOperator::CreateNeg(Y); 1263 } 1264 1265 if (Op1->hasOneUse()) { 1266 Value *X = 0, *Y = 0, *Z = 0; 1267 Constant *C = 0; 1268 ConstantInt *CI = 0; 1269 1270 // (X - (Y - Z)) --> (X + (Z - Y)). 1271 if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) 1272 return BinaryOperator::CreateAdd(Op0, 1273 Builder->CreateSub(Z, Y, Op1->getName())); 1274 1275 // (X - (X & Y)) --> (X & ~Y) 1276 // 1277 if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) || 1278 match(Op1, m_And(m_Specific(Op0), m_Value(Y)))) 1279 return BinaryOperator::CreateAnd(Op0, 1280 Builder->CreateNot(Y, Y->getName() + ".not")); 1281 1282 // 0 - (X sdiv C) -> (X sdiv -C) 1283 if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && 1284 match(Op0, m_Zero())) 1285 return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C)); 1286 1287 // 0 - (X << Y) -> (-X << Y) when X is freely negatable. 1288 if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero())) 1289 if (Value *XNeg = dyn_castNegVal(X)) 1290 return BinaryOperator::CreateShl(XNeg, Y); 1291 1292 // X - X*C --> X * (1-C) 1293 if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) { 1294 Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI); 1295 return BinaryOperator::CreateMul(Op0, CP1); 1296 } 1297 1298 // X - X<<C --> X * (1-(1<<C)) 1299 if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) { 1300 Constant *One = ConstantInt::get(I.getType(), 1); 1301 C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI)); 1302 return BinaryOperator::CreateMul(Op0, C); 1303 } 1304 1305 // X - A*-B -> X + A*B 1306 // X - -A*B -> X + A*B 1307 Value *A, *B; 1308 if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) || 1309 match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B)))) 1310 return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B)); 1311 1312 // X - A*CI -> X + A*-CI 1313 // X - CI*A -> X + A*-CI 1314 if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) || 1315 match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) { 1316 Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI)); 1317 return BinaryOperator::CreateAdd(Op0, NewMul); 1318 } 1319 } 1320 1321 ConstantInt *C1; 1322 if (Value *X = dyn_castFoldableMul(Op0, C1)) { 1323 if (X == Op1) // X*C - X --> X * (C-1) 1324 return BinaryOperator::CreateMul(Op1, SubOne(C1)); 1325 1326 ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) 1327 if (X == dyn_castFoldableMul(Op1, C2)) 1328 return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); 1329 } 1330 1331 // Optimize pointer differences into the same array into a size. Consider: 1332 // &A[10] - &A[0]: we should compile this to "10". 1333 if (TD) { 1334 Value *LHSOp, *RHSOp; 1335 if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && 1336 match(Op1, m_PtrToInt(m_Value(RHSOp)))) 1337 if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) 1338 return ReplaceInstUsesWith(I, Res); 1339 1340 // trunc(p)-trunc(q) -> trunc(p-q) 1341 if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && 1342 match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) 1343 if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) 1344 return ReplaceInstUsesWith(I, Res); 1345 } 1346 1347 return 0; 1348 } 1349 1350 Instruction *InstCombiner::visitFSub(BinaryOperator &I) { 1351 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1352 1353 if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), TD)) 1354 return ReplaceInstUsesWith(I, V); 1355 1356 // If this is a 'B = x-(-A)', change to B = x+A... 1357 if (Value *V = dyn_castFNegVal(Op1)) 1358 return BinaryOperator::CreateFAdd(Op0, V); 1359 1360 if (I.hasUnsafeAlgebra()) { 1361 if (Value *V = FAddCombine(Builder).simplify(&I)) 1362 return ReplaceInstUsesWith(I, V); 1363 } 1364 1365 return 0; 1366 } 1367