//===- Reassociate.cpp - Reassociate binary expressions ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass reassociates commutative expressions in an order that is designed
// to promote better constant propagation, GCSE, LICM, PRE, etc.
//
// For example: 4 + (x + 5) -> x + (4 + 5)
//
// In the implementation of this algorithm, constants are assigned rank = 0,
// function arguments are rank = 1, and other values are assigned ranks
// corresponding to the reverse post order traversal of the current function
// (starting at 2), which effectively gives values in deep loops higher rank
// than values not in loops.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "reassociate"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseMap.h"
#include <algorithm>
using namespace llvm;

STATISTIC(NumLinear , "Number of insts linearized");
STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");

namespace {
  struct ValueEntry {
    unsigned Rank;
    Value *Op;
    ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
  };
  inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) {
    return LHS.Rank > RHS.Rank;   // Sort so that highest rank goes to start.
  }
}

#ifndef NDEBUG
/// PrintOps - Print out the expression identified in the Ops list.
///
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
  Module *M = I->getParent()->getParent()->getParent();
  dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
         << *Ops[0].Op->getType() << '\t';
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    dbgs() << "[ ";
    WriteAsOperand(dbgs(), Ops[i].Op, false, M);
    dbgs() << ", #" << Ops[i].Rank << "] ";
  }
}
#endif

namespace {
  /// \brief Utility class representing a base and exponent pair which form one
  /// factor of some product.
  struct Factor {
    Value *Base;
    unsigned Power;

    Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {}

    /// \brief Sort factors by their Base.
    struct BaseSorter {
      bool operator()(const Factor &LHS, const Factor &RHS) {
        return LHS.Base < RHS.Base;
      }
    };

    /// \brief Compare factors for equal bases.
    struct BaseEqual {
      bool operator()(const Factor &LHS, const Factor &RHS) {
        return LHS.Base == RHS.Base;
      }
    };

    /// \brief Sort factors in descending order by their power.
    struct PowerDescendingSorter {
      bool operator()(const Factor &LHS, const Factor &RHS) {
        return LHS.Power > RHS.Power;
      }
    };

    /// \brief Compare factors for equal powers.
    struct PowerEqual {
      bool operator()(const Factor &LHS, const Factor &RHS) {
        return LHS.Power == RHS.Power;
      }
    };
  };
}

namespace {
  class Reassociate : public FunctionPass {
    DenseMap<BasicBlock*, unsigned> RankMap;
    DenseMap<AssertingVH<Value>, unsigned> ValueRankMap;
    SmallVector<WeakVH, 8> RedoInsts;
    SmallVector<WeakVH, 8> DeadInsts;
    bool MadeChange;
  public:
    static char ID; // Pass identification, replacement for typeid
    Reassociate() : FunctionPass(ID) {
      initializeReassociatePass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F);

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
    }
  private:
    void BuildRankMap(Function &F);
    unsigned getRank(Value *V);
    Value *ReassociateExpression(BinaryOperator *I);
    void RewriteExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops,
                         unsigned Idx = 0);
    Value *OptimizeExpression(BinaryOperator *I,
                              SmallVectorImpl<ValueEntry> &Ops);
    Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops);
    bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
                                SmallVectorImpl<Factor> &Factors);
    Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder,
                                   SmallVectorImpl<Factor> &Factors);
    Value *OptimizeMul(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
    void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops);
    void LinearizeExpr(BinaryOperator *I);
    Value *RemoveFactorFromExpression(Value *V, Value *Factor);
    void ReassociateInst(BasicBlock::iterator &BBI);

    void RemoveDeadBinaryOp(Value *V);
  };
}

char Reassociate::ID = 0;
INITIALIZE_PASS(Reassociate, "reassociate",
                "Reassociate expressions", false, false)

// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }

void Reassociate::RemoveDeadBinaryOp(Value *V) {
  Instruction *Op = dyn_cast<Instruction>(V);
  if (!Op || !isa<BinaryOperator>(Op))
    return;

  Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1);

  ValueRankMap.erase(Op);
  DeadInsts.push_back(Op);
  RemoveDeadBinaryOp(LHS);
  RemoveDeadBinaryOp(RHS);
}


static bool isUnmovableInstruction(Instruction *I) {
  if (I->getOpcode() == Instruction::PHI ||
      I->getOpcode() == Instruction::Alloca ||
      I->getOpcode() == Instruction::Load ||
      I->getOpcode() == Instruction::Invoke ||
      (I->getOpcode() == Instruction::Call &&
       !isa<DbgInfoIntrinsic>(I)) ||
      I->getOpcode() == Instruction::UDiv ||
      I->getOpcode() == Instruction::SDiv ||
      I->getOpcode() == Instruction::FDiv ||
      I->getOpcode() == Instruction::URem ||
      I->getOpcode() == Instruction::SRem ||
      I->getOpcode() == Instruction::FRem)
    return true;
  return false;
}

void Reassociate::BuildRankMap(Function &F) {
  unsigned i = 2;

  // Assign distinct ranks to function arguments.
  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
    ValueRankMap[&*I] = ++i;

  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator I = RPOT.begin(),
         E = RPOT.end(); I != E; ++I) {
    BasicBlock *BB = *I;
    unsigned BBRank = RankMap[BB] = ++i << 16;

    // Walk the basic block, adding precomputed ranks for any instructions that
    // we cannot move.  This ensures that the ranks for these instructions are
    // all different in the block.
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
      if (isUnmovableInstruction(I))
        ValueRankMap[&*I] = ++BBRank;
  }
}

unsigned Reassociate::getRank(Value *V) {
  Instruction *I = dyn_cast<Instruction>(V);
  if (I == 0) {
    if (isa<Argument>(V)) return ValueRankMap[V];   // Function argument.
    return 0;  // Otherwise it's a global or constant, rank 0.
  }

  if (unsigned Rank = ValueRankMap[I])
    return Rank; // Rank already known?

  // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that
  // we can reassociate expressions for code motion!  Since we do not recurse
  // for PHI nodes, we cannot have infinite recursion here, because there
  // cannot be loops in the value graph that do not go through PHI nodes.
  unsigned Rank = 0, MaxRank = RankMap[I->getParent()];
  for (unsigned i = 0, e = I->getNumOperands();
       i != e && Rank != MaxRank; ++i)
    Rank = std::max(Rank, getRank(I->getOperand(i)));

  // If this is a not or neg instruction, do not count it for rank.  This
  // assures us that X and ~X will have the same rank.
  if (!I->getType()->isIntegerTy() ||
      (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
    ++Rank;

  //DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = "
  //     << Rank << "\n");

  return ValueRankMap[I] = Rank;
}

/// isReassociableOp - Return V as a BinaryOperator if it is an instruction of
/// the specified opcode and has no more than one use.
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
  if ((V->hasOneUse() || V->use_empty()) && isa<Instruction>(V) &&
      cast<Instruction>(V)->getOpcode() == Opcode)
    return cast<BinaryOperator>(V);
  return 0;
}

/// LowerNegateToMultiply - Replace 0-X with X*-1.
///
static Instruction *LowerNegateToMultiply(Instruction *Neg,
                         DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
  Constant *Cst = Constant::getAllOnesValue(Neg->getType());

  Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
  ValueRankMap.erase(Neg);
  Res->takeName(Neg);
  Neg->replaceAllUsesWith(Res);
  Res->setDebugLoc(Neg->getDebugLoc());
  Neg->eraseFromParent();
  return Res;
}

// Given an expression of the form '(A+B)+(D+C)', turn it into '(((A+B)+C)+D)'.
// Note that if D is also part of the expression tree, we recurse to linearize
// it as well.  Besides that case, this does not recurse into A, B, or C.
void Reassociate::LinearizeExpr(BinaryOperator *I) {
  BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
  BinaryOperator *RHS = cast<BinaryOperator>(I->getOperand(1));
  assert(isReassociableOp(LHS, I->getOpcode()) &&
         isReassociableOp(RHS, I->getOpcode()) &&
         "Not an expression that needs linearization?");

  DEBUG(dbgs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n');

  // Move the RHS instruction to live immediately before I, avoiding breaking
  // dominator properties.
  RHS->moveBefore(I);

  // Move operands around to do the linearization.
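  // With I = (A+B)+(D+C), LHS = (A+B) and RHS = (D+C), the three operand
  // updates below rewrite RHS in place to (A+B)+C and then make I = RHS+D,
  // producing the ((A+B)+C)+D form described in the comment above.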
  I->setOperand(1, RHS->getOperand(0));
  RHS->setOperand(0, LHS);
  I->setOperand(0, RHS);

  // Conservatively clear all the optional flags, which may not hold
  // after the reassociation.
  I->clearSubclassOptionalData();
  LHS->clearSubclassOptionalData();
  RHS->clearSubclassOptionalData();

  ++NumLinear;
  MadeChange = true;
  DEBUG(dbgs() << "Linearized: " << *I << '\n');

  // If D is part of this expression tree, tail recurse.
  if (isReassociableOp(I->getOperand(1), I->getOpcode()))
    LinearizeExpr(I);
}


/// LinearizeExprTree - Given an associative binary expression tree, traverse
/// all of the uses putting it into canonical form.  This forces a left-linear
/// form of the expression (((a+b)+c)+d), and collects information about the
/// rank of the non-tree operands.
///
/// NOTE: This intentionally destroys the expression tree operands (turning
/// them into undef values) to reduce #uses of the values.  This means that the
/// caller MUST use something like RewriteExprTree to put the values back in.
///
void Reassociate::LinearizeExprTree(BinaryOperator *I,
                                    SmallVectorImpl<ValueEntry> &Ops) {
  Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
  unsigned Opcode = I->getOpcode();

  // First step, linearize the expression if it is in ((A+B)+(C+D)) form.
  BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
  BinaryOperator *RHSBO = isReassociableOp(RHS, Opcode);

  // If this is a multiply expression tree and it contains internal negations,
  // transform them into multiplies by -1 so they can be reassociated.
  if (I->getOpcode() == Instruction::Mul) {
    if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) {
      LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap);
      LHSBO = isReassociableOp(LHS, Opcode);
    }
    if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) {
      RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap);
      RHSBO = isReassociableOp(RHS, Opcode);
    }
  }

  if (!LHSBO) {
    if (!RHSBO) {
      // Neither the LHS nor the RHS is part of the tree, thus this is a leaf.
      // As such, just remember these operands and their rank.
      Ops.push_back(ValueEntry(getRank(LHS), LHS));
      Ops.push_back(ValueEntry(getRank(RHS), RHS));

      // Clear the leaves out.
      I->setOperand(0, UndefValue::get(I->getType()));
      I->setOperand(1, UndefValue::get(I->getType()));
      return;
    }

    // Turn X+(Y+Z) -> (Y+Z)+X
    std::swap(LHSBO, RHSBO);
    std::swap(LHS, RHS);
    bool Success = !I->swapOperands();
    assert(Success && "swapOperands failed");
    (void)Success;
    MadeChange = true;
  } else if (RHSBO) {
    // Turn (A+B)+(C+D) -> (((A+B)+C)+D).  This guarantees the RHS is not
    // part of the expression tree.
    LinearizeExpr(I);
    LHS = LHSBO = cast<BinaryOperator>(I->getOperand(0));
    RHS = I->getOperand(1);
    RHSBO = 0;
  }

  // Okay, now we know that the LHS is a nested expression and that the RHS is
  // not.  Perform reassociation.
  assert(!isReassociableOp(RHS, Opcode) && "LinearizeExpr failed!");

  // Move LHS right before I to make sure that the tree expression dominates
  // all values.
  LHSBO->moveBefore(I);

  // Linearize the expression tree on the LHS.
  LinearizeExprTree(LHSBO, Ops);

  // Remember the RHS operand and its rank.
  Ops.push_back(ValueEntry(getRank(RHS), RHS));

  // Clear the RHS leaf out.
  I->setOperand(1, UndefValue::get(I->getType()));
}

// RewriteExprTree - Now that the operands for this expression tree are
// linearized and optimized, emit them in-order.  This function is written to
// be tail recursive.
void Reassociate::RewriteExprTree(BinaryOperator *I,
                                  SmallVectorImpl<ValueEntry> &Ops,
                                  unsigned i) {
  if (i+2 == Ops.size()) {
    if (I->getOperand(0) != Ops[i].Op ||
        I->getOperand(1) != Ops[i+1].Op) {
      Value *OldLHS = I->getOperand(0);
      DEBUG(dbgs() << "RA: " << *I << '\n');
      I->setOperand(0, Ops[i].Op);
      I->setOperand(1, Ops[i+1].Op);

      // Clear all the optional flags, which may not hold after the
      // reassociation if the expression involved more than just this operation.
      if (Ops.size() != 2)
        I->clearSubclassOptionalData();

      DEBUG(dbgs() << "TO: " << *I << '\n');
      MadeChange = true;
      ++NumChanged;

      // If we reassociated a tree to fewer operands (e.g. (1+a+2) -> (a+3)),
      // delete the extra, now dead, nodes.
      RemoveDeadBinaryOp(OldLHS);
    }
    return;
  }
  assert(i+2 < Ops.size() && "Ops index out of range!");

  if (I->getOperand(1) != Ops[i].Op) {
    DEBUG(dbgs() << "RA: " << *I << '\n');
    I->setOperand(1, Ops[i].Op);

    // Conservatively clear all the optional flags, which may not hold
    // after the reassociation.
    I->clearSubclassOptionalData();

    DEBUG(dbgs() << "TO: " << *I << '\n');
    MadeChange = true;
    ++NumChanged;
  }

  BinaryOperator *LHS = cast<BinaryOperator>(I->getOperand(0));
  assert(LHS->getOpcode() == I->getOpcode() &&
         "Improper expression tree!");

  // Compactify the tree instructions together with each other to guarantee
  // that the expression tree is dominated by all of Ops.
  LHS->moveBefore(I);
  RewriteExprTree(LHS, Ops, i+1);
}


// NegateValue - Insert instructions before the instruction pointed to by BI
// that compute the negative version of the value specified.  The negative
// version of the value is returned, and BI is left pointing at the instruction
// that should be processed next by the reassociation pass.
//
static Value *NegateValue(Value *V, Instruction *BI) {
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getNeg(C);

  // We are trying to expose opportunity for reassociation.  One of the things
  // that we want to do to achieve this is to push a negation as deep into an
  // expression chain as possible, to expose the add instructions.  In
  // practice, this means that we turn this:
  //   X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D
  // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate
  // the constants.  We assume that instcombine will clean up the mess later if
  // we introduce tons of unnecessary negation instructions.
  //
  if (Instruction *I = dyn_cast<Instruction>(V))
    if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
      // Push the negates through the add.
      I->setOperand(0, NegateValue(I->getOperand(0), BI));
      I->setOperand(1, NegateValue(I->getOperand(1), BI));

      // We must move the add instruction here, because the neg instructions do
      // not dominate the old add instruction in general.  By moving it, we are
      // assured that the neg instructions we just inserted dominate the
      // instruction we are about to insert after them.
      //
      I->moveBefore(BI);
      I->setName(I->getName()+".neg");
      return I;
    }

  // Okay, we need to materialize a negated version of V with an instruction.
  // Scan the use lists of V to see if we have one already.
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
    User *U = *UI;
    if (!BinaryOperator::isNeg(U)) continue;

    // We found one!  Now we have to make sure that the definition dominates
    // this use.  We do this by moving it to the entry block (if it is a
    // non-instruction value) or right after the definition.  These negates will
    // be zapped by reassociate later, so we don't need much finesse here.
    BinaryOperator *TheNeg = cast<BinaryOperator>(U);

    // Verify that the negate is in this function, V might be a constant expr.
    if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
      continue;

    BasicBlock::iterator InsertPt;
    if (Instruction *InstInput = dyn_cast<Instruction>(V)) {
      if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
        InsertPt = II->getNormalDest()->begin();
      } else {
        InsertPt = InstInput;
        ++InsertPt;
      }
      while (isa<PHINode>(InsertPt)) ++InsertPt;
    } else {
      InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
    }
    TheNeg->moveBefore(InsertPt);
    return TheNeg;
  }

  // Insert a 'neg' instruction that subtracts the value from zero to get the
  // negation.
  return BinaryOperator::CreateNeg(V, V->getName() + ".neg", BI);
}

/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
/// X-Y into (X + -Y).
static bool ShouldBreakUpSubtract(Instruction *Sub) {
  // If this is a negation, we can't split it up!
  if (BinaryOperator::isNeg(Sub))
    return false;

  // Don't bother to break this up unless either operand is a reassociable add
  // or subtract, or the subtract's only use is a reassociable add or subtract.
  if (isReassociableOp(Sub->getOperand(0), Instruction::Add) ||
      isReassociableOp(Sub->getOperand(0), Instruction::Sub))
    return true;
  if (isReassociableOp(Sub->getOperand(1), Instruction::Add) ||
      isReassociableOp(Sub->getOperand(1), Instruction::Sub))
    return true;
  if (Sub->hasOneUse() &&
      (isReassociableOp(Sub->use_back(), Instruction::Add) ||
       isReassociableOp(Sub->use_back(), Instruction::Sub)))
    return true;

  return false;
}

/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
/// only used by an add, transform this into (X+(0-Y)) to promote better
/// reassociation.
static Instruction *BreakUpSubtract(Instruction *Sub,
                         DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
  // Convert a subtract into an add and a neg instruction.  This allows sub
  // instructions to be commuted with other add instructions.
  //
  // Calculate the negative value of Operand 1 of the sub instruction,
  // and set it as the RHS of the add instruction we just made.
  //
  Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
  Instruction *New =
    BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
  New->takeName(Sub);

  // Everyone now refers to the add instruction.
  ValueRankMap.erase(Sub);
  Sub->replaceAllUsesWith(New);
  New->setDebugLoc(Sub->getDebugLoc());
  Sub->eraseFromParent();

  DEBUG(dbgs() << "Negated: " << *New << '\n');
  return New;
}

/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
/// by one, change this into a multiply by a constant to assist with further
/// reassociation.
static Instruction *ConvertShiftToMul(Instruction *Shl,
                         DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
  // If an operand of this shift is a reassociable multiply, or if the shift
  // is used by a reassociable multiply or add, turn into a multiply.
  if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
      (Shl->hasOneUse() &&
       (isReassociableOp(Shl->use_back(), Instruction::Mul) ||
        isReassociableOp(Shl->use_back(), Instruction::Add)))) {
    Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
    MulCst = ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));

    Instruction *Mul =
      BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl);
    ValueRankMap.erase(Shl);
    Mul->takeName(Shl);
    Shl->replaceAllUsesWith(Mul);
    Mul->setDebugLoc(Shl->getDebugLoc());
    Shl->eraseFromParent();
    return Mul;
  }
  return 0;
}

// Scan backwards and forwards among values with the same rank as element i to
// see if X exists.  If X does not exist, return i.  This is useful when
// scanning for 'x' when we see '-x' because they both get the same rank.
static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,
                                  Value *X) {
  unsigned XRank = Ops[i].Rank;
  unsigned e = Ops.size();
  for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j)
    if (Ops[j].Op == X)
      return j;
  // Scan backwards.
  for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j)
    if (Ops[j].Op == X)
      return j;
  return i;
}

/// EmitAddTreeOfValues - Emit a tree of add instructions, summing Ops together
/// and returning the result.  Insert the tree before I.
static Value *EmitAddTreeOfValues(Instruction *I, SmallVectorImpl<Value*> &Ops){
  if (Ops.size() == 1) return Ops.back();

  Value *V1 = Ops.back();
  Ops.pop_back();
  Value *V2 = EmitAddTreeOfValues(I, Ops);
  return BinaryOperator::CreateAdd(V2, V1, "tmp", I);
}

/// RemoveFactorFromExpression - If V is an expression tree that is a
/// multiplication sequence, and if this sequence contains a multiply by Factor,
/// remove Factor from the tree and return the new tree.
Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
  BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
  if (!BO) return 0;

  SmallVector<ValueEntry, 8> Factors;
  LinearizeExprTree(BO, Factors);

  bool FoundFactor = false;
  bool NeedsNegate = false;
  for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
    if (Factors[i].Op == Factor) {
      FoundFactor = true;
      Factors.erase(Factors.begin()+i);
      break;
    }

    // If this is a negative version of this factor, remove it.
    if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor))
      if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op))
        if (FC1->getValue() == -FC2->getValue()) {
          FoundFactor = NeedsNegate = true;
          Factors.erase(Factors.begin()+i);
          break;
        }
  }

  if (!FoundFactor) {
    // Make sure to restore the operands to the expression tree.
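    // (LinearizeExprTree replaced the tree's operands with undef, so the
    // expression must be rewritten even though nothing was removed.)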
    RewriteExprTree(BO, Factors);
    return 0;
  }

  BasicBlock::iterator InsertPt = BO; ++InsertPt;

  // If this was just a single multiply, remove the multiply and return the
  // only remaining operand.
  if (Factors.size() == 1) {
    ValueRankMap.erase(BO);
    DeadInsts.push_back(BO);
    V = Factors[0].Op;
  } else {
    RewriteExprTree(BO, Factors);
    V = BO;
  }

  if (NeedsNegate)
    V = BinaryOperator::CreateNeg(V, "neg", InsertPt);

  return V;
}

/// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively
/// add its operands as factors, otherwise add V to the list of factors.
///
/// Ops is the top-level list of add operands we're trying to factor.
static void FindSingleUseMultiplyFactors(Value *V,
                                         SmallVectorImpl<Value*> &Factors,
                                         const SmallVectorImpl<ValueEntry> &Ops,
                                         bool IsRoot) {
  BinaryOperator *BO;
  if (!(V->hasOneUse() || V->use_empty()) || // More than one use.
      !(BO = dyn_cast<BinaryOperator>(V)) ||
      BO->getOpcode() != Instruction::Mul) {
    Factors.push_back(V);
    return;
  }

  // If this value has a single use because it is another input to the add
  // tree we're reassociating and we dropped its use, it actually has two
  // uses and we can't factor it.
  if (!IsRoot) {
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (Ops[i].Op == V) {
        Factors.push_back(V);
        return;
      }
  }

  // Otherwise, add the LHS and RHS to the list of factors.
  FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false);
  FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false);
}

/// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor'
/// instruction.  This optimizes based on identities.  If it can be reduced to
/// a single Value, it is returned, otherwise the Ops list is mutated as
/// necessary.
static Value *OptimizeAndOrXor(unsigned Opcode,
                               SmallVectorImpl<ValueEntry> &Ops) {
  // Scan the operand lists looking for X and ~X pairs, along with X,X pairs.
  // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1.
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    // First, check for X and ~X in the operand list.
    assert(i < Ops.size());
    if (BinaryOperator::isNot(Ops[i].Op)) {    // Cannot occur for ^.
      Value *X = BinaryOperator::getNotArgument(Ops[i].Op);
      unsigned FoundX = FindInOperandList(Ops, i, X);
      if (FoundX != i) {
        if (Opcode == Instruction::And)   // ...&X&~X = 0
          return Constant::getNullValue(X->getType());

        if (Opcode == Instruction::Or)    // ...|X|~X = -1
          return Constant::getAllOnesValue(X->getType());
      }
    }

    // Next, check for duplicate pairs of values, which we assume are next to
    // each other, due to our sorting criteria.
    assert(i < Ops.size());
    if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) {
      if (Opcode == Instruction::And || Opcode == Instruction::Or) {
        // Drop duplicate values for And and Or.
        Ops.erase(Ops.begin()+i);
        --i; --e;
        ++NumAnnihil;
        continue;
      }

      // Drop pairs of values for Xor.
      assert(Opcode == Instruction::Xor);
      if (e == 2)
        return Constant::getNullValue(Ops[0].Op->getType());

      // Y ^ X^X -> Y
      Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
      i -= 1; e -= 2;
      ++NumAnnihil;
    }
  }
  return 0;
}

/// OptimizeAdd - Optimize a series of operands to an 'add' instruction.  This
/// optimizes based on identities.  If it can be reduced to a single Value, it
/// is returned, otherwise the Ops list is mutated as necessary.
Value *Reassociate::OptimizeAdd(Instruction *I,
                                SmallVectorImpl<ValueEntry> &Ops) {
  // Scan the operand lists looking for X and -X pairs.  If we find any, we
  // can simplify the expression. X+-X == 0.  While we're at it, scan for any
  // duplicates.  We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
  //
  // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1".
  //
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    Value *TheOp = Ops[i].Op;
    // Check to see if we've seen this operand before.  If so, we factor all
    // instances of the operand together.  Due to our sorting criteria, we know
    // that these need to be next to each other in the vector.
    if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) {
      // Rescan the list, remove all instances of this operand from the expr.
      unsigned NumFound = 0;
      do {
        Ops.erase(Ops.begin()+i);
        ++NumFound;
      } while (i != Ops.size() && Ops[i].Op == TheOp);

      DEBUG(errs() << "\nFACTORING [" << NumFound << "]: " << *TheOp << '\n');
      ++NumFactor;

      // Insert a new multiply.
      Value *Mul = ConstantInt::get(cast<IntegerType>(I->getType()), NumFound);
      Mul = BinaryOperator::CreateMul(TheOp, Mul, "factor", I);

      // Now that we have inserted a multiply, optimize it. This allows us to
      // handle cases that require multiple factoring steps, such as this:
      // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6
      RedoInsts.push_back(Mul);

      // If every add operand was a duplicate, return the multiply.
      if (Ops.empty())
        return Mul;

      // Otherwise, we had some input that didn't have the dupe, such as
      // "A + A + B" -> "A*2 + B".  Add the new multiply to the list of
      // things being added by this operation.
      Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul));

      --i;
      e = Ops.size();
      continue;
    }

    // Check for X and -X in the operand list.
    if (!BinaryOperator::isNeg(TheOp))
      continue;

    Value *X = BinaryOperator::getNegArgument(TheOp);
    unsigned FoundX = FindInOperandList(Ops, i, X);
    if (FoundX == i)
      continue;

    // Remove X and -X from the operand list.
    if (Ops.size() == 2)
      return Constant::getNullValue(X->getType());

    Ops.erase(Ops.begin()+i);
    if (i < FoundX)
      --FoundX;
    else
      --i;   // Need to back up an extra one.
    Ops.erase(Ops.begin()+FoundX);
    ++NumAnnihil;
    --i;     // Revisit element.
    e -= 2;  // Removed two elements.
  }

  // Scan the operand list, checking to see if there are any common factors
  // between operands.  Consider something like A*A+A*B*C+D.  We would like to
  // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies.
  // To efficiently find this, we count the number of times a factor occurs
  // for any ADD operands that are MULs.
  DenseMap<Value*, unsigned> FactorOccurrences;

  // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
  // where they are actually the same multiply.
  unsigned MaxOcc = 0;
  Value *MaxOccVal = 0;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
    if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
      continue;

    // Compute all of the factors of this added value.
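    // For example, an operand of the form (A*A)*B yields the factor list
    // {B, A, A}; repeated factors within one operand are filtered below via
    // the Duplicates set.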
    SmallVector<Value*, 8> Factors;
    FindSingleUseMultiplyFactors(BOp, Factors, Ops, true);
    assert(Factors.size() > 1 && "Bad linearize!");

    // Add one to FactorOccurrences for each unique factor in this op.
    SmallPtrSet<Value*, 8> Duplicates;
    for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
      Value *Factor = Factors[i];
      if (!Duplicates.insert(Factor)) continue;

      unsigned Occ = ++FactorOccurrences[Factor];
      if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }

      // If Factor is a negative constant, add the negated value as a factor
      // because we can percolate the negate out.  Watch for minint, which
      // cannot be positivified.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
        if (CI->isNegative() && !CI->isMinValue(true)) {
          Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
          assert(!Duplicates.count(Factor) &&
                 "Shouldn't have two constant factors, missed a canonicalize");

          unsigned Occ = ++FactorOccurrences[Factor];
          if (Occ > MaxOcc) { MaxOcc = Occ; MaxOccVal = Factor; }
        }
    }
  }

  // If any factor occurred more than one time, we can pull it out.
  if (MaxOcc > 1) {
    DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << '\n');
    ++NumFactor;

    // Create a new instruction that uses the MaxOccVal twice.  If we don't do
    // this, we could otherwise run into situations where removing a factor
    // from an expression will drop a use of maxocc, and this can cause
    // RemoveFactorFromExpression on successive values to behave differently.
    Instruction *DummyInst = BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal);
    SmallVector<Value*, 4> NewMulOps;
    for (unsigned i = 0; i != Ops.size(); ++i) {
      // Only try to remove factors from expressions we're allowed to.
      BinaryOperator *BOp = dyn_cast<BinaryOperator>(Ops[i].Op);
      if (BOp == 0 || BOp->getOpcode() != Instruction::Mul || !BOp->use_empty())
        continue;

      if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) {
        // The factorized operand may occur several times.  Convert them all in
        // one fell swoop.
        for (unsigned j = Ops.size(); j != i;) {
          --j;
          if (Ops[j].Op == Ops[i].Op) {
            NewMulOps.push_back(V);
            Ops.erase(Ops.begin()+j);
          }
        }
        --i;
      }
    }

    // No need for extra uses anymore.
    delete DummyInst;

    unsigned NumAddedValues = NewMulOps.size();
    Value *V = EmitAddTreeOfValues(I, NewMulOps);

    // Now that we have inserted the add tree, optimize it. This allows us to
    // handle cases that require multiple factoring steps, such as this:
    // A*A*B + A*A*C  -->  A*(A*B+A*C)  -->  A*(A*(B+C))
    assert(NumAddedValues > 1 && "Each occurrence should contribute a value");
    (void)NumAddedValues;
    V = ReassociateExpression(cast<BinaryOperator>(V));

    // Create the multiply.
    Value *V2 = BinaryOperator::CreateMul(V, MaxOccVal, "tmp", I);

    // Rerun associate on the multiply in case the inner expression turned into
    // a multiply.  We want to make sure that we keep things in canonical form.
    V2 = ReassociateExpression(cast<BinaryOperator>(V2));

    // If every add operand included the factor (e.g. "A*B + A*C"), then the
    // entire result expression is just the multiply "A*(B+C)".
    if (Ops.empty())
      return V2;

    // Otherwise, we had some input that didn't have the factor, such as
    // "A*B + A*C + D" -> "A*(B+C) + D".  Add the new multiply to the list of
    // things being added by this operation.
    Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
  }

  return 0;
}

namespace {
  /// \brief Predicate tests whether a ValueEntry's op is in a map.
  struct IsValueInMap {
    const DenseMap<Value *, unsigned> &Map;

    IsValueInMap(const DenseMap<Value *, unsigned> &Map) : Map(Map) {}

    bool operator()(const ValueEntry &Entry) {
      return Map.find(Entry.Op) != Map.end();
    }
  };
}

/// \brief Build up a vector of value/power pairs factoring a product.
///
/// Given a series of multiplication operands, build a vector of factors and
/// the powers each is raised to when forming the final product. Sort them in
/// the order of descending power.
///
///      (x*x)          -> [(x, 2)]
///     ((x*x)*x)       -> [(x, 3)]
///  ((((x*y)*x)*y)*x)  -> [(x, 3), (y, 2)]
///
/// \returns Whether any factors have a power greater than one.
bool Reassociate::collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops,
                                         SmallVectorImpl<Factor> &Factors) {
  unsigned FactorPowerSum = 0;
  DenseMap<Value *, unsigned> FactorCounts;
  for (unsigned LastIdx = 0, Idx = 0, Size = Ops.size(); Idx < Size; ++Idx) {
    // Note that 'use_empty' means the only use was in the linearized tree
    // represented by Ops -- we remove the values from the actual operations to
    // reduce their use count.
    if (!Ops[Idx].Op->use_empty()) {
      if (LastIdx == Idx)
        ++LastIdx;
      continue;
    }
    if (LastIdx == Idx || Ops[LastIdx].Op != Ops[Idx].Op) {
      LastIdx = Idx;
      continue;
    }
    // Track for simplification all factors which occur 2 or more times.
    DenseMap<Value *, unsigned>::iterator CountIt;
    bool Inserted;
    llvm::tie(CountIt, Inserted)
      = FactorCounts.insert(std::make_pair(Ops[Idx].Op, 2));
    if (Inserted) {
      FactorPowerSum += 2;
      Factors.push_back(Factor(Ops[Idx].Op, 2));
    } else {
      ++CountIt->second;
      ++FactorPowerSum;
    }
  }
  // We can only simplify factors if the sum of the powers of our simplifiable
  // factors is 4 or higher. When that is the case, we will *always* have
  // a simplification. This is an important invariant to prevent cyclically
  // trying to simplify already minimal formations.
  if (FactorPowerSum < 4)
    return false;

  // Remove all the operands which are in the map.
  Ops.erase(std::remove_if(Ops.begin(), Ops.end(), IsValueInMap(FactorCounts)),
            Ops.end());

  // Record the adjusted power for the simplification factors. We add back into
  // the Ops list any values with an odd power, and make the power even. This
  // allows the outer-most multiplication tree to remain intact during
  // simplification.
  unsigned OldOpsSize = Ops.size();
  for (unsigned Idx = 0, Size = Factors.size(); Idx != Size; ++Idx) {
    Factors[Idx].Power = FactorCounts[Factors[Idx].Base];
    if (Factors[Idx].Power & 1) {
      Ops.push_back(ValueEntry(getRank(Factors[Idx].Base), Factors[Idx].Base));
      --Factors[Idx].Power;
      --FactorPowerSum;
    }
  }
  // None of the adjustments above should have reduced the sum of factor powers
  // below our minimum of '4'.
  assert(FactorPowerSum >= 4);

  // Patch up the sort of the ops vector by sorting the factors we added back
  // onto the back, and merging the two sequences.
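  // The prefix of Ops (everything before OldOpsSize) is still sorted by rank
  // from the caller, so sorting only the appended tail and merging the two
  // runs restores the overall ordering.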
  if (OldOpsSize != Ops.size()) {
    SmallVectorImpl<ValueEntry>::iterator MiddleIt = Ops.begin() + OldOpsSize;
    std::sort(MiddleIt, Ops.end());
    std::inplace_merge(Ops.begin(), MiddleIt, Ops.end());
  }

  std::sort(Factors.begin(), Factors.end(), Factor::PowerDescendingSorter());
  return true;
}

/// \brief Build a tree of multiplies, computing the product of Ops.
static Value *buildMultiplyTree(IRBuilder<> &Builder,
                                SmallVectorImpl<Value*> &Ops) {
  if (Ops.size() == 1)
    return Ops.back();

  Value *LHS = Ops.pop_back_val();
  do {
    LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
  } while (!Ops.empty());

  return LHS;
}

/// \brief Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*...
///
/// Given a vector of values raised to various powers, where no two values are
/// equal and the powers are sorted in decreasing order, compute the minimal
/// DAG of multiplies to compute the final product, and return that product
/// value.
Value *Reassociate::buildMinimalMultiplyDAG(IRBuilder<> &Builder,
                                            SmallVectorImpl<Factor> &Factors) {
  assert(Factors[0].Power);
  SmallVector<Value *, 4> OuterProduct;
  for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size();
       Idx < Size && Factors[Idx].Power > 0; ++Idx) {
    if (Factors[Idx].Power != Factors[LastIdx].Power) {
      LastIdx = Idx;
      continue;
    }

    // We want to multiply across all the factors with the same power so that
    // we can raise them to that power as a single entity. Build a mini tree
    // for that.
    SmallVector<Value *, 4> InnerProduct;
    InnerProduct.push_back(Factors[LastIdx].Base);
    do {
      InnerProduct.push_back(Factors[Idx].Base);
      ++Idx;
    } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power);

    // Reset the base value of the first factor to the new expression tree.
    // We'll remove all the factors with the same power in a second pass.
    Factors[LastIdx].Base
      = ReassociateExpression(
          cast<BinaryOperator>(buildMultiplyTree(Builder, InnerProduct)));

    LastIdx = Idx;
  }
  // Unique factors with equal powers -- we've folded them into the first one's
  // base.
  Factors.erase(std::unique(Factors.begin(), Factors.end(),
                            Factor::PowerEqual()),
                Factors.end());

  // Iteratively collect the base of each factor with an odd power into the
  // outer product, and halve each power in preparation for squaring the
  // expression.
  for (unsigned Idx = 0, Size = Factors.size(); Idx != Size; ++Idx) {
    if (Factors[Idx].Power & 1)
      OuterProduct.push_back(Factors[Idx].Base);
    Factors[Idx].Power >>= 1;
  }
  if (Factors[0].Power) {
    Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors);
    OuterProduct.push_back(SquareRoot);
    OuterProduct.push_back(SquareRoot);
  }
  if (OuterProduct.size() == 1)
    return OuterProduct.front();

  return ReassociateExpression(
    cast<BinaryOperator>(buildMultiplyTree(Builder, OuterProduct)));
}

Value *Reassociate::OptimizeMul(BinaryOperator *I,
                                SmallVectorImpl<ValueEntry> &Ops) {
  // We can only optimize the multiplies when there is a chain of more than
  // three, such that a balanced tree might require fewer total multiplies.
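  // For example, x*x*x*x is collected as the single factor (x, 4) and rebuilt
  // as (x*x)*(x*x), so the inner square is computed once and reused.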
  if (Ops.size() < 4)
    return 0;

  // Try to turn linear trees of multiplies without other uses of the
  // intermediate stages into minimal multiply DAGs with perfect sub-expression
  // re-use.
  SmallVector<Factor, 4> Factors;
  if (!collectMultiplyFactors(Ops, Factors))
    return 0; // All distinct factors, so nothing left for us to do.

  IRBuilder<> Builder(I);
  Value *V = buildMinimalMultiplyDAG(Builder, Factors);
  if (Ops.empty())
    return V;

  ValueEntry NewEntry = ValueEntry(getRank(V), V);
  Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
  return 0;
}

Value *Reassociate::OptimizeExpression(BinaryOperator *I,
                                       SmallVectorImpl<ValueEntry> &Ops) {
  // Now that we have the linearized expression tree, try to optimize it.
  // Start by folding any constants that we found.
  bool IterateOptimization = false;
  if (Ops.size() == 1) return Ops[0].Op;

  unsigned Opcode = I->getOpcode();

  if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op))
    if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) {
      Ops.pop_back();
      Ops.back().Op = ConstantExpr::get(Opcode, V1, V2);
      return OptimizeExpression(I, Ops);
    }

  // Check for destructive annihilation due to a constant being used.
  if (ConstantInt *CstVal = dyn_cast<ConstantInt>(Ops.back().Op))
    switch (Opcode) {
    default: break;
    case Instruction::And:
      if (CstVal->isZero())                  // X & 0 -> 0
        return CstVal;
      if (CstVal->isAllOnesValue())          // X & -1 -> X
        Ops.pop_back();
      break;
    case Instruction::Mul:
      if (CstVal->isZero()) {                // X * 0 -> 0
        ++NumAnnihil;
        return CstVal;
      }

      if (cast<ConstantInt>(CstVal)->isOne())
        Ops.pop_back();                      // X * 1 -> X
      break;
    case Instruction::Or:
      if (CstVal->isAllOnesValue())          // X | -1 -> -1
        return CstVal;
      // FALLTHROUGH!
    case Instruction::Add:
    case Instruction::Xor:
      if (CstVal->isZero())                  // X [|^+] 0 -> X
        Ops.pop_back();
      break;
    }
  if (Ops.size() == 1) return Ops[0].Op;

  // Handle destructive annihilation due to identities between elements in the
  // argument list here.
  unsigned NumOps = Ops.size();
  switch (Opcode) {
  default: break;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    if (Value *Result = OptimizeAndOrXor(Opcode, Ops))
      return Result;
    break;

  case Instruction::Add:
    if (Value *Result = OptimizeAdd(I, Ops))
      return Result;
    break;

  case Instruction::Mul:
    if (Value *Result = OptimizeMul(I, Ops))
      return Result;
    break;
  }

  if (IterateOptimization || Ops.size() != NumOps)
    return OptimizeExpression(I, Ops);
  return 0;
}


/// ReassociateInst - Inspect and reassociate the instruction at the
/// given position, post-incrementing the position.
void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) {
  Instruction *BI = BBI++;
  if (BI->getOpcode() == Instruction::Shl &&
      isa<ConstantInt>(BI->getOperand(1)))
    if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
      MadeChange = true;
      BI = NI;
    }

  // Reject cases where it is pointless to do this.
  if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() ||
      BI->getType()->isVectorTy())
    return;  // Floating point ops are not associative.

  // Do not reassociate boolean (i1) expressions.  We want to preserve the
  // original order of evaluation for short-circuited comparisons that
  // SimplifyCFG has folded to AND/OR expressions.  If the expression
  // is not further optimized, it is likely to be transformed back to a
  // short-circuited form for code gen, and the source order may have been
  // optimized for the most likely conditions.
  if (BI->getType()->isIntegerTy(1))
    return;

  // If this is a subtract instruction which is not already in negate form,
  // see if we can convert it to X+-Y.
  if (BI->getOpcode() == Instruction::Sub) {
    if (ShouldBreakUpSubtract(BI)) {
      BI = BreakUpSubtract(BI, ValueRankMap);
      // Reset the BBI iterator in case BreakUpSubtract changed the
      // instruction it points to.
      BBI = BI;
      ++BBI;
      MadeChange = true;
    } else if (BinaryOperator::isNeg(BI)) {
      // Otherwise, this is a negation.  See if the operand is a multiply tree
      // and if this is not an inner node of a multiply tree.
      if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
          (!BI->hasOneUse() ||
           !isReassociableOp(BI->use_back(), Instruction::Mul))) {
        BI = LowerNegateToMultiply(BI, ValueRankMap);
        MadeChange = true;
      }
    }
  }

  // If this instruction is a commutative binary operator, process it.
  if (!BI->isAssociative()) return;
  BinaryOperator *I = cast<BinaryOperator>(BI);

  // If this is an interior node of a reassociable tree, ignore it until we
  // get to the root of the tree, to avoid N^2 analysis.
  if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode()))
    return;

  // If this is an add tree that is used by a sub instruction, ignore it
  // until we process the subtract.
  if (I->hasOneUse() && I->getOpcode() == Instruction::Add &&
      cast<Instruction>(I->use_back())->getOpcode() == Instruction::Sub)
    return;

  ReassociateExpression(I);
}

Value *Reassociate::ReassociateExpression(BinaryOperator *I) {

  // First, walk the expression tree, linearizing the tree, collecting the
  // operand information.
  SmallVector<ValueEntry, 8> Ops;
  LinearizeExprTree(I, Ops);

  DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');

  // Now that we have linearized the tree to a list and have gathered all of
  // the operands and their ranks, sort the operands by their rank.  Use a
  // stable_sort so that values with equal ranks will have their relative
  // positions maintained (and so the compiler is deterministic).  Note that
  // this sorts so that the highest ranking values end up at the beginning of
  // the vector.
  std::stable_sort(Ops.begin(), Ops.end());

  // OptimizeExpression - Now that we have the expression tree in a convenient
  // sorted form, optimize it globally if possible.
  if (Value *V = OptimizeExpression(I, Ops)) {
    // This expression tree simplified to something that isn't a tree,
    // eliminate it.
    DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n');
    I->replaceAllUsesWith(V);
    if (Instruction *VI = dyn_cast<Instruction>(V))
      VI->setDebugLoc(I->getDebugLoc());
    RemoveDeadBinaryOp(I);
    ++NumAnnihil;
    return V;
  }

  // We want to sink immediates as deeply as possible except in the case where
  // this is a multiply tree used only by an add, and the immediate is a -1.
  // In this case we reassociate to put the negation on the outside so that we
  // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y
  if (I->getOpcode() == Instruction::Mul && I->hasOneUse() &&
      cast<Instruction>(I->use_back())->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(Ops.back().Op) &&
      cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) {
    ValueEntry Tmp = Ops.pop_back_val();
    Ops.insert(Ops.begin(), Tmp);
  }

  DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n');

  if (Ops.size() == 1) {
    // This expression tree simplified to something that isn't a tree,
    // eliminate it.
    I->replaceAllUsesWith(Ops[0].Op);
    if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op))
      OI->setDebugLoc(I->getDebugLoc());
    RemoveDeadBinaryOp(I);
    return Ops[0].Op;
  }

  // Now that we ordered and optimized the expressions, splat them back into
  // the expression tree, removing any unneeded nodes.
  RewriteExprTree(I, Ops);
  return I;
}


bool Reassociate::runOnFunction(Function &F) {
  // Recalculate the rank map for F.
  BuildRankMap(F);

  MadeChange = false;
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
    for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); )
      ReassociateInst(BBI);

  // Now that we're done, revisit any instructions which are likely to
  // have secondary reassociation opportunities.
  while (!RedoInsts.empty())
    if (Value *V = RedoInsts.pop_back_val()) {
      BasicBlock::iterator BBI = cast<Instruction>(V);
      ReassociateInst(BBI);
    }

  // Now that we're done, delete any instructions which are no longer used.
  while (!DeadInsts.empty())
    if (Value *V = DeadInsts.pop_back_val())
      RecursivelyDeleteTriviallyDeadInstructions(V);

  // We are done with the rank map.
  RankMap.clear();
  ValueRankMap.clear();
  return MadeChange;
}