1 //===- Reassociate.cpp - Reassociate binary expressions -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass reassociates commutative expressions in an order that is designed 10 // to promote better constant propagation, GCSE, LICM, PRE, etc. 11 // 12 // For example: 4 + (x + 5) -> x + (4 + 5) 13 // 14 // In the implementation of this algorithm, constants are assigned rank = 0, 15 // function arguments are rank = 1, and other values are assigned ranks 16 // corresponding to the reverse post order traversal of current function 17 // (starting at 2), which effectively gives values in deep loops higher rank 18 // than values not in loops. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #include "llvm/Transforms/Scalar/Reassociate.h" 23 #include "llvm/ADT/APFloat.h" 24 #include "llvm/ADT/APInt.h" 25 #include "llvm/ADT/DenseMap.h" 26 #include "llvm/ADT/PostOrderIterator.h" 27 #include "llvm/ADT/SmallPtrSet.h" 28 #include "llvm/ADT/SmallSet.h" 29 #include "llvm/ADT/SmallVector.h" 30 #include "llvm/ADT/Statistic.h" 31 #include "llvm/Analysis/BasicAliasAnalysis.h" 32 #include "llvm/Analysis/ConstantFolding.h" 33 #include "llvm/Analysis/GlobalsModRef.h" 34 #include "llvm/Analysis/ValueTracking.h" 35 #include "llvm/IR/Argument.h" 36 #include "llvm/IR/BasicBlock.h" 37 #include "llvm/IR/CFG.h" 38 #include "llvm/IR/Constant.h" 39 #include "llvm/IR/Constants.h" 40 #include "llvm/IR/Function.h" 41 #include "llvm/IR/IRBuilder.h" 42 #include "llvm/IR/InstrTypes.h" 43 #include "llvm/IR/Instruction.h" 44 #include "llvm/IR/Instructions.h" 45 #include "llvm/IR/Operator.h" 46 #include "llvm/IR/PassManager.h" 47 #include "llvm/IR/PatternMatch.h" 48 #include "llvm/IR/Type.h" 49 #include "llvm/IR/User.h" 50 #include "llvm/IR/Value.h" 51 #include "llvm/IR/ValueHandle.h" 52 #include "llvm/InitializePasses.h" 53 #include "llvm/Pass.h" 54 #include "llvm/Support/Casting.h" 55 #include "llvm/Support/Debug.h" 56 #include "llvm/Support/raw_ostream.h" 57 #include "llvm/Transforms/Scalar.h" 58 #include "llvm/Transforms/Utils/Local.h" 59 #include <algorithm> 60 #include <cassert> 61 #include <utility> 62 63 using namespace llvm; 64 using namespace reassociate; 65 using namespace PatternMatch; 66 67 #define DEBUG_TYPE "reassociate" 68 69 STATISTIC(NumChanged, "Number of insts reassociated"); 70 STATISTIC(NumAnnihil, "Number of expr tree annihilated"); 71 STATISTIC(NumFactor , "Number of multiplies factored"); 72 73 #ifndef NDEBUG 74 /// Print out the expression identified in the Ops list. 75 static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) { 76 Module *M = I->getModule(); 77 dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " " 78 << *Ops[0].Op->getType() << '\t'; 79 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 80 dbgs() << "[ "; 81 Ops[i].Op->printAsOperand(dbgs(), false, M); 82 dbgs() << ", #" << Ops[i].Rank << "] "; 83 } 84 } 85 #endif 86 87 /// Utility class representing a non-constant Xor-operand. We classify 88 /// non-constant Xor-Operands into two categories: 89 /// C1) The operand is in the form "X & C", where C is a constant and C != ~0 90 /// C2) 91 /// C2.1) The operand is in the form of "X | C", where C is a non-zero 92 /// constant. 93 /// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this 94 /// operand as "E | 0" 95 class llvm::reassociate::XorOpnd { 96 public: 97 XorOpnd(Value *V); 98 99 bool isInvalid() const { return SymbolicPart == nullptr; } 100 bool isOrExpr() const { return isOr; } 101 Value *getValue() const { return OrigVal; } 102 Value *getSymbolicPart() const { return SymbolicPart; } 103 unsigned getSymbolicRank() const { return SymbolicRank; } 104 const APInt &getConstPart() const { return ConstPart; } 105 106 void Invalidate() { SymbolicPart = OrigVal = nullptr; } 107 void setSymbolicRank(unsigned R) { SymbolicRank = R; } 108 109 private: 110 Value *OrigVal; 111 Value *SymbolicPart; 112 APInt ConstPart; 113 unsigned SymbolicRank; 114 bool isOr; 115 }; 116 117 XorOpnd::XorOpnd(Value *V) { 118 assert(!isa<ConstantInt>(V) && "No ConstantInt"); 119 OrigVal = V; 120 Instruction *I = dyn_cast<Instruction>(V); 121 SymbolicRank = 0; 122 123 if (I && (I->getOpcode() == Instruction::Or || 124 I->getOpcode() == Instruction::And)) { 125 Value *V0 = I->getOperand(0); 126 Value *V1 = I->getOperand(1); 127 const APInt *C; 128 if (match(V0, m_APInt(C))) 129 std::swap(V0, V1); 130 131 if (match(V1, m_APInt(C))) { 132 ConstPart = *C; 133 SymbolicPart = V0; 134 isOr = (I->getOpcode() == Instruction::Or); 135 return; 136 } 137 } 138 139 // view the operand as "V | 0" 140 SymbolicPart = V; 141 ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits()); 142 isOr = true; 143 } 144 145 /// Return true if V is an instruction of the specified opcode and if it 146 /// only has one use. 147 static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { 148 auto *I = dyn_cast<Instruction>(V); 149 if (I && I->hasOneUse() && I->getOpcode() == Opcode) 150 if (!isa<FPMathOperator>(I) || I->isFast()) 151 return cast<BinaryOperator>(I); 152 return nullptr; 153 } 154 155 static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1, 156 unsigned Opcode2) { 157 auto *I = dyn_cast<Instruction>(V); 158 if (I && I->hasOneUse() && 159 (I->getOpcode() == Opcode1 || I->getOpcode() == Opcode2)) 160 if (!isa<FPMathOperator>(I) || I->isFast()) 161 return cast<BinaryOperator>(I); 162 return nullptr; 163 } 164 165 void ReassociatePass::BuildRankMap(Function &F, 166 ReversePostOrderTraversal<Function*> &RPOT) { 167 unsigned Rank = 2; 168 169 // Assign distinct ranks to function arguments. 170 for (auto &Arg : F.args()) { 171 ValueRankMap[&Arg] = ++Rank; 172 LLVM_DEBUG(dbgs() << "Calculated Rank[" << Arg.getName() << "] = " << Rank 173 << "\n"); 174 } 175 176 // Traverse basic blocks in ReversePostOrder. 177 for (BasicBlock *BB : RPOT) { 178 unsigned BBRank = RankMap[BB] = ++Rank << 16; 179 180 // Walk the basic block, adding precomputed ranks for any instructions that 181 // we cannot move. This ensures that the ranks for these instructions are 182 // all different in the block. 183 for (Instruction &I : *BB) 184 if (mayHaveNonDefUseDependency(I)) 185 ValueRankMap[&I] = ++BBRank; 186 } 187 } 188 189 unsigned ReassociatePass::getRank(Value *V) { 190 Instruction *I = dyn_cast<Instruction>(V); 191 if (!I) { 192 if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument. 193 return 0; // Otherwise it's a global or constant, rank 0. 194 } 195 196 if (unsigned Rank = ValueRankMap[I]) 197 return Rank; // Rank already known? 198 199 // If this is an expression, return the 1+MAX(rank(LHS), rank(RHS)) so that 200 // we can reassociate expressions for code motion! Since we do not recurse 201 // for PHI nodes, we cannot have infinite recursion here, because there 202 // cannot be loops in the value graph that do not go through PHI nodes. 203 unsigned Rank = 0, MaxRank = RankMap[I->getParent()]; 204 for (unsigned i = 0, e = I->getNumOperands(); i != e && Rank != MaxRank; ++i) 205 Rank = std::max(Rank, getRank(I->getOperand(i))); 206 207 // If this is a 'not' or 'neg' instruction, do not count it for rank. This 208 // assures us that X and ~X will have the same rank. 209 if (!match(I, m_Not(m_Value())) && !match(I, m_Neg(m_Value())) && 210 !match(I, m_FNeg(m_Value()))) 211 ++Rank; 212 213 LLVM_DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank 214 << "\n"); 215 216 return ValueRankMap[I] = Rank; 217 } 218 219 // Canonicalize constants to RHS. Otherwise, sort the operands by rank. 220 void ReassociatePass::canonicalizeOperands(Instruction *I) { 221 assert(isa<BinaryOperator>(I) && "Expected binary operator."); 222 assert(I->isCommutative() && "Expected commutative operator."); 223 224 Value *LHS = I->getOperand(0); 225 Value *RHS = I->getOperand(1); 226 if (LHS == RHS || isa<Constant>(RHS)) 227 return; 228 if (isa<Constant>(LHS) || getRank(RHS) < getRank(LHS)) 229 cast<BinaryOperator>(I)->swapOperands(); 230 } 231 232 static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name, 233 Instruction *InsertBefore, Value *FlagsOp) { 234 if (S1->getType()->isIntOrIntVectorTy()) 235 return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore); 236 else { 237 BinaryOperator *Res = 238 BinaryOperator::CreateFAdd(S1, S2, Name, InsertBefore); 239 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags()); 240 return Res; 241 } 242 } 243 244 static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name, 245 Instruction *InsertBefore, Value *FlagsOp) { 246 if (S1->getType()->isIntOrIntVectorTy()) 247 return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore); 248 else { 249 BinaryOperator *Res = 250 BinaryOperator::CreateFMul(S1, S2, Name, InsertBefore); 251 Res->setFastMathFlags(cast<FPMathOperator>(FlagsOp)->getFastMathFlags()); 252 return Res; 253 } 254 } 255 256 static Instruction *CreateNeg(Value *S1, const Twine &Name, 257 Instruction *InsertBefore, Value *FlagsOp) { 258 if (S1->getType()->isIntOrIntVectorTy()) 259 return BinaryOperator::CreateNeg(S1, Name, InsertBefore); 260 261 if (auto *FMFSource = dyn_cast<Instruction>(FlagsOp)) 262 return UnaryOperator::CreateFNegFMF(S1, FMFSource, Name, InsertBefore); 263 264 return UnaryOperator::CreateFNeg(S1, Name, InsertBefore); 265 } 266 267 /// Replace 0-X with X*-1. 268 static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { 269 assert((isa<UnaryOperator>(Neg) || isa<BinaryOperator>(Neg)) && 270 "Expected a Negate!"); 271 // FIXME: It's not safe to lower a unary FNeg into a FMul by -1.0. 272 unsigned OpNo = isa<BinaryOperator>(Neg) ? 1 : 0; 273 Type *Ty = Neg->getType(); 274 Constant *NegOne = Ty->isIntOrIntVectorTy() ? 275 ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0); 276 277 BinaryOperator *Res = CreateMul(Neg->getOperand(OpNo), NegOne, "", Neg, Neg); 278 Neg->setOperand(OpNo, Constant::getNullValue(Ty)); // Drop use of op. 279 Res->takeName(Neg); 280 Neg->replaceAllUsesWith(Res); 281 Res->setDebugLoc(Neg->getDebugLoc()); 282 return Res; 283 } 284 285 /// Returns k such that lambda(2^Bitwidth) = 2^k, where lambda is the Carmichael 286 /// function. This means that x^(2^k) === 1 mod 2^Bitwidth for 287 /// every odd x, i.e. x^(2^k) = 1 for every odd x in Bitwidth-bit arithmetic. 288 /// Note that 0 <= k < Bitwidth, and if Bitwidth > 3 then x^(2^k) = 0 for every 289 /// even x in Bitwidth-bit arithmetic. 290 static unsigned CarmichaelShift(unsigned Bitwidth) { 291 if (Bitwidth < 3) 292 return Bitwidth - 1; 293 return Bitwidth - 2; 294 } 295 296 /// Add the extra weight 'RHS' to the existing weight 'LHS', 297 /// reducing the combined weight using any special properties of the operation. 298 /// The existing weight LHS represents the computation X op X op ... op X where 299 /// X occurs LHS times. The combined weight represents X op X op ... op X with 300 /// X occurring LHS + RHS times. If op is "Xor" for example then the combined 301 /// operation is equivalent to X if LHS + RHS is odd, or 0 if LHS + RHS is even; 302 /// the routine returns 1 in LHS in the first case, and 0 in LHS in the second. 303 static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) { 304 // If we were working with infinite precision arithmetic then the combined 305 // weight would be LHS + RHS. But we are using finite precision arithmetic, 306 // and the APInt sum LHS + RHS may not be correct if it wraps (it is correct 307 // for nilpotent operations and addition, but not for idempotent operations 308 // and multiplication), so it is important to correctly reduce the combined 309 // weight back into range if wrapping would be wrong. 310 311 // If RHS is zero then the weight didn't change. 312 if (RHS.isMinValue()) 313 return; 314 // If LHS is zero then the combined weight is RHS. 315 if (LHS.isMinValue()) { 316 LHS = RHS; 317 return; 318 } 319 // From this point on we know that neither LHS nor RHS is zero. 320 321 if (Instruction::isIdempotent(Opcode)) { 322 // Idempotent means X op X === X, so any non-zero weight is equivalent to a 323 // weight of 1. Keeping weights at zero or one also means that wrapping is 324 // not a problem. 325 assert(LHS == 1 && RHS == 1 && "Weights not reduced!"); 326 return; // Return a weight of 1. 327 } 328 if (Instruction::isNilpotent(Opcode)) { 329 // Nilpotent means X op X === 0, so reduce weights modulo 2. 330 assert(LHS == 1 && RHS == 1 && "Weights not reduced!"); 331 LHS = 0; // 1 + 1 === 0 modulo 2. 332 return; 333 } 334 if (Opcode == Instruction::Add || Opcode == Instruction::FAdd) { 335 // TODO: Reduce the weight by exploiting nsw/nuw? 336 LHS += RHS; 337 return; 338 } 339 340 assert((Opcode == Instruction::Mul || Opcode == Instruction::FMul) && 341 "Unknown associative operation!"); 342 unsigned Bitwidth = LHS.getBitWidth(); 343 // If CM is the Carmichael number then a weight W satisfying W >= CM+Bitwidth 344 // can be replaced with W-CM. That's because x^W=x^(W-CM) for every Bitwidth 345 // bit number x, since either x is odd in which case x^CM = 1, or x is even in 346 // which case both x^W and x^(W - CM) are zero. By subtracting off multiples 347 // of CM like this weights can always be reduced to the range [0, CM+Bitwidth) 348 // which by a happy accident means that they can always be represented using 349 // Bitwidth bits. 350 // TODO: Reduce the weight by exploiting nsw/nuw? (Could do much better than 351 // the Carmichael number). 352 if (Bitwidth > 3) { 353 /// CM - The value of Carmichael's lambda function. 354 APInt CM = APInt::getOneBitSet(Bitwidth, CarmichaelShift(Bitwidth)); 355 // Any weight W >= Threshold can be replaced with W - CM. 356 APInt Threshold = CM + Bitwidth; 357 assert(LHS.ult(Threshold) && RHS.ult(Threshold) && "Weights not reduced!"); 358 // For Bitwidth 4 or more the following sum does not overflow. 359 LHS += RHS; 360 while (LHS.uge(Threshold)) 361 LHS -= CM; 362 } else { 363 // To avoid problems with overflow do everything the same as above but using 364 // a larger type. 365 unsigned CM = 1U << CarmichaelShift(Bitwidth); 366 unsigned Threshold = CM + Bitwidth; 367 assert(LHS.getZExtValue() < Threshold && RHS.getZExtValue() < Threshold && 368 "Weights not reduced!"); 369 unsigned Total = LHS.getZExtValue() + RHS.getZExtValue(); 370 while (Total >= Threshold) 371 Total -= CM; 372 LHS = Total; 373 } 374 } 375 376 using RepeatedValue = std::pair<Value*, APInt>; 377 378 /// Given an associative binary expression, return the leaf 379 /// nodes in Ops along with their weights (how many times the leaf occurs). The 380 /// original expression is the same as 381 /// (Ops[0].first op Ops[0].first op ... Ops[0].first) <- Ops[0].second times 382 /// op 383 /// (Ops[1].first op Ops[1].first op ... Ops[1].first) <- Ops[1].second times 384 /// op 385 /// ... 386 /// op 387 /// (Ops[N].first op Ops[N].first op ... Ops[N].first) <- Ops[N].second times 388 /// 389 /// Note that the values Ops[0].first, ..., Ops[N].first are all distinct. 390 /// 391 /// This routine may modify the function, in which case it returns 'true'. The 392 /// changes it makes may well be destructive, changing the value computed by 'I' 393 /// to something completely different. Thus if the routine returns 'true' then 394 /// you MUST either replace I with a new expression computed from the Ops array, 395 /// or use RewriteExprTree to put the values back in. 396 /// 397 /// A leaf node is either not a binary operation of the same kind as the root 398 /// node 'I' (i.e. is not a binary operator at all, or is, but with a different 399 /// opcode), or is the same kind of binary operator but has a use which either 400 /// does not belong to the expression, or does belong to the expression but is 401 /// a leaf node. Every leaf node has at least one use that is a non-leaf node 402 /// of the expression, while for non-leaf nodes (except for the root 'I') every 403 /// use is a non-leaf node of the expression. 404 /// 405 /// For example: 406 /// expression graph node names 407 /// 408 /// + | I 409 /// / \ | 410 /// + + | A, B 411 /// / \ / \ | 412 /// * + * | C, D, E 413 /// / \ / \ / \ | 414 /// + * | F, G 415 /// 416 /// The leaf nodes are C, E, F and G. The Ops array will contain (maybe not in 417 /// that order) (C, 1), (E, 1), (F, 2), (G, 2). 418 /// 419 /// The expression is maximal: if some instruction is a binary operator of the 420 /// same kind as 'I', and all of its uses are non-leaf nodes of the expression, 421 /// then the instruction also belongs to the expression, is not a leaf node of 422 /// it, and its operands also belong to the expression (but may be leaf nodes). 423 /// 424 /// NOTE: This routine will set operands of non-leaf non-root nodes to undef in 425 /// order to ensure that every non-root node in the expression has *exactly one* 426 /// use by a non-leaf node of the expression. This destruction means that the 427 /// caller MUST either replace 'I' with a new expression or use something like 428 /// RewriteExprTree to put the values back in if the routine indicates that it 429 /// made a change by returning 'true'. 430 /// 431 /// In the above example either the right operand of A or the left operand of B 432 /// will be replaced by undef. If it is B's operand then this gives: 433 /// 434 /// + | I 435 /// / \ | 436 /// + + | A, B - operand of B replaced with undef 437 /// / \ \ | 438 /// * + * | C, D, E 439 /// / \ / \ / \ | 440 /// + * | F, G 441 /// 442 /// Note that such undef operands can only be reached by passing through 'I'. 443 /// For example, if you visit operands recursively starting from a leaf node 444 /// then you will never see such an undef operand unless you get back to 'I', 445 /// which requires passing through a phi node. 446 /// 447 /// Note that this routine may also mutate binary operators of the wrong type 448 /// that have all uses inside the expression (i.e. only used by non-leaf nodes 449 /// of the expression) if it can turn them into binary operators of the right 450 /// type and thus make the expression bigger. 451 static bool LinearizeExprTree(Instruction *I, 452 SmallVectorImpl<RepeatedValue> &Ops, 453 ReassociatePass::OrderedSet &ToRedo) { 454 assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) && 455 "Expected a UnaryOperator or BinaryOperator!"); 456 LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); 457 unsigned Bitwidth = I->getType()->getScalarType()->getPrimitiveSizeInBits(); 458 unsigned Opcode = I->getOpcode(); 459 assert(I->isAssociative() && I->isCommutative() && 460 "Expected an associative and commutative operation!"); 461 462 // Visit all operands of the expression, keeping track of their weight (the 463 // number of paths from the expression root to the operand, or if you like 464 // the number of times that operand occurs in the linearized expression). 465 // For example, if I = X + A, where X = A + B, then I, X and B have weight 1 466 // while A has weight two. 467 468 // Worklist of non-leaf nodes (their operands are in the expression too) along 469 // with their weights, representing a certain number of paths to the operator. 470 // If an operator occurs in the worklist multiple times then we found multiple 471 // ways to get to it. 472 SmallVector<std::pair<Instruction*, APInt>, 8> Worklist; // (Op, Weight) 473 Worklist.push_back(std::make_pair(I, APInt(Bitwidth, 1))); 474 bool Changed = false; 475 476 // Leaves of the expression are values that either aren't the right kind of 477 // operation (eg: a constant, or a multiply in an add tree), or are, but have 478 // some uses that are not inside the expression. For example, in I = X + X, 479 // X = A + B, the value X has two uses (by I) that are in the expression. If 480 // X has any other uses, for example in a return instruction, then we consider 481 // X to be a leaf, and won't analyze it further. When we first visit a value, 482 // if it has more than one use then at first we conservatively consider it to 483 // be a leaf. Later, as the expression is explored, we may discover some more 484 // uses of the value from inside the expression. If all uses turn out to be 485 // from within the expression (and the value is a binary operator of the right 486 // kind) then the value is no longer considered to be a leaf, and its operands 487 // are explored. 488 489 // Leaves - Keeps track of the set of putative leaves as well as the number of 490 // paths to each leaf seen so far. 491 using LeafMap = DenseMap<Value *, APInt>; 492 LeafMap Leaves; // Leaf -> Total weight so far. 493 SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order. 494 495 #ifndef NDEBUG 496 SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme. 497 #endif 498 while (!Worklist.empty()) { 499 std::pair<Instruction*, APInt> P = Worklist.pop_back_val(); 500 I = P.first; // We examine the operands of this binary operator. 501 502 for (unsigned OpIdx = 0; OpIdx < I->getNumOperands(); ++OpIdx) { // Visit operands. 503 Value *Op = I->getOperand(OpIdx); 504 APInt Weight = P.second; // Number of paths to this operand. 505 LLVM_DEBUG(dbgs() << "OPERAND: " << *Op << " (" << Weight << ")\n"); 506 assert(!Op->use_empty() && "No uses, so how did we get to it?!"); 507 508 // If this is a binary operation of the right kind with only one use then 509 // add its operands to the expression. 510 if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) { 511 assert(Visited.insert(Op).second && "Not first visit!"); 512 LLVM_DEBUG(dbgs() << "DIRECT ADD: " << *Op << " (" << Weight << ")\n"); 513 Worklist.push_back(std::make_pair(BO, Weight)); 514 continue; 515 } 516 517 // Appears to be a leaf. Is the operand already in the set of leaves? 518 LeafMap::iterator It = Leaves.find(Op); 519 if (It == Leaves.end()) { 520 // Not in the leaf map. Must be the first time we saw this operand. 521 assert(Visited.insert(Op).second && "Not first visit!"); 522 if (!Op->hasOneUse()) { 523 // This value has uses not accounted for by the expression, so it is 524 // not safe to modify. Mark it as being a leaf. 525 LLVM_DEBUG(dbgs() 526 << "ADD USES LEAF: " << *Op << " (" << Weight << ")\n"); 527 LeafOrder.push_back(Op); 528 Leaves[Op] = Weight; 529 continue; 530 } 531 // No uses outside the expression, try morphing it. 532 } else { 533 // Already in the leaf map. 534 assert(It != Leaves.end() && Visited.count(Op) && 535 "In leaf map but not visited!"); 536 537 // Update the number of paths to the leaf. 538 IncorporateWeight(It->second, Weight, Opcode); 539 540 #if 0 // TODO: Re-enable once PR13021 is fixed. 541 // The leaf already has one use from inside the expression. As we want 542 // exactly one such use, drop this new use of the leaf. 543 assert(!Op->hasOneUse() && "Only one use, but we got here twice!"); 544 I->setOperand(OpIdx, UndefValue::get(I->getType())); 545 Changed = true; 546 547 // If the leaf is a binary operation of the right kind and we now see 548 // that its multiple original uses were in fact all by nodes belonging 549 // to the expression, then no longer consider it to be a leaf and add 550 // its operands to the expression. 551 if (BinaryOperator *BO = isReassociableOp(Op, Opcode)) { 552 LLVM_DEBUG(dbgs() << "UNLEAF: " << *Op << " (" << It->second << ")\n"); 553 Worklist.push_back(std::make_pair(BO, It->second)); 554 Leaves.erase(It); 555 continue; 556 } 557 #endif 558 559 // If we still have uses that are not accounted for by the expression 560 // then it is not safe to modify the value. 561 if (!Op->hasOneUse()) 562 continue; 563 564 // No uses outside the expression, try morphing it. 565 Weight = It->second; 566 Leaves.erase(It); // Since the value may be morphed below. 567 } 568 569 // At this point we have a value which, first of all, is not a binary 570 // expression of the right kind, and secondly, is only used inside the 571 // expression. This means that it can safely be modified. See if we 572 // can usefully morph it into an expression of the right kind. 573 assert((!isa<Instruction>(Op) || 574 cast<Instruction>(Op)->getOpcode() != Opcode 575 || (isa<FPMathOperator>(Op) && 576 !cast<Instruction>(Op)->isFast())) && 577 "Should have been handled above!"); 578 assert(Op->hasOneUse() && "Has uses outside the expression tree!"); 579 580 // If this is a multiply expression, turn any internal negations into 581 // multiplies by -1 so they can be reassociated. Add any users of the 582 // newly created multiplication by -1 to the redo list, so any 583 // reassociation opportunities that are exposed will be reassociated 584 // further. 585 Instruction *Neg; 586 if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) || 587 (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) && 588 match(Op, m_Instruction(Neg))) { 589 LLVM_DEBUG(dbgs() 590 << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); 591 Instruction *Mul = LowerNegateToMultiply(Neg); 592 LLVM_DEBUG(dbgs() << *Mul << '\n'); 593 Worklist.push_back(std::make_pair(Mul, Weight)); 594 for (User *U : Mul->users()) { 595 if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U)) 596 ToRedo.insert(UserBO); 597 } 598 ToRedo.insert(Neg); 599 Changed = true; 600 continue; 601 } 602 603 // Failed to morph into an expression of the right type. This really is 604 // a leaf. 605 LLVM_DEBUG(dbgs() << "ADD LEAF: " << *Op << " (" << Weight << ")\n"); 606 assert(!isReassociableOp(Op, Opcode) && "Value was morphed?"); 607 LeafOrder.push_back(Op); 608 Leaves[Op] = Weight; 609 } 610 } 611 612 // The leaves, repeated according to their weights, represent the linearized 613 // form of the expression. 614 for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) { 615 Value *V = LeafOrder[i]; 616 LeafMap::iterator It = Leaves.find(V); 617 if (It == Leaves.end()) 618 // Node initially thought to be a leaf wasn't. 619 continue; 620 assert(!isReassociableOp(V, Opcode) && "Shouldn't be a leaf!"); 621 APInt Weight = It->second; 622 if (Weight.isMinValue()) 623 // Leaf already output or weight reduction eliminated it. 624 continue; 625 // Ensure the leaf is only output once. 626 It->second = 0; 627 Ops.push_back(std::make_pair(V, Weight)); 628 } 629 630 // For nilpotent operations or addition there may be no operands, for example 631 // because the expression was "X xor X" or consisted of 2^Bitwidth additions: 632 // in both cases the weight reduces to 0 causing the value to be skipped. 633 if (Ops.empty()) { 634 Constant *Identity = ConstantExpr::getBinOpIdentity(Opcode, I->getType()); 635 assert(Identity && "Associative operation without identity!"); 636 Ops.emplace_back(Identity, APInt(Bitwidth, 1)); 637 } 638 639 return Changed; 640 } 641 642 /// Now that the operands for this expression tree are 643 /// linearized and optimized, emit them in-order. 644 void ReassociatePass::RewriteExprTree(BinaryOperator *I, 645 SmallVectorImpl<ValueEntry> &Ops) { 646 assert(Ops.size() > 1 && "Single values should be used directly!"); 647 648 // Since our optimizations should never increase the number of operations, the 649 // new expression can usually be written reusing the existing binary operators 650 // from the original expression tree, without creating any new instructions, 651 // though the rewritten expression may have a completely different topology. 652 // We take care to not change anything if the new expression will be the same 653 // as the original. If more than trivial changes (like commuting operands) 654 // were made then we are obliged to clear out any optional subclass data like 655 // nsw flags. 656 657 /// NodesToRewrite - Nodes from the original expression available for writing 658 /// the new expression into. 659 SmallVector<BinaryOperator*, 8> NodesToRewrite; 660 unsigned Opcode = I->getOpcode(); 661 BinaryOperator *Op = I; 662 663 /// NotRewritable - The operands being written will be the leaves of the new 664 /// expression and must not be used as inner nodes (via NodesToRewrite) by 665 /// mistake. Inner nodes are always reassociable, and usually leaves are not 666 /// (if they were they would have been incorporated into the expression and so 667 /// would not be leaves), so most of the time there is no danger of this. But 668 /// in rare cases a leaf may become reassociable if an optimization kills uses 669 /// of it, or it may momentarily become reassociable during rewriting (below) 670 /// due it being removed as an operand of one of its uses. Ensure that misuse 671 /// of leaf nodes as inner nodes cannot occur by remembering all of the future 672 /// leaves and refusing to reuse any of them as inner nodes. 673 SmallPtrSet<Value*, 8> NotRewritable; 674 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 675 NotRewritable.insert(Ops[i].Op); 676 677 // ExpressionChanged - Non-null if the rewritten expression differs from the 678 // original in some non-trivial way, requiring the clearing of optional flags. 679 // Flags are cleared from the operator in ExpressionChanged up to I inclusive. 680 BinaryOperator *ExpressionChanged = nullptr; 681 for (unsigned i = 0; ; ++i) { 682 // The last operation (which comes earliest in the IR) is special as both 683 // operands will come from Ops, rather than just one with the other being 684 // a subexpression. 685 if (i+2 == Ops.size()) { 686 Value *NewLHS = Ops[i].Op; 687 Value *NewRHS = Ops[i+1].Op; 688 Value *OldLHS = Op->getOperand(0); 689 Value *OldRHS = Op->getOperand(1); 690 691 if (NewLHS == OldLHS && NewRHS == OldRHS) 692 // Nothing changed, leave it alone. 693 break; 694 695 if (NewLHS == OldRHS && NewRHS == OldLHS) { 696 // The order of the operands was reversed. Swap them. 697 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n'); 698 Op->swapOperands(); 699 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n'); 700 MadeChange = true; 701 ++NumChanged; 702 break; 703 } 704 705 // The new operation differs non-trivially from the original. Overwrite 706 // the old operands with the new ones. 707 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n'); 708 if (NewLHS != OldLHS) { 709 BinaryOperator *BO = isReassociableOp(OldLHS, Opcode); 710 if (BO && !NotRewritable.count(BO)) 711 NodesToRewrite.push_back(BO); 712 Op->setOperand(0, NewLHS); 713 } 714 if (NewRHS != OldRHS) { 715 BinaryOperator *BO = isReassociableOp(OldRHS, Opcode); 716 if (BO && !NotRewritable.count(BO)) 717 NodesToRewrite.push_back(BO); 718 Op->setOperand(1, NewRHS); 719 } 720 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n'); 721 722 ExpressionChanged = Op; 723 MadeChange = true; 724 ++NumChanged; 725 726 break; 727 } 728 729 // Not the last operation. The left-hand side will be a sub-expression 730 // while the right-hand side will be the current element of Ops. 731 Value *NewRHS = Ops[i].Op; 732 if (NewRHS != Op->getOperand(1)) { 733 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n'); 734 if (NewRHS == Op->getOperand(0)) { 735 // The new right-hand side was already present as the left operand. If 736 // we are lucky then swapping the operands will sort out both of them. 737 Op->swapOperands(); 738 } else { 739 // Overwrite with the new right-hand side. 740 BinaryOperator *BO = isReassociableOp(Op->getOperand(1), Opcode); 741 if (BO && !NotRewritable.count(BO)) 742 NodesToRewrite.push_back(BO); 743 Op->setOperand(1, NewRHS); 744 ExpressionChanged = Op; 745 } 746 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n'); 747 MadeChange = true; 748 ++NumChanged; 749 } 750 751 // Now deal with the left-hand side. If this is already an operation node 752 // from the original expression then just rewrite the rest of the expression 753 // into it. 754 BinaryOperator *BO = isReassociableOp(Op->getOperand(0), Opcode); 755 if (BO && !NotRewritable.count(BO)) { 756 Op = BO; 757 continue; 758 } 759 760 // Otherwise, grab a spare node from the original expression and use that as 761 // the left-hand side. If there are no nodes left then the optimizers made 762 // an expression with more nodes than the original! This usually means that 763 // they did something stupid but it might mean that the problem was just too 764 // hard (finding the mimimal number of multiplications needed to realize a 765 // multiplication expression is NP-complete). Whatever the reason, smart or 766 // stupid, create a new node if there are none left. 767 BinaryOperator *NewOp; 768 if (NodesToRewrite.empty()) { 769 Constant *Undef = UndefValue::get(I->getType()); 770 NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode), 771 Undef, Undef, "", I); 772 if (NewOp->getType()->isFPOrFPVectorTy()) 773 NewOp->setFastMathFlags(I->getFastMathFlags()); 774 } else { 775 NewOp = NodesToRewrite.pop_back_val(); 776 } 777 778 LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n'); 779 Op->setOperand(0, NewOp); 780 LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n'); 781 ExpressionChanged = Op; 782 MadeChange = true; 783 ++NumChanged; 784 Op = NewOp; 785 } 786 787 // If the expression changed non-trivially then clear out all subclass data 788 // starting from the operator specified in ExpressionChanged, and compactify 789 // the operators to just before the expression root to guarantee that the 790 // expression tree is dominated by all of Ops. 791 if (ExpressionChanged) 792 do { 793 // Preserve FastMathFlags. 794 if (isa<FPMathOperator>(I)) { 795 FastMathFlags Flags = I->getFastMathFlags(); 796 ExpressionChanged->clearSubclassOptionalData(); 797 ExpressionChanged->setFastMathFlags(Flags); 798 } else 799 ExpressionChanged->clearSubclassOptionalData(); 800 801 if (ExpressionChanged == I) 802 break; 803 804 // Discard any debug info related to the expressions that has changed (we 805 // can leave debug infor related to the root, since the result of the 806 // expression tree should be the same even after reassociation). 807 replaceDbgUsesWithUndef(ExpressionChanged); 808 809 ExpressionChanged->moveBefore(I); 810 ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin()); 811 } while (true); 812 813 // Throw away any left over nodes from the original expression. 814 for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i) 815 RedoInsts.insert(NodesToRewrite[i]); 816 } 817 818 /// Insert instructions before the instruction pointed to by BI, 819 /// that computes the negative version of the value specified. The negative 820 /// version of the value is returned, and BI is left pointing at the instruction 821 /// that should be processed next by the reassociation pass. 822 /// Also add intermediate instructions to the redo list that are modified while 823 /// pushing the negates through adds. These will be revisited to see if 824 /// additional opportunities have been exposed. 825 static Value *NegateValue(Value *V, Instruction *BI, 826 ReassociatePass::OrderedSet &ToRedo) { 827 if (auto *C = dyn_cast<Constant>(V)) 828 return C->getType()->isFPOrFPVectorTy() ? ConstantExpr::getFNeg(C) : 829 ConstantExpr::getNeg(C); 830 831 // We are trying to expose opportunity for reassociation. One of the things 832 // that we want to do to achieve this is to push a negation as deep into an 833 // expression chain as possible, to expose the add instructions. In practice, 834 // this means that we turn this: 835 // X = -(A+12+C+D) into X = -A + -12 + -C + -D = -12 + -A + -C + -D 836 // so that later, a: Y = 12+X could get reassociated with the -12 to eliminate 837 // the constants. We assume that instcombine will clean up the mess later if 838 // we introduce tons of unnecessary negation instructions. 839 // 840 if (BinaryOperator *I = 841 isReassociableOp(V, Instruction::Add, Instruction::FAdd)) { 842 // Push the negates through the add. 843 I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo)); 844 I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo)); 845 if (I->getOpcode() == Instruction::Add) { 846 I->setHasNoUnsignedWrap(false); 847 I->setHasNoSignedWrap(false); 848 } 849 850 // We must move the add instruction here, because the neg instructions do 851 // not dominate the old add instruction in general. By moving it, we are 852 // assured that the neg instructions we just inserted dominate the 853 // instruction we are about to insert after them. 854 // 855 I->moveBefore(BI); 856 I->setName(I->getName()+".neg"); 857 858 // Add the intermediate negates to the redo list as processing them later 859 // could expose more reassociating opportunities. 860 ToRedo.insert(I); 861 return I; 862 } 863 864 // Okay, we need to materialize a negated version of V with an instruction. 865 // Scan the use lists of V to see if we have one already. 866 for (User *U : V->users()) { 867 if (!match(U, m_Neg(m_Value())) && !match(U, m_FNeg(m_Value()))) 868 continue; 869 870 // We found one! Now we have to make sure that the definition dominates 871 // this use. We do this by moving it to the entry block (if it is a 872 // non-instruction value) or right after the definition. These negates will 873 // be zapped by reassociate later, so we don't need much finesse here. 874 Instruction *TheNeg = cast<Instruction>(U); 875 876 // Verify that the negate is in this function, V might be a constant expr. 877 if (TheNeg->getParent()->getParent() != BI->getParent()->getParent()) 878 continue; 879 880 bool FoundCatchSwitch = false; 881 882 BasicBlock::iterator InsertPt; 883 if (Instruction *InstInput = dyn_cast<Instruction>(V)) { 884 if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) { 885 InsertPt = II->getNormalDest()->begin(); 886 } else { 887 InsertPt = ++InstInput->getIterator(); 888 } 889 890 const BasicBlock *BB = InsertPt->getParent(); 891 892 // Make sure we don't move anything before PHIs or exception 893 // handling pads. 894 while (InsertPt != BB->end() && (isa<PHINode>(InsertPt) || 895 InsertPt->isEHPad())) { 896 if (isa<CatchSwitchInst>(InsertPt)) 897 // A catchswitch cannot have anything in the block except 898 // itself and PHIs. We'll bail out below. 899 FoundCatchSwitch = true; 900 ++InsertPt; 901 } 902 } else { 903 InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin(); 904 } 905 906 // We found a catchswitch in the block where we want to move the 907 // neg. We cannot move anything into that block. Bail and just 908 // create the neg before BI, as if we hadn't found an existing 909 // neg. 910 if (FoundCatchSwitch) 911 break; 912 913 TheNeg->moveBefore(&*InsertPt); 914 if (TheNeg->getOpcode() == Instruction::Sub) { 915 TheNeg->setHasNoUnsignedWrap(false); 916 TheNeg->setHasNoSignedWrap(false); 917 } else { 918 TheNeg->andIRFlags(BI); 919 } 920 ToRedo.insert(TheNeg); 921 return TheNeg; 922 } 923 924 // Insert a 'neg' instruction that subtracts the value from zero to get the 925 // negation. 926 Instruction *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI); 927 ToRedo.insert(NewNeg); 928 return NewNeg; 929 } 930 931 // See if this `or` looks like an load widening reduction, i.e. that it 932 // consists of an `or`/`shl`/`zext`/`load` nodes only. Note that we don't 933 // ensure that the pattern is *really* a load widening reduction, 934 // we do not ensure that it can really be replaced with a widened load, 935 // only that it mostly looks like one. 936 static bool isLoadCombineCandidate(Instruction *Or) { 937 SmallVector<Instruction *, 8> Worklist; 938 SmallSet<Instruction *, 8> Visited; 939 940 auto Enqueue = [&](Value *V) { 941 auto *I = dyn_cast<Instruction>(V); 942 // Each node of an `or` reduction must be an instruction, 943 if (!I) 944 return false; // Node is certainly not part of an `or` load reduction. 945 // Only process instructions we have never processed before. 946 if (Visited.insert(I).second) 947 Worklist.emplace_back(I); 948 return true; // Will need to look at parent nodes. 949 }; 950 951 if (!Enqueue(Or)) 952 return false; // Not an `or` reduction pattern. 953 954 while (!Worklist.empty()) { 955 auto *I = Worklist.pop_back_val(); 956 957 // Okay, which instruction is this node? 958 switch (I->getOpcode()) { 959 case Instruction::Or: 960 // Got an `or` node. That's fine, just recurse into it's operands. 961 for (Value *Op : I->operands()) 962 if (!Enqueue(Op)) 963 return false; // Not an `or` reduction pattern. 964 continue; 965 966 case Instruction::Shl: 967 case Instruction::ZExt: 968 // `shl`/`zext` nodes are fine, just recurse into their base operand. 969 if (!Enqueue(I->getOperand(0))) 970 return false; // Not an `or` reduction pattern. 971 continue; 972 973 case Instruction::Load: 974 // Perfect, `load` node means we've reached an edge of the graph. 975 continue; 976 977 default: // Unknown node. 978 return false; // Not an `or` reduction pattern. 979 } 980 } 981 982 return true; 983 } 984 985 /// Return true if it may be profitable to convert this (X|Y) into (X+Y). 986 static bool shouldConvertOrWithNoCommonBitsToAdd(Instruction *Or) { 987 // Don't bother to convert this up unless either the LHS is an associable add 988 // or subtract or mul or if this is only used by one of the above. 989 // This is only a compile-time improvement, it is not needed for correctness! 990 auto isInteresting = [](Value *V) { 991 for (auto Op : {Instruction::Add, Instruction::Sub, Instruction::Mul, 992 Instruction::Shl}) 993 if (isReassociableOp(V, Op)) 994 return true; 995 return false; 996 }; 997 998 if (any_of(Or->operands(), isInteresting)) 999 return true; 1000 1001 Value *VB = Or->user_back(); 1002 if (Or->hasOneUse() && isInteresting(VB)) 1003 return true; 1004 1005 return false; 1006 } 1007 1008 /// If we have (X|Y), and iff X and Y have no common bits set, 1009 /// transform this into (X+Y) to allow arithmetics reassociation. 1010 static BinaryOperator *convertOrWithNoCommonBitsToAdd(Instruction *Or) { 1011 // Convert an or into an add. 1012 BinaryOperator *New = 1013 CreateAdd(Or->getOperand(0), Or->getOperand(1), "", Or, Or); 1014 New->setHasNoSignedWrap(); 1015 New->setHasNoUnsignedWrap(); 1016 New->takeName(Or); 1017 1018 // Everyone now refers to the add instruction. 1019 Or->replaceAllUsesWith(New); 1020 New->setDebugLoc(Or->getDebugLoc()); 1021 1022 LLVM_DEBUG(dbgs() << "Converted or into an add: " << *New << '\n'); 1023 return New; 1024 } 1025 1026 /// Return true if we should break up this subtract of X-Y into (X + -Y). 1027 static bool ShouldBreakUpSubtract(Instruction *Sub) { 1028 // If this is a negation, we can't split it up! 1029 if (match(Sub, m_Neg(m_Value())) || match(Sub, m_FNeg(m_Value()))) 1030 return false; 1031 1032 // Don't breakup X - undef. 1033 if (isa<UndefValue>(Sub->getOperand(1))) 1034 return false; 1035 1036 // Don't bother to break this up unless either the LHS is an associable add or 1037 // subtract or if this is only used by one. 1038 Value *V0 = Sub->getOperand(0); 1039 if (isReassociableOp(V0, Instruction::Add, Instruction::FAdd) || 1040 isReassociableOp(V0, Instruction::Sub, Instruction::FSub)) 1041 return true; 1042 Value *V1 = Sub->getOperand(1); 1043 if (isReassociableOp(V1, Instruction::Add, Instruction::FAdd) || 1044 isReassociableOp(V1, Instruction::Sub, Instruction::FSub)) 1045 return true; 1046 Value *VB = Sub->user_back(); 1047 if (Sub->hasOneUse() && 1048 (isReassociableOp(VB, Instruction::Add, Instruction::FAdd) || 1049 isReassociableOp(VB, Instruction::Sub, Instruction::FSub))) 1050 return true; 1051 1052 return false; 1053 } 1054 1055 /// If we have (X-Y), and if either X is an add, or if this is only used by an 1056 /// add, transform this into (X+(0-Y)) to promote better reassociation. 1057 static BinaryOperator *BreakUpSubtract(Instruction *Sub, 1058 ReassociatePass::OrderedSet &ToRedo) { 1059 // Convert a subtract into an add and a neg instruction. This allows sub 1060 // instructions to be commuted with other add instructions. 1061 // 1062 // Calculate the negative value of Operand 1 of the sub instruction, 1063 // and set it as the RHS of the add instruction we just made. 1064 Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo); 1065 BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub); 1066 Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op. 1067 Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op. 1068 New->takeName(Sub); 1069 1070 // Everyone now refers to the add instruction. 1071 Sub->replaceAllUsesWith(New); 1072 New->setDebugLoc(Sub->getDebugLoc()); 1073 1074 LLVM_DEBUG(dbgs() << "Negated: " << *New << '\n'); 1075 return New; 1076 } 1077 1078 /// If this is a shift of a reassociable multiply or is used by one, change 1079 /// this into a multiply by a constant to assist with further reassociation. 1080 static BinaryOperator *ConvertShiftToMul(Instruction *Shl) { 1081 Constant *MulCst = ConstantInt::get(Shl->getType(), 1); 1082 auto *SA = cast<ConstantInt>(Shl->getOperand(1)); 1083 MulCst = ConstantExpr::getShl(MulCst, SA); 1084 1085 BinaryOperator *Mul = 1086 BinaryOperator::CreateMul(Shl->getOperand(0), MulCst, "", Shl); 1087 Shl->setOperand(0, PoisonValue::get(Shl->getType())); // Drop use of op. 1088 Mul->takeName(Shl); 1089 1090 // Everyone now refers to the mul instruction. 1091 Shl->replaceAllUsesWith(Mul); 1092 Mul->setDebugLoc(Shl->getDebugLoc()); 1093 1094 // We can safely preserve the nuw flag in all cases. It's also safe to turn a 1095 // nuw nsw shl into a nuw nsw mul. However, nsw in isolation requires special 1096 // handling. It can be preserved as long as we're not left shifting by 1097 // bitwidth - 1. 1098 bool NSW = cast<BinaryOperator>(Shl)->hasNoSignedWrap(); 1099 bool NUW = cast<BinaryOperator>(Shl)->hasNoUnsignedWrap(); 1100 unsigned BitWidth = Shl->getType()->getIntegerBitWidth(); 1101 if (NSW && (NUW || SA->getValue().ult(BitWidth - 1))) 1102 Mul->setHasNoSignedWrap(true); 1103 Mul->setHasNoUnsignedWrap(NUW); 1104 return Mul; 1105 } 1106 1107 /// Scan backwards and forwards among values with the same rank as element i 1108 /// to see if X exists. If X does not exist, return i. This is useful when 1109 /// scanning for 'x' when we see '-x' because they both get the same rank. 1110 static unsigned FindInOperandList(const SmallVectorImpl<ValueEntry> &Ops, 1111 unsigned i, Value *X) { 1112 unsigned XRank = Ops[i].Rank; 1113 unsigned e = Ops.size(); 1114 for (unsigned j = i+1; j != e && Ops[j].Rank == XRank; ++j) { 1115 if (Ops[j].Op == X) 1116 return j; 1117 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op)) 1118 if (Instruction *I2 = dyn_cast<Instruction>(X)) 1119 if (I1->isIdenticalTo(I2)) 1120 return j; 1121 } 1122 // Scan backwards. 1123 for (unsigned j = i-1; j != ~0U && Ops[j].Rank == XRank; --j) { 1124 if (Ops[j].Op == X) 1125 return j; 1126 if (Instruction *I1 = dyn_cast<Instruction>(Ops[j].Op)) 1127 if (Instruction *I2 = dyn_cast<Instruction>(X)) 1128 if (I1->isIdenticalTo(I2)) 1129 return j; 1130 } 1131 return i; 1132 } 1133 1134 /// Emit a tree of add instructions, summing Ops together 1135 /// and returning the result. Insert the tree before I. 1136 static Value *EmitAddTreeOfValues(Instruction *I, 1137 SmallVectorImpl<WeakTrackingVH> &Ops) { 1138 if (Ops.size() == 1) return Ops.back(); 1139 1140 Value *V1 = Ops.pop_back_val(); 1141 Value *V2 = EmitAddTreeOfValues(I, Ops); 1142 return CreateAdd(V2, V1, "reass.add", I, I); 1143 } 1144 1145 /// If V is an expression tree that is a multiplication sequence, 1146 /// and if this sequence contains a multiply by Factor, 1147 /// remove Factor from the tree and return the new tree. 1148 Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { 1149 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul); 1150 if (!BO) 1151 return nullptr; 1152 1153 SmallVector<RepeatedValue, 8> Tree; 1154 MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts); 1155 SmallVector<ValueEntry, 8> Factors; 1156 Factors.reserve(Tree.size()); 1157 for (unsigned i = 0, e = Tree.size(); i != e; ++i) { 1158 RepeatedValue E = Tree[i]; 1159 Factors.append(E.second.getZExtValue(), 1160 ValueEntry(getRank(E.first), E.first)); 1161 } 1162 1163 bool FoundFactor = false; 1164 bool NeedsNegate = false; 1165 for (unsigned i = 0, e = Factors.size(); i != e; ++i) { 1166 if (Factors[i].Op == Factor) { 1167 FoundFactor = true; 1168 Factors.erase(Factors.begin()+i); 1169 break; 1170 } 1171 1172 // If this is a negative version of this factor, remove it. 1173 if (ConstantInt *FC1 = dyn_cast<ConstantInt>(Factor)) { 1174 if (ConstantInt *FC2 = dyn_cast<ConstantInt>(Factors[i].Op)) 1175 if (FC1->getValue() == -FC2->getValue()) { 1176 FoundFactor = NeedsNegate = true; 1177 Factors.erase(Factors.begin()+i); 1178 break; 1179 } 1180 } else if (ConstantFP *FC1 = dyn_cast<ConstantFP>(Factor)) { 1181 if (ConstantFP *FC2 = dyn_cast<ConstantFP>(Factors[i].Op)) { 1182 const APFloat &F1 = FC1->getValueAPF(); 1183 APFloat F2(FC2->getValueAPF()); 1184 F2.changeSign(); 1185 if (F1 == F2) { 1186 FoundFactor = NeedsNegate = true; 1187 Factors.erase(Factors.begin() + i); 1188 break; 1189 } 1190 } 1191 } 1192 } 1193 1194 if (!FoundFactor) { 1195 // Make sure to restore the operands to the expression tree. 1196 RewriteExprTree(BO, Factors); 1197 return nullptr; 1198 } 1199 1200 BasicBlock::iterator InsertPt = ++BO->getIterator(); 1201 1202 // If this was just a single multiply, remove the multiply and return the only 1203 // remaining operand. 1204 if (Factors.size() == 1) { 1205 RedoInsts.insert(BO); 1206 V = Factors[0].Op; 1207 } else { 1208 RewriteExprTree(BO, Factors); 1209 V = BO; 1210 } 1211 1212 if (NeedsNegate) 1213 V = CreateNeg(V, "neg", &*InsertPt, BO); 1214 1215 return V; 1216 } 1217 1218 /// If V is a single-use multiply, recursively add its operands as factors, 1219 /// otherwise add V to the list of factors. 1220 /// 1221 /// Ops is the top-level list of add operands we're trying to factor. 1222 static void FindSingleUseMultiplyFactors(Value *V, 1223 SmallVectorImpl<Value*> &Factors) { 1224 BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul); 1225 if (!BO) { 1226 Factors.push_back(V); 1227 return; 1228 } 1229 1230 // Otherwise, add the LHS and RHS to the list of factors. 1231 FindSingleUseMultiplyFactors(BO->getOperand(1), Factors); 1232 FindSingleUseMultiplyFactors(BO->getOperand(0), Factors); 1233 } 1234 1235 /// Optimize a series of operands to an 'and', 'or', or 'xor' instruction. 1236 /// This optimizes based on identities. If it can be reduced to a single Value, 1237 /// it is returned, otherwise the Ops list is mutated as necessary. 1238 static Value *OptimizeAndOrXor(unsigned Opcode, 1239 SmallVectorImpl<ValueEntry> &Ops) { 1240 // Scan the operand lists looking for X and ~X pairs, along with X,X pairs. 1241 // If we find any, we can simplify the expression. X&~X == 0, X|~X == -1. 1242 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 1243 // First, check for X and ~X in the operand list. 1244 assert(i < Ops.size()); 1245 Value *X; 1246 if (match(Ops[i].Op, m_Not(m_Value(X)))) { // Cannot occur for ^. 1247 unsigned FoundX = FindInOperandList(Ops, i, X); 1248 if (FoundX != i) { 1249 if (Opcode == Instruction::And) // ...&X&~X = 0 1250 return Constant::getNullValue(X->getType()); 1251 1252 if (Opcode == Instruction::Or) // ...|X|~X = -1 1253 return Constant::getAllOnesValue(X->getType()); 1254 } 1255 } 1256 1257 // Next, check for duplicate pairs of values, which we assume are next to 1258 // each other, due to our sorting criteria. 1259 assert(i < Ops.size()); 1260 if (i+1 != Ops.size() && Ops[i+1].Op == Ops[i].Op) { 1261 if (Opcode == Instruction::And || Opcode == Instruction::Or) { 1262 // Drop duplicate values for And and Or. 1263 Ops.erase(Ops.begin()+i); 1264 --i; --e; 1265 ++NumAnnihil; 1266 continue; 1267 } 1268 1269 // Drop pairs of values for Xor. 1270 assert(Opcode == Instruction::Xor); 1271 if (e == 2) 1272 return Constant::getNullValue(Ops[0].Op->getType()); 1273 1274 // Y ^ X^X -> Y 1275 Ops.erase(Ops.begin()+i, Ops.begin()+i+2); 1276 i -= 1; e -= 2; 1277 ++NumAnnihil; 1278 } 1279 } 1280 return nullptr; 1281 } 1282 1283 /// Helper function of CombineXorOpnd(). It creates a bitwise-and 1284 /// instruction with the given two operands, and return the resulting 1285 /// instruction. There are two special cases: 1) if the constant operand is 0, 1286 /// it will return NULL. 2) if the constant is ~0, the symbolic operand will 1287 /// be returned. 1288 static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, 1289 const APInt &ConstOpnd) { 1290 if (ConstOpnd.isZero()) 1291 return nullptr; 1292 1293 if (ConstOpnd.isAllOnes()) 1294 return Opnd; 1295 1296 Instruction *I = BinaryOperator::CreateAnd( 1297 Opnd, ConstantInt::get(Opnd->getType(), ConstOpnd), "and.ra", 1298 InsertBefore); 1299 I->setDebugLoc(InsertBefore->getDebugLoc()); 1300 return I; 1301 } 1302 1303 // Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd" 1304 // into "R ^ C", where C would be 0, and R is a symbolic value. 1305 // 1306 // If it was successful, true is returned, and the "R" and "C" is returned 1307 // via "Res" and "ConstOpnd", respectively; otherwise, false is returned, 1308 // and both "Res" and "ConstOpnd" remain unchanged. 1309 bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, 1310 APInt &ConstOpnd, Value *&Res) { 1311 // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 1312 // = ((x | c1) ^ c1) ^ (c1 ^ c2) 1313 // = (x & ~c1) ^ (c1 ^ c2) 1314 // It is useful only when c1 == c2. 1315 if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero()) 1316 return false; 1317 1318 if (!Opnd1->getValue()->hasOneUse()) 1319 return false; 1320 1321 const APInt &C1 = Opnd1->getConstPart(); 1322 if (C1 != ConstOpnd) 1323 return false; 1324 1325 Value *X = Opnd1->getSymbolicPart(); 1326 Res = createAndInstr(I, X, ~C1); 1327 // ConstOpnd was C2, now C1 ^ C2. 1328 ConstOpnd ^= C1; 1329 1330 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue())) 1331 RedoInsts.insert(T); 1332 return true; 1333 } 1334 1335 // Helper function of OptimizeXor(). It tries to simplify 1336 // "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a 1337 // symbolic value. 1338 // 1339 // If it was successful, true is returned, and the "R" and "C" is returned 1340 // via "Res" and "ConstOpnd", respectively (If the entire expression is 1341 // evaluated to a constant, the Res is set to NULL); otherwise, false is 1342 // returned, and both "Res" and "ConstOpnd" remain unchanged. 1343 bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, 1344 XorOpnd *Opnd2, APInt &ConstOpnd, 1345 Value *&Res) { 1346 Value *X = Opnd1->getSymbolicPart(); 1347 if (X != Opnd2->getSymbolicPart()) 1348 return false; 1349 1350 // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.) 1351 int DeadInstNum = 1; 1352 if (Opnd1->getValue()->hasOneUse()) 1353 DeadInstNum++; 1354 if (Opnd2->getValue()->hasOneUse()) 1355 DeadInstNum++; 1356 1357 // Xor-Rule 2: 1358 // (x | c1) ^ (x & c2) 1359 // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1 1360 // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1 1361 // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3 1362 // 1363 if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) { 1364 if (Opnd2->isOrExpr()) 1365 std::swap(Opnd1, Opnd2); 1366 1367 const APInt &C1 = Opnd1->getConstPart(); 1368 const APInt &C2 = Opnd2->getConstPart(); 1369 APInt C3((~C1) ^ C2); 1370 1371 // Do not increase code size! 1372 if (!C3.isZero() && !C3.isAllOnes()) { 1373 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2; 1374 if (NewInstNum > DeadInstNum) 1375 return false; 1376 } 1377 1378 Res = createAndInstr(I, X, C3); 1379 ConstOpnd ^= C1; 1380 } else if (Opnd1->isOrExpr()) { 1381 // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2 1382 // 1383 const APInt &C1 = Opnd1->getConstPart(); 1384 const APInt &C2 = Opnd2->getConstPart(); 1385 APInt C3 = C1 ^ C2; 1386 1387 // Do not increase code size 1388 if (!C3.isZero() && !C3.isAllOnes()) { 1389 int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2; 1390 if (NewInstNum > DeadInstNum) 1391 return false; 1392 } 1393 1394 Res = createAndInstr(I, X, C3); 1395 ConstOpnd ^= C3; 1396 } else { 1397 // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2)) 1398 // 1399 const APInt &C1 = Opnd1->getConstPart(); 1400 const APInt &C2 = Opnd2->getConstPart(); 1401 APInt C3 = C1 ^ C2; 1402 Res = createAndInstr(I, X, C3); 1403 } 1404 1405 // Put the original operands in the Redo list; hope they will be deleted 1406 // as dead code. 1407 if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue())) 1408 RedoInsts.insert(T); 1409 if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue())) 1410 RedoInsts.insert(T); 1411 1412 return true; 1413 } 1414 1415 /// Optimize a series of operands to an 'xor' instruction. If it can be reduced 1416 /// to a single Value, it is returned, otherwise the Ops list is mutated as 1417 /// necessary. 1418 Value *ReassociatePass::OptimizeXor(Instruction *I, 1419 SmallVectorImpl<ValueEntry> &Ops) { 1420 if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops)) 1421 return V; 1422 1423 if (Ops.size() == 1) 1424 return nullptr; 1425 1426 SmallVector<XorOpnd, 8> Opnds; 1427 SmallVector<XorOpnd*, 8> OpndPtrs; 1428 Type *Ty = Ops[0].Op->getType(); 1429 APInt ConstOpnd(Ty->getScalarSizeInBits(), 0); 1430 1431 // Step 1: Convert ValueEntry to XorOpnd 1432 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 1433 Value *V = Ops[i].Op; 1434 const APInt *C; 1435 // TODO: Support non-splat vectors. 1436 if (match(V, m_APInt(C))) { 1437 ConstOpnd ^= *C; 1438 } else { 1439 XorOpnd O(V); 1440 O.setSymbolicRank(getRank(O.getSymbolicPart())); 1441 Opnds.push_back(O); 1442 } 1443 } 1444 1445 // NOTE: From this point on, do *NOT* add/delete element to/from "Opnds". 1446 // It would otherwise invalidate the "Opnds"'s iterator, and hence invalidate 1447 // the "OpndPtrs" as well. For the similar reason, do not fuse this loop 1448 // with the previous loop --- the iterator of the "Opnds" may be invalidated 1449 // when new elements are added to the vector. 1450 for (unsigned i = 0, e = Opnds.size(); i != e; ++i) 1451 OpndPtrs.push_back(&Opnds[i]); 1452 1453 // Step 2: Sort the Xor-Operands in a way such that the operands containing 1454 // the same symbolic value cluster together. For instance, the input operand 1455 // sequence ("x | 123", "y & 456", "x & 789") will be sorted into: 1456 // ("x | 123", "x & 789", "y & 456"). 1457 // 1458 // The purpose is twofold: 1459 // 1) Cluster together the operands sharing the same symbolic-value. 1460 // 2) Operand having smaller symbolic-value-rank is permuted earlier, which 1461 // could potentially shorten crital path, and expose more loop-invariants. 1462 // Note that values' rank are basically defined in RPO order (FIXME). 1463 // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier 1464 // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2", 1465 // "z" in the order of X-Y-Z is better than any other orders. 1466 llvm::stable_sort(OpndPtrs, [](XorOpnd *LHS, XorOpnd *RHS) { 1467 return LHS->getSymbolicRank() < RHS->getSymbolicRank(); 1468 }); 1469 1470 // Step 3: Combine adjacent operands 1471 XorOpnd *PrevOpnd = nullptr; 1472 bool Changed = false; 1473 for (unsigned i = 0, e = Opnds.size(); i < e; i++) { 1474 XorOpnd *CurrOpnd = OpndPtrs[i]; 1475 // The combined value 1476 Value *CV; 1477 1478 // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd" 1479 if (!ConstOpnd.isZero() && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) { 1480 Changed = true; 1481 if (CV) 1482 *CurrOpnd = XorOpnd(CV); 1483 else { 1484 CurrOpnd->Invalidate(); 1485 continue; 1486 } 1487 } 1488 1489 if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) { 1490 PrevOpnd = CurrOpnd; 1491 continue; 1492 } 1493 1494 // step 3.2: When previous and current operands share the same symbolic 1495 // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" 1496 if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) { 1497 // Remove previous operand 1498 PrevOpnd->Invalidate(); 1499 if (CV) { 1500 *CurrOpnd = XorOpnd(CV); 1501 PrevOpnd = CurrOpnd; 1502 } else { 1503 CurrOpnd->Invalidate(); 1504 PrevOpnd = nullptr; 1505 } 1506 Changed = true; 1507 } 1508 } 1509 1510 // Step 4: Reassemble the Ops 1511 if (Changed) { 1512 Ops.clear(); 1513 for (unsigned int i = 0, e = Opnds.size(); i < e; i++) { 1514 XorOpnd &O = Opnds[i]; 1515 if (O.isInvalid()) 1516 continue; 1517 ValueEntry VE(getRank(O.getValue()), O.getValue()); 1518 Ops.push_back(VE); 1519 } 1520 if (!ConstOpnd.isZero()) { 1521 Value *C = ConstantInt::get(Ty, ConstOpnd); 1522 ValueEntry VE(getRank(C), C); 1523 Ops.push_back(VE); 1524 } 1525 unsigned Sz = Ops.size(); 1526 if (Sz == 1) 1527 return Ops.back().Op; 1528 if (Sz == 0) { 1529 assert(ConstOpnd.isZero()); 1530 return ConstantInt::get(Ty, ConstOpnd); 1531 } 1532 } 1533 1534 return nullptr; 1535 } 1536 1537 /// Optimize a series of operands to an 'add' instruction. This 1538 /// optimizes based on identities. If it can be reduced to a single Value, it 1539 /// is returned, otherwise the Ops list is mutated as necessary. 1540 Value *ReassociatePass::OptimizeAdd(Instruction *I, 1541 SmallVectorImpl<ValueEntry> &Ops) { 1542 // Scan the operand lists looking for X and -X pairs. If we find any, we 1543 // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it, 1544 // scan for any 1545 // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z. 1546 1547 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 1548 Value *TheOp = Ops[i].Op; 1549 // Check to see if we've seen this operand before. If so, we factor all 1550 // instances of the operand together. Due to our sorting criteria, we know 1551 // that these need to be next to each other in the vector. 1552 if (i+1 != Ops.size() && Ops[i+1].Op == TheOp) { 1553 // Rescan the list, remove all instances of this operand from the expr. 1554 unsigned NumFound = 0; 1555 do { 1556 Ops.erase(Ops.begin()+i); 1557 ++NumFound; 1558 } while (i != Ops.size() && Ops[i].Op == TheOp); 1559 1560 LLVM_DEBUG(dbgs() << "\nFACTORING [" << NumFound << "]: " << *TheOp 1561 << '\n'); 1562 ++NumFactor; 1563 1564 // Insert a new multiply. 1565 Type *Ty = TheOp->getType(); 1566 Constant *C = Ty->isIntOrIntVectorTy() ? 1567 ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound); 1568 Instruction *Mul = CreateMul(TheOp, C, "factor", I, I); 1569 1570 // Now that we have inserted a multiply, optimize it. This allows us to 1571 // handle cases that require multiple factoring steps, such as this: 1572 // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6 1573 RedoInsts.insert(Mul); 1574 1575 // If every add operand was a duplicate, return the multiply. 1576 if (Ops.empty()) 1577 return Mul; 1578 1579 // Otherwise, we had some input that didn't have the dupe, such as 1580 // "A + A + B" -> "A*2 + B". Add the new multiply to the list of 1581 // things being added by this operation. 1582 Ops.insert(Ops.begin(), ValueEntry(getRank(Mul), Mul)); 1583 1584 --i; 1585 e = Ops.size(); 1586 continue; 1587 } 1588 1589 // Check for X and -X or X and ~X in the operand list. 1590 Value *X; 1591 if (!match(TheOp, m_Neg(m_Value(X))) && !match(TheOp, m_Not(m_Value(X))) && 1592 !match(TheOp, m_FNeg(m_Value(X)))) 1593 continue; 1594 1595 unsigned FoundX = FindInOperandList(Ops, i, X); 1596 if (FoundX == i) 1597 continue; 1598 1599 // Remove X and -X from the operand list. 1600 if (Ops.size() == 2 && 1601 (match(TheOp, m_Neg(m_Value())) || match(TheOp, m_FNeg(m_Value())))) 1602 return Constant::getNullValue(X->getType()); 1603 1604 // Remove X and ~X from the operand list. 1605 if (Ops.size() == 2 && match(TheOp, m_Not(m_Value()))) 1606 return Constant::getAllOnesValue(X->getType()); 1607 1608 Ops.erase(Ops.begin()+i); 1609 if (i < FoundX) 1610 --FoundX; 1611 else 1612 --i; // Need to back up an extra one. 1613 Ops.erase(Ops.begin()+FoundX); 1614 ++NumAnnihil; 1615 --i; // Revisit element. 1616 e -= 2; // Removed two elements. 1617 1618 // if X and ~X we append -1 to the operand list. 1619 if (match(TheOp, m_Not(m_Value()))) { 1620 Value *V = Constant::getAllOnesValue(X->getType()); 1621 Ops.insert(Ops.end(), ValueEntry(getRank(V), V)); 1622 e += 1; 1623 } 1624 } 1625 1626 // Scan the operand list, checking to see if there are any common factors 1627 // between operands. Consider something like A*A+A*B*C+D. We would like to 1628 // reassociate this to A*(A+B*C)+D, which reduces the number of multiplies. 1629 // To efficiently find this, we count the number of times a factor occurs 1630 // for any ADD operands that are MULs. 1631 DenseMap<Value*, unsigned> FactorOccurrences; 1632 1633 // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4) 1634 // where they are actually the same multiply. 1635 unsigned MaxOcc = 0; 1636 Value *MaxOccVal = nullptr; 1637 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 1638 BinaryOperator *BOp = 1639 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul); 1640 if (!BOp) 1641 continue; 1642 1643 // Compute all of the factors of this added value. 1644 SmallVector<Value*, 8> Factors; 1645 FindSingleUseMultiplyFactors(BOp, Factors); 1646 assert(Factors.size() > 1 && "Bad linearize!"); 1647 1648 // Add one to FactorOccurrences for each unique factor in this op. 1649 SmallPtrSet<Value*, 8> Duplicates; 1650 for (unsigned i = 0, e = Factors.size(); i != e; ++i) { 1651 Value *Factor = Factors[i]; 1652 if (!Duplicates.insert(Factor).second) 1653 continue; 1654 1655 unsigned Occ = ++FactorOccurrences[Factor]; 1656 if (Occ > MaxOcc) { 1657 MaxOcc = Occ; 1658 MaxOccVal = Factor; 1659 } 1660 1661 // If Factor is a negative constant, add the negated value as a factor 1662 // because we can percolate the negate out. Watch for minint, which 1663 // cannot be positivified. 1664 if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) { 1665 if (CI->isNegative() && !CI->isMinValue(true)) { 1666 Factor = ConstantInt::get(CI->getContext(), -CI->getValue()); 1667 if (!Duplicates.insert(Factor).second) 1668 continue; 1669 unsigned Occ = ++FactorOccurrences[Factor]; 1670 if (Occ > MaxOcc) { 1671 MaxOcc = Occ; 1672 MaxOccVal = Factor; 1673 } 1674 } 1675 } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Factor)) { 1676 if (CF->isNegative()) { 1677 APFloat F(CF->getValueAPF()); 1678 F.changeSign(); 1679 Factor = ConstantFP::get(CF->getContext(), F); 1680 if (!Duplicates.insert(Factor).second) 1681 continue; 1682 unsigned Occ = ++FactorOccurrences[Factor]; 1683 if (Occ > MaxOcc) { 1684 MaxOcc = Occ; 1685 MaxOccVal = Factor; 1686 } 1687 } 1688 } 1689 } 1690 } 1691 1692 // If any factor occurred more than one time, we can pull it out. 1693 if (MaxOcc > 1) { 1694 LLVM_DEBUG(dbgs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal 1695 << '\n'); 1696 ++NumFactor; 1697 1698 // Create a new instruction that uses the MaxOccVal twice. If we don't do 1699 // this, we could otherwise run into situations where removing a factor 1700 // from an expression will drop a use of maxocc, and this can cause 1701 // RemoveFactorFromExpression on successive values to behave differently. 1702 Instruction *DummyInst = 1703 I->getType()->isIntOrIntVectorTy() 1704 ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) 1705 : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); 1706 1707 SmallVector<WeakTrackingVH, 4> NewMulOps; 1708 for (unsigned i = 0; i != Ops.size(); ++i) { 1709 // Only try to remove factors from expressions we're allowed to. 1710 BinaryOperator *BOp = 1711 isReassociableOp(Ops[i].Op, Instruction::Mul, Instruction::FMul); 1712 if (!BOp) 1713 continue; 1714 1715 if (Value *V = RemoveFactorFromExpression(Ops[i].Op, MaxOccVal)) { 1716 // The factorized operand may occur several times. Convert them all in 1717 // one fell swoop. 1718 for (unsigned j = Ops.size(); j != i;) { 1719 --j; 1720 if (Ops[j].Op == Ops[i].Op) { 1721 NewMulOps.push_back(V); 1722 Ops.erase(Ops.begin()+j); 1723 } 1724 } 1725 --i; 1726 } 1727 } 1728 1729 // No need for extra uses anymore. 1730 DummyInst->deleteValue(); 1731 1732 unsigned NumAddedValues = NewMulOps.size(); 1733 Value *V = EmitAddTreeOfValues(I, NewMulOps); 1734 1735 // Now that we have inserted the add tree, optimize it. This allows us to 1736 // handle cases that require multiple factoring steps, such as this: 1737 // A*A*B + A*A*C --> A*(A*B+A*C) --> A*(A*(B+C)) 1738 assert(NumAddedValues > 1 && "Each occurrence should contribute a value"); 1739 (void)NumAddedValues; 1740 if (Instruction *VI = dyn_cast<Instruction>(V)) 1741 RedoInsts.insert(VI); 1742 1743 // Create the multiply. 1744 Instruction *V2 = CreateMul(V, MaxOccVal, "reass.mul", I, I); 1745 1746 // Rerun associate on the multiply in case the inner expression turned into 1747 // a multiply. We want to make sure that we keep things in canonical form. 1748 RedoInsts.insert(V2); 1749 1750 // If every add operand included the factor (e.g. "A*B + A*C"), then the 1751 // entire result expression is just the multiply "A*(B+C)". 1752 if (Ops.empty()) 1753 return V2; 1754 1755 // Otherwise, we had some input that didn't have the factor, such as 1756 // "A*B + A*C + D" -> "A*(B+C) + D". Add the new multiply to the list of 1757 // things being added by this operation. 1758 Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2)); 1759 } 1760 1761 return nullptr; 1762 } 1763 1764 /// Build up a vector of value/power pairs factoring a product. 1765 /// 1766 /// Given a series of multiplication operands, build a vector of factors and 1767 /// the powers each is raised to when forming the final product. Sort them in 1768 /// the order of descending power. 1769 /// 1770 /// (x*x) -> [(x, 2)] 1771 /// ((x*x)*x) -> [(x, 3)] 1772 /// ((((x*y)*x)*y)*x) -> [(x, 3), (y, 2)] 1773 /// 1774 /// \returns Whether any factors have a power greater than one. 1775 static bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops, 1776 SmallVectorImpl<Factor> &Factors) { 1777 // FIXME: Have Ops be (ValueEntry, Multiplicity) pairs, simplifying this. 1778 // Compute the sum of powers of simplifiable factors. 1779 unsigned FactorPowerSum = 0; 1780 for (unsigned Idx = 1, Size = Ops.size(); Idx < Size; ++Idx) { 1781 Value *Op = Ops[Idx-1].Op; 1782 1783 // Count the number of occurrences of this value. 1784 unsigned Count = 1; 1785 for (; Idx < Size && Ops[Idx].Op == Op; ++Idx) 1786 ++Count; 1787 // Track for simplification all factors which occur 2 or more times. 1788 if (Count > 1) 1789 FactorPowerSum += Count; 1790 } 1791 1792 // We can only simplify factors if the sum of the powers of our simplifiable 1793 // factors is 4 or higher. When that is the case, we will *always* have 1794 // a simplification. This is an important invariant to prevent cyclicly 1795 // trying to simplify already minimal formations. 1796 if (FactorPowerSum < 4) 1797 return false; 1798 1799 // Now gather the simplifiable factors, removing them from Ops. 1800 FactorPowerSum = 0; 1801 for (unsigned Idx = 1; Idx < Ops.size(); ++Idx) { 1802 Value *Op = Ops[Idx-1].Op; 1803 1804 // Count the number of occurrences of this value. 1805 unsigned Count = 1; 1806 for (; Idx < Ops.size() && Ops[Idx].Op == Op; ++Idx) 1807 ++Count; 1808 if (Count == 1) 1809 continue; 1810 // Move an even number of occurrences to Factors. 1811 Count &= ~1U; 1812 Idx -= Count; 1813 FactorPowerSum += Count; 1814 Factors.push_back(Factor(Op, Count)); 1815 Ops.erase(Ops.begin()+Idx, Ops.begin()+Idx+Count); 1816 } 1817 1818 // None of the adjustments above should have reduced the sum of factor powers 1819 // below our mininum of '4'. 1820 assert(FactorPowerSum >= 4); 1821 1822 llvm::stable_sort(Factors, [](const Factor &LHS, const Factor &RHS) { 1823 return LHS.Power > RHS.Power; 1824 }); 1825 return true; 1826 } 1827 1828 /// Build a tree of multiplies, computing the product of Ops. 1829 static Value *buildMultiplyTree(IRBuilderBase &Builder, 1830 SmallVectorImpl<Value*> &Ops) { 1831 if (Ops.size() == 1) 1832 return Ops.back(); 1833 1834 Value *LHS = Ops.pop_back_val(); 1835 do { 1836 if (LHS->getType()->isIntOrIntVectorTy()) 1837 LHS = Builder.CreateMul(LHS, Ops.pop_back_val()); 1838 else 1839 LHS = Builder.CreateFMul(LHS, Ops.pop_back_val()); 1840 } while (!Ops.empty()); 1841 1842 return LHS; 1843 } 1844 1845 /// Build a minimal multiplication DAG for (a^x)*(b^y)*(c^z)*... 1846 /// 1847 /// Given a vector of values raised to various powers, where no two values are 1848 /// equal and the powers are sorted in decreasing order, compute the minimal 1849 /// DAG of multiplies to compute the final product, and return that product 1850 /// value. 1851 Value * 1852 ReassociatePass::buildMinimalMultiplyDAG(IRBuilderBase &Builder, 1853 SmallVectorImpl<Factor> &Factors) { 1854 assert(Factors[0].Power); 1855 SmallVector<Value *, 4> OuterProduct; 1856 for (unsigned LastIdx = 0, Idx = 1, Size = Factors.size(); 1857 Idx < Size && Factors[Idx].Power > 0; ++Idx) { 1858 if (Factors[Idx].Power != Factors[LastIdx].Power) { 1859 LastIdx = Idx; 1860 continue; 1861 } 1862 1863 // We want to multiply across all the factors with the same power so that 1864 // we can raise them to that power as a single entity. Build a mini tree 1865 // for that. 1866 SmallVector<Value *, 4> InnerProduct; 1867 InnerProduct.push_back(Factors[LastIdx].Base); 1868 do { 1869 InnerProduct.push_back(Factors[Idx].Base); 1870 ++Idx; 1871 } while (Idx < Size && Factors[Idx].Power == Factors[LastIdx].Power); 1872 1873 // Reset the base value of the first factor to the new expression tree. 1874 // We'll remove all the factors with the same power in a second pass. 1875 Value *M = Factors[LastIdx].Base = buildMultiplyTree(Builder, InnerProduct); 1876 if (Instruction *MI = dyn_cast<Instruction>(M)) 1877 RedoInsts.insert(MI); 1878 1879 LastIdx = Idx; 1880 } 1881 // Unique factors with equal powers -- we've folded them into the first one's 1882 // base. 1883 Factors.erase(std::unique(Factors.begin(), Factors.end(), 1884 [](const Factor &LHS, const Factor &RHS) { 1885 return LHS.Power == RHS.Power; 1886 }), 1887 Factors.end()); 1888 1889 // Iteratively collect the base of each factor with an add power into the 1890 // outer product, and halve each power in preparation for squaring the 1891 // expression. 1892 for (unsigned Idx = 0, Size = Factors.size(); Idx != Size; ++Idx) { 1893 if (Factors[Idx].Power & 1) 1894 OuterProduct.push_back(Factors[Idx].Base); 1895 Factors[Idx].Power >>= 1; 1896 } 1897 if (Factors[0].Power) { 1898 Value *SquareRoot = buildMinimalMultiplyDAG(Builder, Factors); 1899 OuterProduct.push_back(SquareRoot); 1900 OuterProduct.push_back(SquareRoot); 1901 } 1902 if (OuterProduct.size() == 1) 1903 return OuterProduct.front(); 1904 1905 Value *V = buildMultiplyTree(Builder, OuterProduct); 1906 return V; 1907 } 1908 1909 Value *ReassociatePass::OptimizeMul(BinaryOperator *I, 1910 SmallVectorImpl<ValueEntry> &Ops) { 1911 // We can only optimize the multiplies when there is a chain of more than 1912 // three, such that a balanced tree might require fewer total multiplies. 1913 if (Ops.size() < 4) 1914 return nullptr; 1915 1916 // Try to turn linear trees of multiplies without other uses of the 1917 // intermediate stages into minimal multiply DAGs with perfect sub-expression 1918 // re-use. 1919 SmallVector<Factor, 4> Factors; 1920 if (!collectMultiplyFactors(Ops, Factors)) 1921 return nullptr; // All distinct factors, so nothing left for us to do. 1922 1923 IRBuilder<> Builder(I); 1924 // The reassociate transformation for FP operations is performed only 1925 // if unsafe algebra is permitted by FastMathFlags. Propagate those flags 1926 // to the newly generated operations. 1927 if (auto FPI = dyn_cast<FPMathOperator>(I)) 1928 Builder.setFastMathFlags(FPI->getFastMathFlags()); 1929 1930 Value *V = buildMinimalMultiplyDAG(Builder, Factors); 1931 if (Ops.empty()) 1932 return V; 1933 1934 ValueEntry NewEntry = ValueEntry(getRank(V), V); 1935 Ops.insert(llvm::lower_bound(Ops, NewEntry), NewEntry); 1936 return nullptr; 1937 } 1938 1939 Value *ReassociatePass::OptimizeExpression(BinaryOperator *I, 1940 SmallVectorImpl<ValueEntry> &Ops) { 1941 // Now that we have the linearized expression tree, try to optimize it. 1942 // Start by folding any constants that we found. 1943 const DataLayout &DL = I->getModule()->getDataLayout(); 1944 Constant *Cst = nullptr; 1945 unsigned Opcode = I->getOpcode(); 1946 while (!Ops.empty()) { 1947 if (auto *C = dyn_cast<Constant>(Ops.back().Op)) { 1948 if (!Cst) { 1949 Ops.pop_back(); 1950 Cst = C; 1951 continue; 1952 } 1953 if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) { 1954 Ops.pop_back(); 1955 Cst = Res; 1956 continue; 1957 } 1958 } 1959 break; 1960 } 1961 // If there was nothing but constants then we are done. 1962 if (Ops.empty()) 1963 return Cst; 1964 1965 // Put the combined constant back at the end of the operand list, except if 1966 // there is no point. For example, an add of 0 gets dropped here, while a 1967 // multiplication by zero turns the whole expression into zero. 1968 if (Cst && Cst != ConstantExpr::getBinOpIdentity(Opcode, I->getType())) { 1969 if (Cst == ConstantExpr::getBinOpAbsorber(Opcode, I->getType())) 1970 return Cst; 1971 Ops.push_back(ValueEntry(0, Cst)); 1972 } 1973 1974 if (Ops.size() == 1) return Ops[0].Op; 1975 1976 // Handle destructive annihilation due to identities between elements in the 1977 // argument list here. 1978 unsigned NumOps = Ops.size(); 1979 switch (Opcode) { 1980 default: break; 1981 case Instruction::And: 1982 case Instruction::Or: 1983 if (Value *Result = OptimizeAndOrXor(Opcode, Ops)) 1984 return Result; 1985 break; 1986 1987 case Instruction::Xor: 1988 if (Value *Result = OptimizeXor(I, Ops)) 1989 return Result; 1990 break; 1991 1992 case Instruction::Add: 1993 case Instruction::FAdd: 1994 if (Value *Result = OptimizeAdd(I, Ops)) 1995 return Result; 1996 break; 1997 1998 case Instruction::Mul: 1999 case Instruction::FMul: 2000 if (Value *Result = OptimizeMul(I, Ops)) 2001 return Result; 2002 break; 2003 } 2004 2005 if (Ops.size() != NumOps) 2006 return OptimizeExpression(I, Ops); 2007 return nullptr; 2008 } 2009 2010 // Remove dead instructions and if any operands are trivially dead add them to 2011 // Insts so they will be removed as well. 2012 void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I, 2013 OrderedSet &Insts) { 2014 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!"); 2015 SmallVector<Value *, 4> Ops(I->operands()); 2016 ValueRankMap.erase(I); 2017 Insts.remove(I); 2018 RedoInsts.remove(I); 2019 llvm::salvageDebugInfo(*I); 2020 I->eraseFromParent(); 2021 for (auto Op : Ops) 2022 if (Instruction *OpInst = dyn_cast<Instruction>(Op)) 2023 if (OpInst->use_empty()) 2024 Insts.insert(OpInst); 2025 } 2026 2027 /// Zap the given instruction, adding interesting operands to the work list. 2028 void ReassociatePass::EraseInst(Instruction *I) { 2029 assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!"); 2030 LLVM_DEBUG(dbgs() << "Erasing dead inst: "; I->dump()); 2031 2032 SmallVector<Value *, 8> Ops(I->operands()); 2033 // Erase the dead instruction. 2034 ValueRankMap.erase(I); 2035 RedoInsts.remove(I); 2036 llvm::salvageDebugInfo(*I); 2037 I->eraseFromParent(); 2038 // Optimize its operands. 2039 SmallPtrSet<Instruction *, 8> Visited; // Detect self-referential nodes. 2040 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 2041 if (Instruction *Op = dyn_cast<Instruction>(Ops[i])) { 2042 // If this is a node in an expression tree, climb to the expression root 2043 // and add that since that's where optimization actually happens. 2044 unsigned Opcode = Op->getOpcode(); 2045 while (Op->hasOneUse() && Op->user_back()->getOpcode() == Opcode && 2046 Visited.insert(Op).second) 2047 Op = Op->user_back(); 2048 2049 // The instruction we're going to push may be coming from a 2050 // dead block, and Reassociate skips the processing of unreachable 2051 // blocks because it's a waste of time and also because it can 2052 // lead to infinite loop due to LLVM's non-standard definition 2053 // of dominance. 2054 if (ValueRankMap.find(Op) != ValueRankMap.end()) 2055 RedoInsts.insert(Op); 2056 } 2057 2058 MadeChange = true; 2059 } 2060 2061 /// Recursively analyze an expression to build a list of instructions that have 2062 /// negative floating-point constant operands. The caller can then transform 2063 /// the list to create positive constants for better reassociation and CSE. 2064 static void getNegatibleInsts(Value *V, 2065 SmallVectorImpl<Instruction *> &Candidates) { 2066 // Handle only one-use instructions. Combining negations does not justify 2067 // replicating instructions. 2068 Instruction *I; 2069 if (!match(V, m_OneUse(m_Instruction(I)))) 2070 return; 2071 2072 // Handle expressions of multiplications and divisions. 2073 // TODO: This could look through floating-point casts. 2074 const APFloat *C; 2075 switch (I->getOpcode()) { 2076 case Instruction::FMul: 2077 // Not expecting non-canonical code here. Bail out and wait. 2078 if (match(I->getOperand(0), m_Constant())) 2079 break; 2080 2081 if (match(I->getOperand(1), m_APFloat(C)) && C->isNegative()) { 2082 Candidates.push_back(I); 2083 LLVM_DEBUG(dbgs() << "FMul with negative constant: " << *I << '\n'); 2084 } 2085 getNegatibleInsts(I->getOperand(0), Candidates); 2086 getNegatibleInsts(I->getOperand(1), Candidates); 2087 break; 2088 case Instruction::FDiv: 2089 // Not expecting non-canonical code here. Bail out and wait. 2090 if (match(I->getOperand(0), m_Constant()) && 2091 match(I->getOperand(1), m_Constant())) 2092 break; 2093 2094 if ((match(I->getOperand(0), m_APFloat(C)) && C->isNegative()) || 2095 (match(I->getOperand(1), m_APFloat(C)) && C->isNegative())) { 2096 Candidates.push_back(I); 2097 LLVM_DEBUG(dbgs() << "FDiv with negative constant: " << *I << '\n'); 2098 } 2099 getNegatibleInsts(I->getOperand(0), Candidates); 2100 getNegatibleInsts(I->getOperand(1), Candidates); 2101 break; 2102 default: 2103 break; 2104 } 2105 } 2106 2107 /// Given an fadd/fsub with an operand that is a one-use instruction 2108 /// (the fadd/fsub), try to change negative floating-point constants into 2109 /// positive constants to increase potential for reassociation and CSE. 2110 Instruction *ReassociatePass::canonicalizeNegFPConstantsForOp(Instruction *I, 2111 Instruction *Op, 2112 Value *OtherOp) { 2113 assert((I->getOpcode() == Instruction::FAdd || 2114 I->getOpcode() == Instruction::FSub) && "Expected fadd/fsub"); 2115 2116 // Collect instructions with negative FP constants from the subtree that ends 2117 // in Op. 2118 SmallVector<Instruction *, 4> Candidates; 2119 getNegatibleInsts(Op, Candidates); 2120 if (Candidates.empty()) 2121 return nullptr; 2122 2123 // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the 2124 // resulting subtract will be broken up later. This can get us into an 2125 // infinite loop during reassociation. 2126 bool IsFSub = I->getOpcode() == Instruction::FSub; 2127 bool NeedsSubtract = !IsFSub && Candidates.size() % 2 == 1; 2128 if (NeedsSubtract && ShouldBreakUpSubtract(I)) 2129 return nullptr; 2130 2131 for (Instruction *Negatible : Candidates) { 2132 const APFloat *C; 2133 if (match(Negatible->getOperand(0), m_APFloat(C))) { 2134 assert(!match(Negatible->getOperand(1), m_Constant()) && 2135 "Expecting only 1 constant operand"); 2136 assert(C->isNegative() && "Expected negative FP constant"); 2137 Negatible->setOperand(0, ConstantFP::get(Negatible->getType(), abs(*C))); 2138 MadeChange = true; 2139 } 2140 if (match(Negatible->getOperand(1), m_APFloat(C))) { 2141 assert(!match(Negatible->getOperand(0), m_Constant()) && 2142 "Expecting only 1 constant operand"); 2143 assert(C->isNegative() && "Expected negative FP constant"); 2144 Negatible->setOperand(1, ConstantFP::get(Negatible->getType(), abs(*C))); 2145 MadeChange = true; 2146 } 2147 } 2148 assert(MadeChange == true && "Negative constant candidate was not changed"); 2149 2150 // Negations cancelled out. 2151 if (Candidates.size() % 2 == 0) 2152 return I; 2153 2154 // Negate the final operand in the expression by flipping the opcode of this 2155 // fadd/fsub. 2156 assert(Candidates.size() % 2 == 1 && "Expected odd number"); 2157 IRBuilder<> Builder(I); 2158 Value *NewInst = IsFSub ? Builder.CreateFAddFMF(OtherOp, Op, I) 2159 : Builder.CreateFSubFMF(OtherOp, Op, I); 2160 I->replaceAllUsesWith(NewInst); 2161 RedoInsts.insert(I); 2162 return dyn_cast<Instruction>(NewInst); 2163 } 2164 2165 /// Canonicalize expressions that contain a negative floating-point constant 2166 /// of the following form: 2167 /// OtherOp + (subtree) -> OtherOp {+/-} (canonical subtree) 2168 /// (subtree) + OtherOp -> OtherOp {+/-} (canonical subtree) 2169 /// OtherOp - (subtree) -> OtherOp {+/-} (canonical subtree) 2170 /// 2171 /// The fadd/fsub opcode may be switched to allow folding a negation into the 2172 /// input instruction. 2173 Instruction *ReassociatePass::canonicalizeNegFPConstants(Instruction *I) { 2174 LLVM_DEBUG(dbgs() << "Combine negations for: " << *I << '\n'); 2175 Value *X; 2176 Instruction *Op; 2177 if (match(I, m_FAdd(m_Value(X), m_OneUse(m_Instruction(Op))))) 2178 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X)) 2179 I = R; 2180 if (match(I, m_FAdd(m_OneUse(m_Instruction(Op)), m_Value(X)))) 2181 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X)) 2182 I = R; 2183 if (match(I, m_FSub(m_Value(X), m_OneUse(m_Instruction(Op))))) 2184 if (Instruction *R = canonicalizeNegFPConstantsForOp(I, Op, X)) 2185 I = R; 2186 return I; 2187 } 2188 2189 /// Inspect and optimize the given instruction. Note that erasing 2190 /// instructions is not allowed. 2191 void ReassociatePass::OptimizeInst(Instruction *I) { 2192 // Only consider operations that we understand. 2193 if (!isa<UnaryOperator>(I) && !isa<BinaryOperator>(I)) 2194 return; 2195 2196 if (I->getOpcode() == Instruction::Shl && isa<ConstantInt>(I->getOperand(1))) 2197 // If an operand of this shift is a reassociable multiply, or if the shift 2198 // is used by a reassociable multiply or add, turn into a multiply. 2199 if (isReassociableOp(I->getOperand(0), Instruction::Mul) || 2200 (I->hasOneUse() && 2201 (isReassociableOp(I->user_back(), Instruction::Mul) || 2202 isReassociableOp(I->user_back(), Instruction::Add)))) { 2203 Instruction *NI = ConvertShiftToMul(I); 2204 RedoInsts.insert(I); 2205 MadeChange = true; 2206 I = NI; 2207 } 2208 2209 // Commute binary operators, to canonicalize the order of their operands. 2210 // This can potentially expose more CSE opportunities, and makes writing other 2211 // transformations simpler. 2212 if (I->isCommutative()) 2213 canonicalizeOperands(I); 2214 2215 // Canonicalize negative constants out of expressions. 2216 if (Instruction *Res = canonicalizeNegFPConstants(I)) 2217 I = Res; 2218 2219 // Don't optimize floating-point instructions unless they are 'fast'. 2220 if (I->getType()->isFPOrFPVectorTy() && !I->isFast()) 2221 return; 2222 2223 // Do not reassociate boolean (i1) expressions. We want to preserve the 2224 // original order of evaluation for short-circuited comparisons that 2225 // SimplifyCFG has folded to AND/OR expressions. If the expression 2226 // is not further optimized, it is likely to be transformed back to a 2227 // short-circuited form for code gen, and the source order may have been 2228 // optimized for the most likely conditions. 2229 if (I->getType()->isIntegerTy(1)) 2230 return; 2231 2232 // If this is a bitwise or instruction of operands 2233 // with no common bits set, convert it to X+Y. 2234 if (I->getOpcode() == Instruction::Or && 2235 shouldConvertOrWithNoCommonBitsToAdd(I) && !isLoadCombineCandidate(I) && 2236 haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), 2237 I->getModule()->getDataLayout(), /*AC=*/nullptr, I, 2238 /*DT=*/nullptr)) { 2239 Instruction *NI = convertOrWithNoCommonBitsToAdd(I); 2240 RedoInsts.insert(I); 2241 MadeChange = true; 2242 I = NI; 2243 } 2244 2245 // If this is a subtract instruction which is not already in negate form, 2246 // see if we can convert it to X+-Y. 2247 if (I->getOpcode() == Instruction::Sub) { 2248 if (ShouldBreakUpSubtract(I)) { 2249 Instruction *NI = BreakUpSubtract(I, RedoInsts); 2250 RedoInsts.insert(I); 2251 MadeChange = true; 2252 I = NI; 2253 } else if (match(I, m_Neg(m_Value()))) { 2254 // Otherwise, this is a negation. See if the operand is a multiply tree 2255 // and if this is not an inner node of a multiply tree. 2256 if (isReassociableOp(I->getOperand(1), Instruction::Mul) && 2257 (!I->hasOneUse() || 2258 !isReassociableOp(I->user_back(), Instruction::Mul))) { 2259 Instruction *NI = LowerNegateToMultiply(I); 2260 // If the negate was simplified, revisit the users to see if we can 2261 // reassociate further. 2262 for (User *U : NI->users()) { 2263 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U)) 2264 RedoInsts.insert(Tmp); 2265 } 2266 RedoInsts.insert(I); 2267 MadeChange = true; 2268 I = NI; 2269 } 2270 } 2271 } else if (I->getOpcode() == Instruction::FNeg || 2272 I->getOpcode() == Instruction::FSub) { 2273 if (ShouldBreakUpSubtract(I)) { 2274 Instruction *NI = BreakUpSubtract(I, RedoInsts); 2275 RedoInsts.insert(I); 2276 MadeChange = true; 2277 I = NI; 2278 } else if (match(I, m_FNeg(m_Value()))) { 2279 // Otherwise, this is a negation. See if the operand is a multiply tree 2280 // and if this is not an inner node of a multiply tree. 2281 Value *Op = isa<BinaryOperator>(I) ? I->getOperand(1) : 2282 I->getOperand(0); 2283 if (isReassociableOp(Op, Instruction::FMul) && 2284 (!I->hasOneUse() || 2285 !isReassociableOp(I->user_back(), Instruction::FMul))) { 2286 // If the negate was simplified, revisit the users to see if we can 2287 // reassociate further. 2288 Instruction *NI = LowerNegateToMultiply(I); 2289 for (User *U : NI->users()) { 2290 if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U)) 2291 RedoInsts.insert(Tmp); 2292 } 2293 RedoInsts.insert(I); 2294 MadeChange = true; 2295 I = NI; 2296 } 2297 } 2298 } 2299 2300 // If this instruction is an associative binary operator, process it. 2301 if (!I->isAssociative()) return; 2302 BinaryOperator *BO = cast<BinaryOperator>(I); 2303 2304 // If this is an interior node of a reassociable tree, ignore it until we 2305 // get to the root of the tree, to avoid N^2 analysis. 2306 unsigned Opcode = BO->getOpcode(); 2307 if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) { 2308 // During the initial run we will get to the root of the tree. 2309 // But if we get here while we are redoing instructions, there is no 2310 // guarantee that the root will be visited. So Redo later 2311 if (BO->user_back() != BO && 2312 BO->getParent() == BO->user_back()->getParent()) 2313 RedoInsts.insert(BO->user_back()); 2314 return; 2315 } 2316 2317 // If this is an add tree that is used by a sub instruction, ignore it 2318 // until we process the subtract. 2319 if (BO->hasOneUse() && BO->getOpcode() == Instruction::Add && 2320 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::Sub) 2321 return; 2322 if (BO->hasOneUse() && BO->getOpcode() == Instruction::FAdd && 2323 cast<Instruction>(BO->user_back())->getOpcode() == Instruction::FSub) 2324 return; 2325 2326 ReassociateExpression(BO); 2327 } 2328 2329 void ReassociatePass::ReassociateExpression(BinaryOperator *I) { 2330 // First, walk the expression tree, linearizing the tree, collecting the 2331 // operand information. 2332 SmallVector<RepeatedValue, 8> Tree; 2333 MadeChange |= LinearizeExprTree(I, Tree, RedoInsts); 2334 SmallVector<ValueEntry, 8> Ops; 2335 Ops.reserve(Tree.size()); 2336 for (const RepeatedValue &E : Tree) 2337 Ops.append(E.second.getZExtValue(), ValueEntry(getRank(E.first), E.first)); 2338 2339 LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n'); 2340 2341 // Now that we have linearized the tree to a list and have gathered all of 2342 // the operands and their ranks, sort the operands by their rank. Use a 2343 // stable_sort so that values with equal ranks will have their relative 2344 // positions maintained (and so the compiler is deterministic). Note that 2345 // this sorts so that the highest ranking values end up at the beginning of 2346 // the vector. 2347 llvm::stable_sort(Ops); 2348 2349 // Now that we have the expression tree in a convenient 2350 // sorted form, optimize it globally if possible. 2351 if (Value *V = OptimizeExpression(I, Ops)) { 2352 if (V == I) 2353 // Self-referential expression in unreachable code. 2354 return; 2355 // This expression tree simplified to something that isn't a tree, 2356 // eliminate it. 2357 LLVM_DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n'); 2358 I->replaceAllUsesWith(V); 2359 if (Instruction *VI = dyn_cast<Instruction>(V)) 2360 if (I->getDebugLoc()) 2361 VI->setDebugLoc(I->getDebugLoc()); 2362 RedoInsts.insert(I); 2363 ++NumAnnihil; 2364 return; 2365 } 2366 2367 // We want to sink immediates as deeply as possible except in the case where 2368 // this is a multiply tree used only by an add, and the immediate is a -1. 2369 // In this case we reassociate to put the negation on the outside so that we 2370 // can fold the negation into the add: (-X)*Y + Z -> Z-X*Y 2371 if (I->hasOneUse()) { 2372 if (I->getOpcode() == Instruction::Mul && 2373 cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add && 2374 isa<ConstantInt>(Ops.back().Op) && 2375 cast<ConstantInt>(Ops.back().Op)->isMinusOne()) { 2376 ValueEntry Tmp = Ops.pop_back_val(); 2377 Ops.insert(Ops.begin(), Tmp); 2378 } else if (I->getOpcode() == Instruction::FMul && 2379 cast<Instruction>(I->user_back())->getOpcode() == 2380 Instruction::FAdd && 2381 isa<ConstantFP>(Ops.back().Op) && 2382 cast<ConstantFP>(Ops.back().Op)->isExactlyValue(-1.0)) { 2383 ValueEntry Tmp = Ops.pop_back_val(); 2384 Ops.insert(Ops.begin(), Tmp); 2385 } 2386 } 2387 2388 LLVM_DEBUG(dbgs() << "RAOut:\t"; PrintOps(I, Ops); dbgs() << '\n'); 2389 2390 if (Ops.size() == 1) { 2391 if (Ops[0].Op == I) 2392 // Self-referential expression in unreachable code. 2393 return; 2394 2395 // This expression tree simplified to something that isn't a tree, 2396 // eliminate it. 2397 I->replaceAllUsesWith(Ops[0].Op); 2398 if (Instruction *OI = dyn_cast<Instruction>(Ops[0].Op)) 2399 OI->setDebugLoc(I->getDebugLoc()); 2400 RedoInsts.insert(I); 2401 return; 2402 } 2403 2404 if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) { 2405 // Find the pair with the highest count in the pairmap and move it to the 2406 // back of the list so that it can later be CSE'd. 2407 // example: 2408 // a*b*c*d*e 2409 // if c*e is the most "popular" pair, we can express this as 2410 // (((c*e)*d)*b)*a 2411 unsigned Max = 1; 2412 unsigned BestRank = 0; 2413 std::pair<unsigned, unsigned> BestPair; 2414 unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin; 2415 for (unsigned i = 0; i < Ops.size() - 1; ++i) 2416 for (unsigned j = i + 1; j < Ops.size(); ++j) { 2417 unsigned Score = 0; 2418 Value *Op0 = Ops[i].Op; 2419 Value *Op1 = Ops[j].Op; 2420 if (std::less<Value *>()(Op1, Op0)) 2421 std::swap(Op0, Op1); 2422 auto it = PairMap[Idx].find({Op0, Op1}); 2423 if (it != PairMap[Idx].end()) { 2424 // Functions like BreakUpSubtract() can erase the Values we're using 2425 // as keys and create new Values after we built the PairMap. There's a 2426 // small chance that the new nodes can have the same address as 2427 // something already in the table. We shouldn't accumulate the stored 2428 // score in that case as it refers to the wrong Value. 2429 if (it->second.isValid()) 2430 Score += it->second.Score; 2431 } 2432 2433 unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank); 2434 if (Score > Max || (Score == Max && MaxRank < BestRank)) { 2435 BestPair = {i, j}; 2436 Max = Score; 2437 BestRank = MaxRank; 2438 } 2439 } 2440 if (Max > 1) { 2441 auto Op0 = Ops[BestPair.first]; 2442 auto Op1 = Ops[BestPair.second]; 2443 Ops.erase(&Ops[BestPair.second]); 2444 Ops.erase(&Ops[BestPair.first]); 2445 Ops.push_back(Op0); 2446 Ops.push_back(Op1); 2447 } 2448 } 2449 // Now that we ordered and optimized the expressions, splat them back into 2450 // the expression tree, removing any unneeded nodes. 2451 RewriteExprTree(I, Ops); 2452 } 2453 2454 void 2455 ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) { 2456 // Make a "pairmap" of how often each operand pair occurs. 2457 for (BasicBlock *BI : RPOT) { 2458 for (Instruction &I : *BI) { 2459 if (!I.isAssociative()) 2460 continue; 2461 2462 // Ignore nodes that aren't at the root of trees. 2463 if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode()) 2464 continue; 2465 2466 // Collect all operands in a single reassociable expression. 2467 // Since Reassociate has already been run once, we can assume things 2468 // are already canonical according to Reassociation's regime. 2469 SmallVector<Value *, 8> Worklist = { I.getOperand(0), I.getOperand(1) }; 2470 SmallVector<Value *, 8> Ops; 2471 while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) { 2472 Value *Op = Worklist.pop_back_val(); 2473 Instruction *OpI = dyn_cast<Instruction>(Op); 2474 if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) { 2475 Ops.push_back(Op); 2476 continue; 2477 } 2478 // Be paranoid about self-referencing expressions in unreachable code. 2479 if (OpI->getOperand(0) != OpI) 2480 Worklist.push_back(OpI->getOperand(0)); 2481 if (OpI->getOperand(1) != OpI) 2482 Worklist.push_back(OpI->getOperand(1)); 2483 } 2484 // Skip extremely long expressions. 2485 if (Ops.size() > GlobalReassociateLimit) 2486 continue; 2487 2488 // Add all pairwise combinations of operands to the pair map. 2489 unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin; 2490 SmallSet<std::pair<Value *, Value*>, 32> Visited; 2491 for (unsigned i = 0; i < Ops.size() - 1; ++i) { 2492 for (unsigned j = i + 1; j < Ops.size(); ++j) { 2493 // Canonicalize operand orderings. 2494 Value *Op0 = Ops[i]; 2495 Value *Op1 = Ops[j]; 2496 if (std::less<Value *>()(Op1, Op0)) 2497 std::swap(Op0, Op1); 2498 if (!Visited.insert({Op0, Op1}).second) 2499 continue; 2500 auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, {Op0, Op1, 1}}); 2501 if (!res.second) { 2502 // If either key value has been erased then we've got the same 2503 // address by coincidence. That can't happen here because nothing is 2504 // erasing values but it can happen by the time we're querying the 2505 // map. 2506 assert(res.first->second.isValid() && "WeakVH invalidated"); 2507 ++res.first->second.Score; 2508 } 2509 } 2510 } 2511 } 2512 } 2513 } 2514 2515 PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) { 2516 // Get the functions basic blocks in Reverse Post Order. This order is used by 2517 // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic 2518 // blocks (it has been seen that the analysis in this pass could hang when 2519 // analysing dead basic blocks). 2520 ReversePostOrderTraversal<Function *> RPOT(&F); 2521 2522 // Calculate the rank map for F. 2523 BuildRankMap(F, RPOT); 2524 2525 // Build the pair map before running reassociate. 2526 // Technically this would be more accurate if we did it after one round 2527 // of reassociation, but in practice it doesn't seem to help much on 2528 // real-world code, so don't waste the compile time running reassociate 2529 // twice. 2530 // If a user wants, they could expicitly run reassociate twice in their 2531 // pass pipeline for further potential gains. 2532 // It might also be possible to update the pair map during runtime, but the 2533 // overhead of that may be large if there's many reassociable chains. 2534 BuildPairMap(RPOT); 2535 2536 MadeChange = false; 2537 2538 // Traverse the same blocks that were analysed by BuildRankMap. 2539 for (BasicBlock *BI : RPOT) { 2540 assert(RankMap.count(&*BI) && "BB should be ranked."); 2541 // Optimize every instruction in the basic block. 2542 for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;) 2543 if (isInstructionTriviallyDead(&*II)) { 2544 EraseInst(&*II++); 2545 } else { 2546 OptimizeInst(&*II); 2547 assert(II->getParent() == &*BI && "Moved to a different block!"); 2548 ++II; 2549 } 2550 2551 // Make a copy of all the instructions to be redone so we can remove dead 2552 // instructions. 2553 OrderedSet ToRedo(RedoInsts); 2554 // Iterate over all instructions to be reevaluated and remove trivially dead 2555 // instructions. If any operand of the trivially dead instruction becomes 2556 // dead mark it for deletion as well. Continue this process until all 2557 // trivially dead instructions have been removed. 2558 while (!ToRedo.empty()) { 2559 Instruction *I = ToRedo.pop_back_val(); 2560 if (isInstructionTriviallyDead(I)) { 2561 RecursivelyEraseDeadInsts(I, ToRedo); 2562 MadeChange = true; 2563 } 2564 } 2565 2566 // Now that we have removed dead instructions, we can reoptimize the 2567 // remaining instructions. 2568 while (!RedoInsts.empty()) { 2569 Instruction *I = RedoInsts.front(); 2570 RedoInsts.erase(RedoInsts.begin()); 2571 if (isInstructionTriviallyDead(I)) 2572 EraseInst(I); 2573 else 2574 OptimizeInst(I); 2575 } 2576 } 2577 2578 // We are done with the rank map and pair map. 2579 RankMap.clear(); 2580 ValueRankMap.clear(); 2581 for (auto &Entry : PairMap) 2582 Entry.clear(); 2583 2584 if (MadeChange) { 2585 PreservedAnalyses PA; 2586 PA.preserveSet<CFGAnalyses>(); 2587 return PA; 2588 } 2589 2590 return PreservedAnalyses::all(); 2591 } 2592 2593 namespace { 2594 2595 class ReassociateLegacyPass : public FunctionPass { 2596 ReassociatePass Impl; 2597 2598 public: 2599 static char ID; // Pass identification, replacement for typeid 2600 2601 ReassociateLegacyPass() : FunctionPass(ID) { 2602 initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry()); 2603 } 2604 2605 bool runOnFunction(Function &F) override { 2606 if (skipFunction(F)) 2607 return false; 2608 2609 FunctionAnalysisManager DummyFAM; 2610 auto PA = Impl.run(F, DummyFAM); 2611 return !PA.areAllPreserved(); 2612 } 2613 2614 void getAnalysisUsage(AnalysisUsage &AU) const override { 2615 AU.setPreservesCFG(); 2616 AU.addPreserved<AAResultsWrapperPass>(); 2617 AU.addPreserved<BasicAAWrapperPass>(); 2618 AU.addPreserved<GlobalsAAWrapperPass>(); 2619 } 2620 }; 2621 2622 } // end anonymous namespace 2623 2624 char ReassociateLegacyPass::ID = 0; 2625 2626 INITIALIZE_PASS(ReassociateLegacyPass, "reassociate", 2627 "Reassociate expressions", false, false) 2628 2629 // Public interface to the Reassociate pass 2630 FunctionPass *llvm::createReassociatePass() { 2631 return new ReassociateLegacyPass(); 2632 } 2633