1 //===- GuardWidening.cpp - ---- Guard widening ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the guard widening pass. The semantics of the 10 // @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails 11 // more often that it did before the transform. This optimization is called 12 // "widening" and can be used hoist and common runtime checks in situations like 13 // these: 14 // 15 // %cmp0 = 7 u< Length 16 // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] 17 // call @unknown_side_effects() 18 // %cmp1 = 9 u< Length 19 // call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ] 20 // ... 21 // 22 // => 23 // 24 // %cmp0 = 9 u< Length 25 // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] 26 // call @unknown_side_effects() 27 // ... 28 // 29 // If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a 30 // generic implementation of the same function, which will have the correct 31 // semantics from that point onward. It is always _legal_ to deoptimize (so 32 // replacing %cmp0 with false is "correct"), though it may not always be 33 // profitable to do so. 34 // 35 // NB! This pass is a work in progress. It hasn't been tuned to be "production 36 // ready" yet. It is known to have quadriatic running time and will not scale 37 // to large numbers of guards 38 // 39 //===----------------------------------------------------------------------===// 40 41 #include "llvm/Transforms/Scalar/GuardWidening.h" 42 #include "llvm/ADT/DenseMap.h" 43 #include "llvm/ADT/DepthFirstIterator.h" 44 #include "llvm/ADT/Statistic.h" 45 #include "llvm/Analysis/BranchProbabilityInfo.h" 46 #include "llvm/Analysis/GuardUtils.h" 47 #include "llvm/Analysis/LoopInfo.h" 48 #include "llvm/Analysis/LoopPass.h" 49 #include "llvm/Analysis/MemorySSAUpdater.h" 50 #include "llvm/Analysis/PostDominators.h" 51 #include "llvm/Analysis/ValueTracking.h" 52 #include "llvm/IR/ConstantRange.h" 53 #include "llvm/IR/Dominators.h" 54 #include "llvm/IR/IntrinsicInst.h" 55 #include "llvm/IR/PatternMatch.h" 56 #include "llvm/InitializePasses.h" 57 #include "llvm/Pass.h" 58 #include "llvm/Support/CommandLine.h" 59 #include "llvm/Support/Debug.h" 60 #include "llvm/Support/KnownBits.h" 61 #include "llvm/Transforms/Scalar.h" 62 #include "llvm/Transforms/Utils/GuardUtils.h" 63 #include "llvm/Transforms/Utils/LoopUtils.h" 64 #include <functional> 65 66 using namespace llvm; 67 68 #define DEBUG_TYPE "guard-widening" 69 70 STATISTIC(GuardsEliminated, "Number of eliminated guards"); 71 STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches"); 72 73 static cl::opt<bool> 74 WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden, 75 cl::desc("Whether or not we should widen guards " 76 "expressed as branches by widenable conditions"), 77 cl::init(true)); 78 79 namespace { 80 81 // Get the condition of \p I. It can either be a guard or a conditional branch. 82 static Value *getCondition(Instruction *I) { 83 if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { 84 assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && 85 "Bad guard intrinsic?"); 86 return GI->getArgOperand(0); 87 } 88 Value *Cond, *WC; 89 BasicBlock *IfTrueBB, *IfFalseBB; 90 if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB)) 91 return Cond; 92 93 return cast<BranchInst>(I)->getCondition(); 94 } 95 96 // Set the condition for \p I to \p NewCond. \p I can either be a guard or a 97 // conditional branch. 98 static void setCondition(Instruction *I, Value *NewCond) { 99 if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { 100 assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && 101 "Bad guard intrinsic?"); 102 GI->setArgOperand(0, NewCond); 103 return; 104 } 105 cast<BranchInst>(I)->setCondition(NewCond); 106 } 107 108 // Eliminates the guard instruction properly. 109 static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) { 110 GuardInst->eraseFromParent(); 111 if (MSSAU) 112 MSSAU->removeMemoryAccess(GuardInst); 113 ++GuardsEliminated; 114 } 115 116 class GuardWideningImpl { 117 DominatorTree &DT; 118 PostDominatorTree *PDT; 119 LoopInfo &LI; 120 MemorySSAUpdater *MSSAU; 121 122 /// Together, these describe the region of interest. This might be all of 123 /// the blocks within a function, or only a given loop's blocks and preheader. 124 DomTreeNode *Root; 125 std::function<bool(BasicBlock*)> BlockFilter; 126 127 /// The set of guards and conditional branches whose conditions have been 128 /// widened into dominating guards. 129 SmallVector<Instruction *, 16> EliminatedGuardsAndBranches; 130 131 /// The set of guards which have been widened to include conditions to other 132 /// guards. 133 DenseSet<Instruction *> WidenedGuards; 134 135 /// Try to eliminate instruction \p Instr by widening it into an earlier 136 /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that 137 /// is currently visiting the block containing \p Guard, and \p GuardsPerBlock 138 /// maps BasicBlocks to the set of guards seen in that block. 139 bool eliminateInstrViaWidening( 140 Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI, 141 const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> & 142 GuardsPerBlock, bool InvertCondition = false); 143 144 /// Used to keep track of which widening potential is more effective. 145 enum WideningScore { 146 /// Don't widen. 147 WS_IllegalOrNegative, 148 149 /// Widening is performance neutral as far as the cycles spent in check 150 /// conditions goes (but can still help, e.g., code layout, having less 151 /// deopt state). 152 WS_Neutral, 153 154 /// Widening is profitable. 155 WS_Positive, 156 157 /// Widening is very profitable. Not significantly different from \c 158 /// WS_Positive, except by the order. 159 WS_VeryPositive 160 }; 161 162 static StringRef scoreTypeToString(WideningScore WS); 163 164 /// Compute the score for widening the condition in \p DominatedInstr 165 /// into \p DominatingGuard. If \p InvertCond is set, then we widen the 166 /// inverted condition of the dominating guard. 167 WideningScore computeWideningScore(Instruction *DominatedInstr, 168 Instruction *DominatingGuard, 169 bool InvertCond); 170 171 /// Helper to check if \p V can be hoisted to \p InsertPos. 172 bool isAvailableAt(const Value *V, const Instruction *InsertPos) const { 173 SmallPtrSet<const Instruction *, 8> Visited; 174 return isAvailableAt(V, InsertPos, Visited); 175 } 176 177 bool isAvailableAt(const Value *V, const Instruction *InsertPos, 178 SmallPtrSetImpl<const Instruction *> &Visited) const; 179 180 /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c 181 /// isAvailableAt returned true. 182 void makeAvailableAt(Value *V, Instruction *InsertPos) const; 183 184 /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try 185 /// to generate an expression computing the logical AND of \p Cond0 and (\p 186 /// Cond1 XOR \p InvertCondition). 187 /// Return true if the expression computing the AND is only as 188 /// expensive as computing one of the two. If \p InsertPt is true then 189 /// actually generate the resulting expression, make it available at \p 190 /// InsertPt and return it in \p Result (else no change to the IR is made). 191 bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt, 192 Value *&Result, bool InvertCondition); 193 194 /// Represents a range check of the form \c Base + \c Offset u< \c Length, 195 /// with the constraint that \c Length is not negative. \c CheckInst is the 196 /// pre-existing instruction in the IR that computes the result of this range 197 /// check. 198 class RangeCheck { 199 const Value *Base; 200 const ConstantInt *Offset; 201 const Value *Length; 202 ICmpInst *CheckInst; 203 204 public: 205 explicit RangeCheck(const Value *Base, const ConstantInt *Offset, 206 const Value *Length, ICmpInst *CheckInst) 207 : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {} 208 209 void setBase(const Value *NewBase) { Base = NewBase; } 210 void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; } 211 212 const Value *getBase() const { return Base; } 213 const ConstantInt *getOffset() const { return Offset; } 214 const APInt &getOffsetValue() const { return getOffset()->getValue(); } 215 const Value *getLength() const { return Length; }; 216 ICmpInst *getCheckInst() const { return CheckInst; } 217 218 void print(raw_ostream &OS, bool PrintTypes = false) { 219 OS << "Base: "; 220 Base->printAsOperand(OS, PrintTypes); 221 OS << " Offset: "; 222 Offset->printAsOperand(OS, PrintTypes); 223 OS << " Length: "; 224 Length->printAsOperand(OS, PrintTypes); 225 } 226 227 LLVM_DUMP_METHOD void dump() { 228 print(dbgs()); 229 dbgs() << "\n"; 230 } 231 }; 232 233 /// Parse \p CheckCond into a conjunction (logical-and) of range checks; and 234 /// append them to \p Checks. Returns true on success, may clobber \c Checks 235 /// on failure. 236 bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) { 237 SmallPtrSet<const Value *, 8> Visited; 238 return parseRangeChecks(CheckCond, Checks, Visited); 239 } 240 241 bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks, 242 SmallPtrSetImpl<const Value *> &Visited); 243 244 /// Combine the checks in \p Checks into a smaller set of checks and append 245 /// them into \p CombinedChecks. Return true on success (i.e. all of checks 246 /// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks 247 /// and \p CombinedChecks on success and on failure. 248 bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks, 249 SmallVectorImpl<RangeCheck> &CombinedChecks) const; 250 251 /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of 252 /// computing only one of the two expressions? 253 bool isWideningCondProfitable(Value *Cond0, Value *Cond1, bool InvertCond) { 254 Value *ResultUnused; 255 return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused, 256 InvertCond); 257 } 258 259 /// If \p InvertCondition is false, Widen \p ToWiden to fail if 260 /// \p NewCondition is false, otherwise make it fail if \p NewCondition is 261 /// true (in addition to whatever it is already checking). 262 void widenGuard(Instruction *ToWiden, Value *NewCondition, 263 bool InvertCondition) { 264 Value *Result; 265 266 widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result, 267 InvertCondition); 268 if (isGuardAsWidenableBranch(ToWiden)) { 269 setWidenableBranchCond(cast<BranchInst>(ToWiden), Result); 270 return; 271 } 272 setCondition(ToWiden, Result); 273 } 274 275 public: 276 explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT, 277 LoopInfo &LI, MemorySSAUpdater *MSSAU, 278 DomTreeNode *Root, 279 std::function<bool(BasicBlock*)> BlockFilter) 280 : DT(DT), PDT(PDT), LI(LI), MSSAU(MSSAU), Root(Root), 281 BlockFilter(BlockFilter) {} 282 283 /// The entry point for this pass. 284 bool run(); 285 }; 286 } 287 288 static bool isSupportedGuardInstruction(const Instruction *Insn) { 289 if (isGuard(Insn)) 290 return true; 291 if (WidenBranchGuards && isGuardAsWidenableBranch(Insn)) 292 return true; 293 return false; 294 } 295 296 bool GuardWideningImpl::run() { 297 DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock; 298 bool Changed = false; 299 for (auto DFI = df_begin(Root), DFE = df_end(Root); 300 DFI != DFE; ++DFI) { 301 auto *BB = (*DFI)->getBlock(); 302 if (!BlockFilter(BB)) 303 continue; 304 305 auto &CurrentList = GuardsInBlock[BB]; 306 307 for (auto &I : *BB) 308 if (isSupportedGuardInstruction(&I)) 309 CurrentList.push_back(cast<Instruction>(&I)); 310 311 for (auto *II : CurrentList) 312 Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock); 313 } 314 315 assert(EliminatedGuardsAndBranches.empty() || Changed); 316 for (auto *I : EliminatedGuardsAndBranches) 317 if (!WidenedGuards.count(I)) { 318 assert(isa<ConstantInt>(getCondition(I)) && "Should be!"); 319 if (isSupportedGuardInstruction(I)) 320 eliminateGuard(I, MSSAU); 321 else { 322 assert(isa<BranchInst>(I) && 323 "Eliminated something other than guard or branch?"); 324 ++CondBranchEliminated; 325 } 326 } 327 328 return Changed; 329 } 330 331 bool GuardWideningImpl::eliminateInstrViaWidening( 332 Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI, 333 const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> & 334 GuardsInBlock, bool InvertCondition) { 335 // Ignore trivial true or false conditions. These instructions will be 336 // trivially eliminated by any cleanup pass. Do not erase them because other 337 // guards can possibly be widened into them. 338 if (isa<ConstantInt>(getCondition(Instr))) 339 return false; 340 341 Instruction *BestSoFar = nullptr; 342 auto BestScoreSoFar = WS_IllegalOrNegative; 343 344 // In the set of dominating guards, find the one we can merge GuardInst with 345 // for the most profit. 346 for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) { 347 auto *CurBB = DFSI.getPath(i)->getBlock(); 348 if (!BlockFilter(CurBB)) 349 break; 350 assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!"); 351 const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second; 352 353 auto I = GuardsInCurBB.begin(); 354 auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr) 355 : GuardsInCurBB.end(); 356 357 #ifndef NDEBUG 358 { 359 unsigned Index = 0; 360 for (auto &I : *CurBB) { 361 if (Index == GuardsInCurBB.size()) 362 break; 363 if (GuardsInCurBB[Index] == &I) 364 Index++; 365 } 366 assert(Index == GuardsInCurBB.size() && 367 "Guards expected to be in order!"); 368 } 369 #endif 370 371 assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?"); 372 373 for (auto *Candidate : make_range(I, E)) { 374 auto Score = computeWideningScore(Instr, Candidate, InvertCondition); 375 LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr) 376 << " and " << *getCondition(Candidate) << " is " 377 << scoreTypeToString(Score) << "\n"); 378 if (Score > BestScoreSoFar) { 379 BestScoreSoFar = Score; 380 BestSoFar = Candidate; 381 } 382 } 383 } 384 385 if (BestScoreSoFar == WS_IllegalOrNegative) { 386 LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n"); 387 return false; 388 } 389 390 assert(BestSoFar != Instr && "Should have never visited same guard!"); 391 assert(DT.dominates(BestSoFar, Instr) && "Should be!"); 392 393 LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar 394 << " with score " << scoreTypeToString(BestScoreSoFar) 395 << "\n"); 396 widenGuard(BestSoFar, getCondition(Instr), InvertCondition); 397 auto NewGuardCondition = InvertCondition 398 ? ConstantInt::getFalse(Instr->getContext()) 399 : ConstantInt::getTrue(Instr->getContext()); 400 setCondition(Instr, NewGuardCondition); 401 EliminatedGuardsAndBranches.push_back(Instr); 402 WidenedGuards.insert(BestSoFar); 403 return true; 404 } 405 406 GuardWideningImpl::WideningScore 407 GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr, 408 Instruction *DominatingGuard, 409 bool InvertCond) { 410 Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent()); 411 Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent()); 412 bool HoistingOutOfLoop = false; 413 414 if (DominatingGuardLoop != DominatedInstrLoop) { 415 // Be conservative and don't widen into a sibling loop. TODO: If the 416 // sibling is colder, we should consider allowing this. 417 if (DominatingGuardLoop && 418 !DominatingGuardLoop->contains(DominatedInstrLoop)) 419 return WS_IllegalOrNegative; 420 421 HoistingOutOfLoop = true; 422 } 423 424 if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard)) 425 return WS_IllegalOrNegative; 426 427 // If the guard was conditional executed, it may never be reached 428 // dynamically. There are two potential downsides to hoisting it out of the 429 // conditionally executed region: 1) we may spuriously deopt without need and 430 // 2) we have the extra cost of computing the guard condition in the common 431 // case. At the moment, we really only consider the second in our heuristic 432 // here. TODO: evaluate cost model for spurious deopt 433 // NOTE: As written, this also lets us hoist right over another guard which 434 // is essentially just another spelling for control flow. 435 if (isWideningCondProfitable(getCondition(DominatedInstr), 436 getCondition(DominatingGuard), InvertCond)) 437 return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive; 438 439 if (HoistingOutOfLoop) 440 return WS_Positive; 441 442 // Returns true if we might be hoisting above explicit control flow. Note 443 // that this completely ignores implicit control flow (guards, calls which 444 // throw, etc...). That choice appears arbitrary. 445 auto MaybeHoistingOutOfIf = [&]() { 446 auto *DominatingBlock = DominatingGuard->getParent(); 447 auto *DominatedBlock = DominatedInstr->getParent(); 448 if (isGuardAsWidenableBranch(DominatingGuard)) 449 DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0); 450 451 // Same Block? 452 if (DominatedBlock == DominatingBlock) 453 return false; 454 // Obvious successor (common loop header/preheader case) 455 if (DominatedBlock == DominatingBlock->getUniqueSuccessor()) 456 return false; 457 // TODO: diamond, triangle cases 458 if (!PDT) return true; 459 return !PDT->dominates(DominatedBlock, DominatingBlock); 460 }; 461 462 return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral; 463 } 464 465 bool GuardWideningImpl::isAvailableAt( 466 const Value *V, const Instruction *Loc, 467 SmallPtrSetImpl<const Instruction *> &Visited) const { 468 auto *Inst = dyn_cast<Instruction>(V); 469 if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst)) 470 return true; 471 472 if (!isSafeToSpeculativelyExecute(Inst, Loc, &DT) || 473 Inst->mayReadFromMemory()) 474 return false; 475 476 Visited.insert(Inst); 477 478 // We only want to go _up_ the dominance chain when recursing. 479 assert(!isa<PHINode>(Loc) && 480 "PHIs should return false for isSafeToSpeculativelyExecute"); 481 assert(DT.isReachableFromEntry(Inst->getParent()) && 482 "We did a DFS from the block entry!"); 483 return all_of(Inst->operands(), 484 [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); }); 485 } 486 487 void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const { 488 auto *Inst = dyn_cast<Instruction>(V); 489 if (!Inst || DT.dominates(Inst, Loc)) 490 return; 491 492 assert(isSafeToSpeculativelyExecute(Inst, Loc, &DT) && 493 !Inst->mayReadFromMemory() && "Should've checked with isAvailableAt!"); 494 495 for (Value *Op : Inst->operands()) 496 makeAvailableAt(Op, Loc); 497 498 Inst->moveBefore(Loc); 499 } 500 501 bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1, 502 Instruction *InsertPt, Value *&Result, 503 bool InvertCondition) { 504 using namespace llvm::PatternMatch; 505 506 { 507 // L >u C0 && L >u C1 -> L >u max(C0, C1) 508 ConstantInt *RHS0, *RHS1; 509 Value *LHS; 510 ICmpInst::Predicate Pred0, Pred1; 511 if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) && 512 match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) { 513 if (InvertCondition) 514 Pred1 = ICmpInst::getInversePredicate(Pred1); 515 516 ConstantRange CR0 = 517 ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue()); 518 ConstantRange CR1 = 519 ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue()); 520 521 // SubsetIntersect is a subset of the actual mathematical intersection of 522 // CR0 and CR1, while SupersetIntersect is a superset of the actual 523 // mathematical intersection. If these two ConstantRanges are equal, then 524 // we know we were able to represent the actual mathematical intersection 525 // of CR0 and CR1, and can use the same to generate an icmp instruction. 526 // 527 // Given what we're doing here and the semantics of guards, it would 528 // actually be correct to just use SubsetIntersect, but that may be too 529 // aggressive in cases we care about. 530 auto SubsetIntersect = CR0.inverse().unionWith(CR1.inverse()).inverse(); 531 auto SupersetIntersect = CR0.intersectWith(CR1); 532 533 APInt NewRHSAP; 534 CmpInst::Predicate Pred; 535 if (SubsetIntersect == SupersetIntersect && 536 SubsetIntersect.getEquivalentICmp(Pred, NewRHSAP)) { 537 if (InsertPt) { 538 ConstantInt *NewRHS = ConstantInt::get(Cond0->getContext(), NewRHSAP); 539 Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk"); 540 } 541 return true; 542 } 543 } 544 } 545 546 { 547 SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks; 548 // TODO: Support InvertCondition case? 549 if (!InvertCondition && 550 parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) && 551 combineRangeChecks(Checks, CombinedChecks)) { 552 if (InsertPt) { 553 Result = nullptr; 554 for (auto &RC : CombinedChecks) { 555 makeAvailableAt(RC.getCheckInst(), InsertPt); 556 if (Result) 557 Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "", 558 InsertPt); 559 else 560 Result = RC.getCheckInst(); 561 } 562 assert(Result && "Failed to find result value"); 563 Result->setName("wide.chk"); 564 } 565 return true; 566 } 567 } 568 569 // Base case -- just logical-and the two conditions together. 570 571 if (InsertPt) { 572 makeAvailableAt(Cond0, InsertPt); 573 makeAvailableAt(Cond1, InsertPt); 574 if (InvertCondition) 575 Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt); 576 Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt); 577 } 578 579 // We were not able to compute Cond0 AND Cond1 for the price of one. 580 return false; 581 } 582 583 bool GuardWideningImpl::parseRangeChecks( 584 Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks, 585 SmallPtrSetImpl<const Value *> &Visited) { 586 if (!Visited.insert(CheckCond).second) 587 return true; 588 589 using namespace llvm::PatternMatch; 590 591 { 592 Value *AndLHS, *AndRHS; 593 if (match(CheckCond, m_And(m_Value(AndLHS), m_Value(AndRHS)))) 594 return parseRangeChecks(AndLHS, Checks) && 595 parseRangeChecks(AndRHS, Checks); 596 } 597 598 auto *IC = dyn_cast<ICmpInst>(CheckCond); 599 if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() || 600 (IC->getPredicate() != ICmpInst::ICMP_ULT && 601 IC->getPredicate() != ICmpInst::ICMP_UGT)) 602 return false; 603 604 const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1); 605 if (IC->getPredicate() == ICmpInst::ICMP_UGT) 606 std::swap(CmpLHS, CmpRHS); 607 608 auto &DL = IC->getModule()->getDataLayout(); 609 610 GuardWideningImpl::RangeCheck Check( 611 CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())), 612 CmpRHS, IC); 613 614 if (!isKnownNonNegative(Check.getLength(), DL)) 615 return false; 616 617 // What we have in \c Check now is a correct interpretation of \p CheckCond. 618 // Try to see if we can move some constant offsets into the \c Offset field. 619 620 bool Changed; 621 auto &Ctx = CheckCond->getContext(); 622 623 do { 624 Value *OpLHS; 625 ConstantInt *OpRHS; 626 Changed = false; 627 628 #ifndef NDEBUG 629 auto *BaseInst = dyn_cast<Instruction>(Check.getBase()); 630 assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) && 631 "Unreachable instruction?"); 632 #endif 633 634 if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { 635 Check.setBase(OpLHS); 636 APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); 637 Check.setOffset(ConstantInt::get(Ctx, NewOffset)); 638 Changed = true; 639 } else if (match(Check.getBase(), 640 m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { 641 KnownBits Known = computeKnownBits(OpLHS, DL); 642 if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) { 643 Check.setBase(OpLHS); 644 APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); 645 Check.setOffset(ConstantInt::get(Ctx, NewOffset)); 646 Changed = true; 647 } 648 } 649 } while (Changed); 650 651 Checks.push_back(Check); 652 return true; 653 } 654 655 bool GuardWideningImpl::combineRangeChecks( 656 SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks, 657 SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const { 658 unsigned OldCount = Checks.size(); 659 while (!Checks.empty()) { 660 // Pick all of the range checks with a specific base and length, and try to 661 // merge them. 662 const Value *CurrentBase = Checks.front().getBase(); 663 const Value *CurrentLength = Checks.front().getLength(); 664 665 SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks; 666 667 auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) { 668 return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength; 669 }; 670 671 copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck); 672 erase_if(Checks, IsCurrentCheck); 673 674 assert(CurrentChecks.size() != 0 && "We know we have at least one!"); 675 676 if (CurrentChecks.size() < 3) { 677 llvm::append_range(RangeChecksOut, CurrentChecks); 678 continue; 679 } 680 681 // CurrentChecks.size() will typically be 3 here, but so far there has been 682 // no need to hard-code that fact. 683 684 llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS, 685 const GuardWideningImpl::RangeCheck &RHS) { 686 return LHS.getOffsetValue().slt(RHS.getOffsetValue()); 687 }); 688 689 // Note: std::sort should not invalidate the ChecksStart iterator. 690 691 const ConstantInt *MinOffset = CurrentChecks.front().getOffset(); 692 const ConstantInt *MaxOffset = CurrentChecks.back().getOffset(); 693 694 unsigned BitWidth = MaxOffset->getValue().getBitWidth(); 695 if ((MaxOffset->getValue() - MinOffset->getValue()) 696 .ugt(APInt::getSignedMinValue(BitWidth))) 697 return false; 698 699 APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue(); 700 const APInt &HighOffset = MaxOffset->getValue(); 701 auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) { 702 return (HighOffset - RC.getOffsetValue()).ult(MaxDiff); 703 }; 704 705 if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK)) 706 return false; 707 708 // We have a series of f+1 checks as: 709 // 710 // I+k_0 u< L ... Chk_0 711 // I+k_1 u< L ... Chk_1 712 // ... 713 // I+k_f u< L ... Chk_f 714 // 715 // with forall i in [0,f]: k_f-k_i u< k_f-k_0 ... Precond_0 716 // k_f-k_0 u< INT_MIN+k_f ... Precond_1 717 // k_f != k_0 ... Precond_2 718 // 719 // Claim: 720 // Chk_0 AND Chk_f implies all the other checks 721 // 722 // Informal proof sketch: 723 // 724 // We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap 725 // (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and 726 // thus I+k_f is the greatest unsigned value in that range. 727 // 728 // This combined with Ckh_(f+1) shows that everything in that range is u< L. 729 // Via Precond_0 we know that all of the indices in Chk_0 through Chk_(f+1) 730 // lie in [I+k_0,I+k_f], this proving our claim. 731 // 732 // To see that [I+k_0,I+k_f] is not a wrapping range, note that there are 733 // two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal 734 // since k_0 != k_f). In the former case, [I+k_0,I+k_f] is not a wrapping 735 // range by definition, and the latter case is impossible: 736 // 737 // 0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1) 738 // xxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 739 // 740 // For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted 741 // with 'x' above) to be at least >u INT_MIN. 742 743 RangeChecksOut.emplace_back(CurrentChecks.front()); 744 RangeChecksOut.emplace_back(CurrentChecks.back()); 745 } 746 747 assert(RangeChecksOut.size() <= OldCount && "We pessimized!"); 748 return RangeChecksOut.size() != OldCount; 749 } 750 751 #ifndef NDEBUG 752 StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) { 753 switch (WS) { 754 case WS_IllegalOrNegative: 755 return "IllegalOrNegative"; 756 case WS_Neutral: 757 return "Neutral"; 758 case WS_Positive: 759 return "Positive"; 760 case WS_VeryPositive: 761 return "VeryPositive"; 762 } 763 764 llvm_unreachable("Fully covered switch above!"); 765 } 766 #endif 767 768 PreservedAnalyses GuardWideningPass::run(Function &F, 769 FunctionAnalysisManager &AM) { 770 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 771 auto &LI = AM.getResult<LoopAnalysis>(F); 772 auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); 773 auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F); 774 std::unique_ptr<MemorySSAUpdater> MSSAU; 775 if (MSSAA) 776 MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA()); 777 if (!GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr, 778 DT.getRootNode(), [](BasicBlock *) { return true; }) 779 .run()) 780 return PreservedAnalyses::all(); 781 782 PreservedAnalyses PA; 783 PA.preserveSet<CFGAnalyses>(); 784 PA.preserve<MemorySSAAnalysis>(); 785 return PA; 786 } 787 788 PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM, 789 LoopStandardAnalysisResults &AR, 790 LPMUpdater &U) { 791 BasicBlock *RootBB = L.getLoopPredecessor(); 792 if (!RootBB) 793 RootBB = L.getHeader(); 794 auto BlockFilter = [&](BasicBlock *BB) { 795 return BB == RootBB || L.contains(BB); 796 }; 797 std::unique_ptr<MemorySSAUpdater> MSSAU; 798 if (AR.MSSA) 799 MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA); 800 if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, MSSAU ? MSSAU.get() : nullptr, 801 AR.DT.getNode(RootBB), BlockFilter).run()) 802 return PreservedAnalyses::all(); 803 804 auto PA = getLoopPassPreservedAnalyses(); 805 if (AR.MSSA) 806 PA.preserve<MemorySSAAnalysis>(); 807 return PA; 808 } 809 810 namespace { 811 struct GuardWideningLegacyPass : public FunctionPass { 812 static char ID; 813 814 GuardWideningLegacyPass() : FunctionPass(ID) { 815 initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry()); 816 } 817 818 bool runOnFunction(Function &F) override { 819 if (skipFunction(F)) 820 return false; 821 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 822 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 823 auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); 824 auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 825 std::unique_ptr<MemorySSAUpdater> MSSAU; 826 if (MSSAWP) 827 MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA()); 828 return GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr, 829 DT.getRootNode(), 830 [](BasicBlock *) { return true; }) 831 .run(); 832 } 833 834 void getAnalysisUsage(AnalysisUsage &AU) const override { 835 AU.setPreservesCFG(); 836 AU.addRequired<DominatorTreeWrapperPass>(); 837 AU.addRequired<PostDominatorTreeWrapperPass>(); 838 AU.addRequired<LoopInfoWrapperPass>(); 839 AU.addPreserved<MemorySSAWrapperPass>(); 840 } 841 }; 842 843 /// Same as above, but restricted to a single loop at a time. Can be 844 /// scheduled with other loop passes w/o breaking out of LPM 845 struct LoopGuardWideningLegacyPass : public LoopPass { 846 static char ID; 847 848 LoopGuardWideningLegacyPass() : LoopPass(ID) { 849 initializeLoopGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry()); 850 } 851 852 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 853 if (skipLoop(L)) 854 return false; 855 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 856 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 857 auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); 858 auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; 859 auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 860 std::unique_ptr<MemorySSAUpdater> MSSAU; 861 if (MSSAWP) 862 MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA()); 863 864 BasicBlock *RootBB = L->getLoopPredecessor(); 865 if (!RootBB) 866 RootBB = L->getHeader(); 867 auto BlockFilter = [&](BasicBlock *BB) { 868 return BB == RootBB || L->contains(BB); 869 }; 870 return GuardWideningImpl(DT, PDT, LI, MSSAU ? MSSAU.get() : nullptr, 871 DT.getNode(RootBB), BlockFilter).run(); 872 } 873 874 void getAnalysisUsage(AnalysisUsage &AU) const override { 875 AU.setPreservesCFG(); 876 getLoopAnalysisUsage(AU); 877 AU.addPreserved<PostDominatorTreeWrapperPass>(); 878 AU.addPreserved<MemorySSAWrapperPass>(); 879 } 880 }; 881 } 882 883 char GuardWideningLegacyPass::ID = 0; 884 char LoopGuardWideningLegacyPass::ID = 0; 885 886 INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards", 887 false, false) 888 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 889 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 890 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 891 INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards", 892 false, false) 893 894 INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening", 895 "Widen guards (within a single loop, as a loop pass)", 896 false, false) 897 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 898 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 899 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 900 INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening", 901 "Widen guards (within a single loop, as a loop pass)", 902 false, false) 903 904 FunctionPass *llvm::createGuardWideningPass() { 905 return new GuardWideningLegacyPass(); 906 } 907 908 Pass *llvm::createLoopGuardWideningPass() { 909 return new LoopGuardWideningLegacyPass(); 910 } 911