1 //===- GuardWidening.cpp - ---- Guard widening ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the guard widening pass. The semantics of the 10 // @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails 11 // more often that it did before the transform. This optimization is called 12 // "widening" and can be used hoist and common runtime checks in situations like 13 // these: 14 // 15 // %cmp0 = 7 u< Length 16 // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] 17 // call @unknown_side_effects() 18 // %cmp1 = 9 u< Length 19 // call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ] 20 // ... 21 // 22 // => 23 // 24 // %cmp0 = 9 u< Length 25 // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] 26 // call @unknown_side_effects() 27 // ... 28 // 29 // If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a 30 // generic implementation of the same function, which will have the correct 31 // semantics from that point onward. It is always _legal_ to deoptimize (so 32 // replacing %cmp0 with false is "correct"), though it may not always be 33 // profitable to do so. 34 // 35 // NB! This pass is a work in progress. It hasn't been tuned to be "production 36 // ready" yet. It is known to have quadriatic running time and will not scale 37 // to large numbers of guards 38 // 39 //===----------------------------------------------------------------------===// 40 41 #include "llvm/Transforms/Scalar/GuardWidening.h" 42 #include "llvm/ADT/DenseMap.h" 43 #include "llvm/ADT/DepthFirstIterator.h" 44 #include "llvm/ADT/Statistic.h" 45 #include "llvm/Analysis/GuardUtils.h" 46 #include "llvm/Analysis/LoopInfo.h" 47 #include "llvm/Analysis/LoopPass.h" 48 #include "llvm/Analysis/MemorySSAUpdater.h" 49 #include "llvm/Analysis/PostDominators.h" 50 #include "llvm/Analysis/ValueTracking.h" 51 #include "llvm/IR/ConstantRange.h" 52 #include "llvm/IR/Dominators.h" 53 #include "llvm/IR/IntrinsicInst.h" 54 #include "llvm/IR/PatternMatch.h" 55 #include "llvm/InitializePasses.h" 56 #include "llvm/Pass.h" 57 #include "llvm/Support/CommandLine.h" 58 #include "llvm/Support/Debug.h" 59 #include "llvm/Support/KnownBits.h" 60 #include "llvm/Transforms/Scalar.h" 61 #include "llvm/Transforms/Utils/GuardUtils.h" 62 #include "llvm/Transforms/Utils/LoopUtils.h" 63 #include <functional> 64 65 using namespace llvm; 66 67 #define DEBUG_TYPE "guard-widening" 68 69 STATISTIC(GuardsEliminated, "Number of eliminated guards"); 70 STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches"); 71 72 static cl::opt<bool> 73 WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden, 74 cl::desc("Whether or not we should widen guards " 75 "expressed as branches by widenable conditions"), 76 cl::init(true)); 77 78 namespace { 79 80 // Get the condition of \p I. It can either be a guard or a conditional branch. 81 static Value *getCondition(Instruction *I) { 82 if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { 83 assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && 84 "Bad guard intrinsic?"); 85 return GI->getArgOperand(0); 86 } 87 Value *Cond, *WC; 88 BasicBlock *IfTrueBB, *IfFalseBB; 89 if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB)) 90 return Cond; 91 92 return cast<BranchInst>(I)->getCondition(); 93 } 94 95 // Set the condition for \p I to \p NewCond. \p I can either be a guard or a 96 // conditional branch. 97 static void setCondition(Instruction *I, Value *NewCond) { 98 if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { 99 assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && 100 "Bad guard intrinsic?"); 101 GI->setArgOperand(0, NewCond); 102 return; 103 } 104 cast<BranchInst>(I)->setCondition(NewCond); 105 } 106 107 // Eliminates the guard instruction properly. 108 static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) { 109 GuardInst->eraseFromParent(); 110 if (MSSAU) 111 MSSAU->removeMemoryAccess(GuardInst); 112 ++GuardsEliminated; 113 } 114 115 class GuardWideningImpl { 116 DominatorTree &DT; 117 PostDominatorTree *PDT; 118 LoopInfo &LI; 119 MemorySSAUpdater *MSSAU; 120 121 /// Together, these describe the region of interest. This might be all of 122 /// the blocks within a function, or only a given loop's blocks and preheader. 123 DomTreeNode *Root; 124 std::function<bool(BasicBlock*)> BlockFilter; 125 126 /// The set of guards and conditional branches whose conditions have been 127 /// widened into dominating guards. 128 SmallVector<Instruction *, 16> EliminatedGuardsAndBranches; 129 130 /// The set of guards which have been widened to include conditions to other 131 /// guards. 132 DenseSet<Instruction *> WidenedGuards; 133 134 /// Try to eliminate instruction \p Instr by widening it into an earlier 135 /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that 136 /// is currently visiting the block containing \p Guard, and \p GuardsPerBlock 137 /// maps BasicBlocks to the set of guards seen in that block. 138 bool eliminateInstrViaWidening( 139 Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI, 140 const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> & 141 GuardsPerBlock, bool InvertCondition = false); 142 143 /// Used to keep track of which widening potential is more effective. 144 enum WideningScore { 145 /// Don't widen. 146 WS_IllegalOrNegative, 147 148 /// Widening is performance neutral as far as the cycles spent in check 149 /// conditions goes (but can still help, e.g., code layout, having less 150 /// deopt state). 151 WS_Neutral, 152 153 /// Widening is profitable. 154 WS_Positive, 155 156 /// Widening is very profitable. Not significantly different from \c 157 /// WS_Positive, except by the order. 158 WS_VeryPositive 159 }; 160 161 static StringRef scoreTypeToString(WideningScore WS); 162 163 /// Compute the score for widening the condition in \p DominatedInstr 164 /// into \p DominatingGuard. If \p InvertCond is set, then we widen the 165 /// inverted condition of the dominating guard. 166 WideningScore computeWideningScore(Instruction *DominatedInstr, 167 Instruction *DominatingGuard, 168 bool InvertCond); 169 170 /// Helper to check if \p V can be hoisted to \p InsertPos. 171 bool isAvailableAt(const Value *V, const Instruction *InsertPos) const { 172 SmallPtrSet<const Instruction *, 8> Visited; 173 return isAvailableAt(V, InsertPos, Visited); 174 } 175 176 bool isAvailableAt(const Value *V, const Instruction *InsertPos, 177 SmallPtrSetImpl<const Instruction *> &Visited) const; 178 179 /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c 180 /// isAvailableAt returned true. 181 void makeAvailableAt(Value *V, Instruction *InsertPos) const; 182 183 /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try 184 /// to generate an expression computing the logical AND of \p Cond0 and (\p 185 /// Cond1 XOR \p InvertCondition). 186 /// Return true if the expression computing the AND is only as 187 /// expensive as computing one of the two. If \p InsertPt is true then 188 /// actually generate the resulting expression, make it available at \p 189 /// InsertPt and return it in \p Result (else no change to the IR is made). 190 bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt, 191 Value *&Result, bool InvertCondition); 192 193 /// Represents a range check of the form \c Base + \c Offset u< \c Length, 194 /// with the constraint that \c Length is not negative. \c CheckInst is the 195 /// pre-existing instruction in the IR that computes the result of this range 196 /// check. 197 class RangeCheck { 198 const Value *Base; 199 const ConstantInt *Offset; 200 const Value *Length; 201 ICmpInst *CheckInst; 202 203 public: 204 explicit RangeCheck(const Value *Base, const ConstantInt *Offset, 205 const Value *Length, ICmpInst *CheckInst) 206 : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {} 207 208 void setBase(const Value *NewBase) { Base = NewBase; } 209 void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; } 210 211 const Value *getBase() const { return Base; } 212 const ConstantInt *getOffset() const { return Offset; } 213 const APInt &getOffsetValue() const { return getOffset()->getValue(); } 214 const Value *getLength() const { return Length; }; 215 ICmpInst *getCheckInst() const { return CheckInst; } 216 217 void print(raw_ostream &OS, bool PrintTypes = false) { 218 OS << "Base: "; 219 Base->printAsOperand(OS, PrintTypes); 220 OS << " Offset: "; 221 Offset->printAsOperand(OS, PrintTypes); 222 OS << " Length: "; 223 Length->printAsOperand(OS, PrintTypes); 224 } 225 226 LLVM_DUMP_METHOD void dump() { 227 print(dbgs()); 228 dbgs() << "\n"; 229 } 230 }; 231 232 /// Parse \p CheckCond into a conjunction (logical-and) of range checks; and 233 /// append them to \p Checks. Returns true on success, may clobber \c Checks 234 /// on failure. 235 bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) { 236 SmallPtrSet<const Value *, 8> Visited; 237 return parseRangeChecks(CheckCond, Checks, Visited); 238 } 239 240 bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks, 241 SmallPtrSetImpl<const Value *> &Visited); 242 243 /// Combine the checks in \p Checks into a smaller set of checks and append 244 /// them into \p CombinedChecks. Return true on success (i.e. all of checks 245 /// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks 246 /// and \p CombinedChecks on success and on failure. 247 bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks, 248 SmallVectorImpl<RangeCheck> &CombinedChecks) const; 249 250 /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of 251 /// computing only one of the two expressions? 252 bool isWideningCondProfitable(Value *Cond0, Value *Cond1, bool InvertCond) { 253 Value *ResultUnused; 254 return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused, 255 InvertCond); 256 } 257 258 /// If \p InvertCondition is false, Widen \p ToWiden to fail if 259 /// \p NewCondition is false, otherwise make it fail if \p NewCondition is 260 /// true (in addition to whatever it is already checking). 261 void widenGuard(Instruction *ToWiden, Value *NewCondition, 262 bool InvertCondition) { 263 Value *Result; 264 265 widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result, 266 InvertCondition); 267 if (isGuardAsWidenableBranch(ToWiden)) { 268 setWidenableBranchCond(cast<BranchInst>(ToWiden), Result); 269 return; 270 } 271 setCondition(ToWiden, Result); 272 } 273 274 public: 275 explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT, 276 LoopInfo &LI, MemorySSAUpdater *MSSAU, 277 DomTreeNode *Root, 278 std::function<bool(BasicBlock*)> BlockFilter) 279 : DT(DT), PDT(PDT), LI(LI), MSSAU(MSSAU), Root(Root), 280 BlockFilter(BlockFilter) {} 281 282 /// The entry point for this pass. 283 bool run(); 284 }; 285 } 286 287 static bool isSupportedGuardInstruction(const Instruction *Insn) { 288 if (isGuard(Insn)) 289 return true; 290 if (WidenBranchGuards && isGuardAsWidenableBranch(Insn)) 291 return true; 292 return false; 293 } 294 295 bool GuardWideningImpl::run() { 296 DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock; 297 bool Changed = false; 298 for (auto DFI = df_begin(Root), DFE = df_end(Root); 299 DFI != DFE; ++DFI) { 300 auto *BB = (*DFI)->getBlock(); 301 if (!BlockFilter(BB)) 302 continue; 303 304 auto &CurrentList = GuardsInBlock[BB]; 305 306 for (auto &I : *BB) 307 if (isSupportedGuardInstruction(&I)) 308 CurrentList.push_back(cast<Instruction>(&I)); 309 310 for (auto *II : CurrentList) 311 Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock); 312 } 313 314 assert(EliminatedGuardsAndBranches.empty() || Changed); 315 for (auto *I : EliminatedGuardsAndBranches) 316 if (!WidenedGuards.count(I)) { 317 assert(isa<ConstantInt>(getCondition(I)) && "Should be!"); 318 if (isSupportedGuardInstruction(I)) 319 eliminateGuard(I, MSSAU); 320 else { 321 assert(isa<BranchInst>(I) && 322 "Eliminated something other than guard or branch?"); 323 ++CondBranchEliminated; 324 } 325 } 326 327 return Changed; 328 } 329 330 bool GuardWideningImpl::eliminateInstrViaWidening( 331 Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI, 332 const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> & 333 GuardsInBlock, bool InvertCondition) { 334 // Ignore trivial true or false conditions. These instructions will be 335 // trivially eliminated by any cleanup pass. Do not erase them because other 336 // guards can possibly be widened into them. 337 if (isa<ConstantInt>(getCondition(Instr))) 338 return false; 339 340 Instruction *BestSoFar = nullptr; 341 auto BestScoreSoFar = WS_IllegalOrNegative; 342 343 // In the set of dominating guards, find the one we can merge GuardInst with 344 // for the most profit. 345 for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) { 346 auto *CurBB = DFSI.getPath(i)->getBlock(); 347 if (!BlockFilter(CurBB)) 348 break; 349 assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!"); 350 const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second; 351 352 auto I = GuardsInCurBB.begin(); 353 auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr) 354 : GuardsInCurBB.end(); 355 356 #ifndef NDEBUG 357 { 358 unsigned Index = 0; 359 for (auto &I : *CurBB) { 360 if (Index == GuardsInCurBB.size()) 361 break; 362 if (GuardsInCurBB[Index] == &I) 363 Index++; 364 } 365 assert(Index == GuardsInCurBB.size() && 366 "Guards expected to be in order!"); 367 } 368 #endif 369 370 assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?"); 371 372 for (auto *Candidate : make_range(I, E)) { 373 auto Score = computeWideningScore(Instr, Candidate, InvertCondition); 374 LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr) 375 << " and " << *getCondition(Candidate) << " is " 376 << scoreTypeToString(Score) << "\n"); 377 if (Score > BestScoreSoFar) { 378 BestScoreSoFar = Score; 379 BestSoFar = Candidate; 380 } 381 } 382 } 383 384 if (BestScoreSoFar == WS_IllegalOrNegative) { 385 LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n"); 386 return false; 387 } 388 389 assert(BestSoFar != Instr && "Should have never visited same guard!"); 390 assert(DT.dominates(BestSoFar, Instr) && "Should be!"); 391 392 LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar 393 << " with score " << scoreTypeToString(BestScoreSoFar) 394 << "\n"); 395 widenGuard(BestSoFar, getCondition(Instr), InvertCondition); 396 auto NewGuardCondition = InvertCondition 397 ? ConstantInt::getFalse(Instr->getContext()) 398 : ConstantInt::getTrue(Instr->getContext()); 399 setCondition(Instr, NewGuardCondition); 400 EliminatedGuardsAndBranches.push_back(Instr); 401 WidenedGuards.insert(BestSoFar); 402 return true; 403 } 404 405 GuardWideningImpl::WideningScore 406 GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr, 407 Instruction *DominatingGuard, 408 bool InvertCond) { 409 Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent()); 410 Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent()); 411 bool HoistingOutOfLoop = false; 412 413 if (DominatingGuardLoop != DominatedInstrLoop) { 414 // Be conservative and don't widen into a sibling loop. TODO: If the 415 // sibling is colder, we should consider allowing this. 416 if (DominatingGuardLoop && 417 !DominatingGuardLoop->contains(DominatedInstrLoop)) 418 return WS_IllegalOrNegative; 419 420 HoistingOutOfLoop = true; 421 } 422 423 if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard)) 424 return WS_IllegalOrNegative; 425 426 // If the guard was conditional executed, it may never be reached 427 // dynamically. There are two potential downsides to hoisting it out of the 428 // conditionally executed region: 1) we may spuriously deopt without need and 429 // 2) we have the extra cost of computing the guard condition in the common 430 // case. At the moment, we really only consider the second in our heuristic 431 // here. TODO: evaluate cost model for spurious deopt 432 // NOTE: As written, this also lets us hoist right over another guard which 433 // is essentially just another spelling for control flow. 434 if (isWideningCondProfitable(getCondition(DominatedInstr), 435 getCondition(DominatingGuard), InvertCond)) 436 return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive; 437 438 if (HoistingOutOfLoop) 439 return WS_Positive; 440 441 // Returns true if we might be hoisting above explicit control flow. Note 442 // that this completely ignores implicit control flow (guards, calls which 443 // throw, etc...). That choice appears arbitrary. 444 auto MaybeHoistingOutOfIf = [&]() { 445 auto *DominatingBlock = DominatingGuard->getParent(); 446 auto *DominatedBlock = DominatedInstr->getParent(); 447 if (isGuardAsWidenableBranch(DominatingGuard)) 448 DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0); 449 450 // Same Block? 451 if (DominatedBlock == DominatingBlock) 452 return false; 453 // Obvious successor (common loop header/preheader case) 454 if (DominatedBlock == DominatingBlock->getUniqueSuccessor()) 455 return false; 456 // TODO: diamond, triangle cases 457 if (!PDT) return true; 458 return !PDT->dominates(DominatedBlock, DominatingBlock); 459 }; 460 461 return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral; 462 } 463 464 bool GuardWideningImpl::isAvailableAt( 465 const Value *V, const Instruction *Loc, 466 SmallPtrSetImpl<const Instruction *> &Visited) const { 467 auto *Inst = dyn_cast<Instruction>(V); 468 if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst)) 469 return true; 470 471 if (!isSafeToSpeculativelyExecute(Inst, Loc, &DT) || 472 Inst->mayReadFromMemory()) 473 return false; 474 475 Visited.insert(Inst); 476 477 // We only want to go _up_ the dominance chain when recursing. 478 assert(!isa<PHINode>(Loc) && 479 "PHIs should return false for isSafeToSpeculativelyExecute"); 480 assert(DT.isReachableFromEntry(Inst->getParent()) && 481 "We did a DFS from the block entry!"); 482 return all_of(Inst->operands(), 483 [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); }); 484 } 485 486 void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const { 487 auto *Inst = dyn_cast<Instruction>(V); 488 if (!Inst || DT.dominates(Inst, Loc)) 489 return; 490 491 assert(isSafeToSpeculativelyExecute(Inst, Loc, &DT) && 492 !Inst->mayReadFromMemory() && "Should've checked with isAvailableAt!"); 493 494 for (Value *Op : Inst->operands()) 495 makeAvailableAt(Op, Loc); 496 497 Inst->moveBefore(Loc); 498 } 499 500 bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1, 501 Instruction *InsertPt, Value *&Result, 502 bool InvertCondition) { 503 using namespace llvm::PatternMatch; 504 505 { 506 // L >u C0 && L >u C1 -> L >u max(C0, C1) 507 ConstantInt *RHS0, *RHS1; 508 Value *LHS; 509 ICmpInst::Predicate Pred0, Pred1; 510 if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) && 511 match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) { 512 if (InvertCondition) 513 Pred1 = ICmpInst::getInversePredicate(Pred1); 514 515 ConstantRange CR0 = 516 ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue()); 517 ConstantRange CR1 = 518 ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue()); 519 520 // Given what we're doing here and the semantics of guards, it would 521 // be correct to use a subset intersection, but that may be too 522 // aggressive in cases we care about. 523 if (Optional<ConstantRange> Intersect = CR0.exactIntersectWith(CR1)) { 524 APInt NewRHSAP; 525 CmpInst::Predicate Pred; 526 if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) { 527 if (InsertPt) { 528 ConstantInt *NewRHS = 529 ConstantInt::get(Cond0->getContext(), NewRHSAP); 530 Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk"); 531 } 532 return true; 533 } 534 } 535 } 536 } 537 538 { 539 SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks; 540 // TODO: Support InvertCondition case? 541 if (!InvertCondition && 542 parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) && 543 combineRangeChecks(Checks, CombinedChecks)) { 544 if (InsertPt) { 545 Result = nullptr; 546 for (auto &RC : CombinedChecks) { 547 makeAvailableAt(RC.getCheckInst(), InsertPt); 548 if (Result) 549 Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "", 550 InsertPt); 551 else 552 Result = RC.getCheckInst(); 553 } 554 assert(Result && "Failed to find result value"); 555 Result->setName("wide.chk"); 556 } 557 return true; 558 } 559 } 560 561 // Base case -- just logical-and the two conditions together. 562 563 if (InsertPt) { 564 makeAvailableAt(Cond0, InsertPt); 565 makeAvailableAt(Cond1, InsertPt); 566 if (InvertCondition) 567 Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt); 568 Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt); 569 } 570 571 // We were not able to compute Cond0 AND Cond1 for the price of one. 572 return false; 573 } 574 575 bool GuardWideningImpl::parseRangeChecks( 576 Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks, 577 SmallPtrSetImpl<const Value *> &Visited) { 578 if (!Visited.insert(CheckCond).second) 579 return true; 580 581 using namespace llvm::PatternMatch; 582 583 { 584 Value *AndLHS, *AndRHS; 585 if (match(CheckCond, m_And(m_Value(AndLHS), m_Value(AndRHS)))) 586 return parseRangeChecks(AndLHS, Checks) && 587 parseRangeChecks(AndRHS, Checks); 588 } 589 590 auto *IC = dyn_cast<ICmpInst>(CheckCond); 591 if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() || 592 (IC->getPredicate() != ICmpInst::ICMP_ULT && 593 IC->getPredicate() != ICmpInst::ICMP_UGT)) 594 return false; 595 596 const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1); 597 if (IC->getPredicate() == ICmpInst::ICMP_UGT) 598 std::swap(CmpLHS, CmpRHS); 599 600 auto &DL = IC->getModule()->getDataLayout(); 601 602 GuardWideningImpl::RangeCheck Check( 603 CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())), 604 CmpRHS, IC); 605 606 if (!isKnownNonNegative(Check.getLength(), DL)) 607 return false; 608 609 // What we have in \c Check now is a correct interpretation of \p CheckCond. 610 // Try to see if we can move some constant offsets into the \c Offset field. 611 612 bool Changed; 613 auto &Ctx = CheckCond->getContext(); 614 615 do { 616 Value *OpLHS; 617 ConstantInt *OpRHS; 618 Changed = false; 619 620 #ifndef NDEBUG 621 auto *BaseInst = dyn_cast<Instruction>(Check.getBase()); 622 assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) && 623 "Unreachable instruction?"); 624 #endif 625 626 if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { 627 Check.setBase(OpLHS); 628 APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); 629 Check.setOffset(ConstantInt::get(Ctx, NewOffset)); 630 Changed = true; 631 } else if (match(Check.getBase(), 632 m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { 633 KnownBits Known = computeKnownBits(OpLHS, DL); 634 if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) { 635 Check.setBase(OpLHS); 636 APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); 637 Check.setOffset(ConstantInt::get(Ctx, NewOffset)); 638 Changed = true; 639 } 640 } 641 } while (Changed); 642 643 Checks.push_back(Check); 644 return true; 645 } 646 647 bool GuardWideningImpl::combineRangeChecks( 648 SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks, 649 SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const { 650 unsigned OldCount = Checks.size(); 651 while (!Checks.empty()) { 652 // Pick all of the range checks with a specific base and length, and try to 653 // merge them. 654 const Value *CurrentBase = Checks.front().getBase(); 655 const Value *CurrentLength = Checks.front().getLength(); 656 657 SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks; 658 659 auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) { 660 return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength; 661 }; 662 663 copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck); 664 erase_if(Checks, IsCurrentCheck); 665 666 assert(CurrentChecks.size() != 0 && "We know we have at least one!"); 667 668 if (CurrentChecks.size() < 3) { 669 llvm::append_range(RangeChecksOut, CurrentChecks); 670 continue; 671 } 672 673 // CurrentChecks.size() will typically be 3 here, but so far there has been 674 // no need to hard-code that fact. 675 676 llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS, 677 const GuardWideningImpl::RangeCheck &RHS) { 678 return LHS.getOffsetValue().slt(RHS.getOffsetValue()); 679 }); 680 681 // Note: std::sort should not invalidate the ChecksStart iterator. 682 683 const ConstantInt *MinOffset = CurrentChecks.front().getOffset(); 684 const ConstantInt *MaxOffset = CurrentChecks.back().getOffset(); 685 686 unsigned BitWidth = MaxOffset->getValue().getBitWidth(); 687 if ((MaxOffset->getValue() - MinOffset->getValue()) 688 .ugt(APInt::getSignedMinValue(BitWidth))) 689 return false; 690 691 APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue(); 692 const APInt &HighOffset = MaxOffset->getValue(); 693 auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) { 694 return (HighOffset - RC.getOffsetValue()).ult(MaxDiff); 695 }; 696 697 if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK)) 698 return false; 699 700 // We have a series of f+1 checks as: 701 // 702 // I+k_0 u< L ... Chk_0 703 // I+k_1 u< L ... Chk_1 704 // ... 705 // I+k_f u< L ... Chk_f 706 // 707 // with forall i in [0,f]: k_f-k_i u< k_f-k_0 ... Precond_0 708 // k_f-k_0 u< INT_MIN+k_f ... Precond_1 709 // k_f != k_0 ... Precond_2 710 // 711 // Claim: 712 // Chk_0 AND Chk_f implies all the other checks 713 // 714 // Informal proof sketch: 715 // 716 // We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap 717 // (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and 718 // thus I+k_f is the greatest unsigned value in that range. 719 // 720 // This combined with Ckh_(f+1) shows that everything in that range is u< L. 721 // Via Precond_0 we know that all of the indices in Chk_0 through Chk_(f+1) 722 // lie in [I+k_0,I+k_f], this proving our claim. 723 // 724 // To see that [I+k_0,I+k_f] is not a wrapping range, note that there are 725 // two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal 726 // since k_0 != k_f). In the former case, [I+k_0,I+k_f] is not a wrapping 727 // range by definition, and the latter case is impossible: 728 // 729 // 0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1) 730 // xxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 731 // 732 // For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted 733 // with 'x' above) to be at least >u INT_MIN. 734 735 RangeChecksOut.emplace_back(CurrentChecks.front()); 736 RangeChecksOut.emplace_back(CurrentChecks.back()); 737 } 738 739 assert(RangeChecksOut.size() <= OldCount && "We pessimized!"); 740 return RangeChecksOut.size() != OldCount; 741 } 742 743 #ifndef NDEBUG 744 StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) { 745 switch (WS) { 746 case WS_IllegalOrNegative: 747 return "IllegalOrNegative"; 748 case WS_Neutral: 749 return "Neutral"; 750 case WS_Positive: 751 return "Positive"; 752 case WS_VeryPositive: 753 return "VeryPositive"; 754 } 755 756 llvm_unreachable("Fully covered switch above!"); 757 } 758 #endif 759 760 PreservedAnalyses GuardWideningPass::run(Function &F, 761 FunctionAnalysisManager &AM) { 762 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 763 auto &LI = AM.getResult<LoopAnalysis>(F); 764 auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); 765 auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F); 766 std::unique_ptr<MemorySSAUpdater> MSSAU; 767 if (MSSAA) 768 MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA()); 769 if (!GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr, 770 DT.getRootNode(), [](BasicBlock *) { return true; }) 771 .run()) 772 return PreservedAnalyses::all(); 773 774 PreservedAnalyses PA; 775 PA.preserveSet<CFGAnalyses>(); 776 PA.preserve<MemorySSAAnalysis>(); 777 return PA; 778 } 779 780 PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM, 781 LoopStandardAnalysisResults &AR, 782 LPMUpdater &U) { 783 BasicBlock *RootBB = L.getLoopPredecessor(); 784 if (!RootBB) 785 RootBB = L.getHeader(); 786 auto BlockFilter = [&](BasicBlock *BB) { 787 return BB == RootBB || L.contains(BB); 788 }; 789 std::unique_ptr<MemorySSAUpdater> MSSAU; 790 if (AR.MSSA) 791 MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA); 792 if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, MSSAU ? MSSAU.get() : nullptr, 793 AR.DT.getNode(RootBB), BlockFilter).run()) 794 return PreservedAnalyses::all(); 795 796 auto PA = getLoopPassPreservedAnalyses(); 797 if (AR.MSSA) 798 PA.preserve<MemorySSAAnalysis>(); 799 return PA; 800 } 801 802 namespace { 803 struct GuardWideningLegacyPass : public FunctionPass { 804 static char ID; 805 806 GuardWideningLegacyPass() : FunctionPass(ID) { 807 initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry()); 808 } 809 810 bool runOnFunction(Function &F) override { 811 if (skipFunction(F)) 812 return false; 813 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 814 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 815 auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); 816 auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 817 std::unique_ptr<MemorySSAUpdater> MSSAU; 818 if (MSSAWP) 819 MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA()); 820 return GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr, 821 DT.getRootNode(), 822 [](BasicBlock *) { return true; }) 823 .run(); 824 } 825 826 void getAnalysisUsage(AnalysisUsage &AU) const override { 827 AU.setPreservesCFG(); 828 AU.addRequired<DominatorTreeWrapperPass>(); 829 AU.addRequired<PostDominatorTreeWrapperPass>(); 830 AU.addRequired<LoopInfoWrapperPass>(); 831 AU.addPreserved<MemorySSAWrapperPass>(); 832 } 833 }; 834 835 /// Same as above, but restricted to a single loop at a time. Can be 836 /// scheduled with other loop passes w/o breaking out of LPM 837 struct LoopGuardWideningLegacyPass : public LoopPass { 838 static char ID; 839 840 LoopGuardWideningLegacyPass() : LoopPass(ID) { 841 initializeLoopGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry()); 842 } 843 844 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 845 if (skipLoop(L)) 846 return false; 847 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 848 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 849 auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); 850 auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; 851 auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 852 std::unique_ptr<MemorySSAUpdater> MSSAU; 853 if (MSSAWP) 854 MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA()); 855 856 BasicBlock *RootBB = L->getLoopPredecessor(); 857 if (!RootBB) 858 RootBB = L->getHeader(); 859 auto BlockFilter = [&](BasicBlock *BB) { 860 return BB == RootBB || L->contains(BB); 861 }; 862 return GuardWideningImpl(DT, PDT, LI, MSSAU ? MSSAU.get() : nullptr, 863 DT.getNode(RootBB), BlockFilter).run(); 864 } 865 866 void getAnalysisUsage(AnalysisUsage &AU) const override { 867 AU.setPreservesCFG(); 868 getLoopAnalysisUsage(AU); 869 AU.addPreserved<PostDominatorTreeWrapperPass>(); 870 AU.addPreserved<MemorySSAWrapperPass>(); 871 } 872 }; 873 } 874 875 char GuardWideningLegacyPass::ID = 0; 876 char LoopGuardWideningLegacyPass::ID = 0; 877 878 INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards", 879 false, false) 880 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 881 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 882 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 883 INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards", 884 false, false) 885 886 INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening", 887 "Widen guards (within a single loop, as a loop pass)", 888 false, false) 889 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 890 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 891 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 892 INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening", 893 "Widen guards (within a single loop, as a loop pass)", 894 false, false) 895 896 FunctionPass *llvm::createGuardWideningPass() { 897 return new GuardWideningLegacyPass(); 898 } 899 900 Pass *llvm::createLoopGuardWideningPass() { 901 return new LoopGuardWideningLegacyPass(); 902 } 903