1 //===- bolt/Passes/DataflowAnalysis.h ---------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef BOLT_PASSES_DATAFLOWANALYSIS_H 10 #define BOLT_PASSES_DATAFLOWANALYSIS_H 11 12 #include "bolt/Core/BinaryContext.h" 13 #include "bolt/Core/BinaryFunction.h" 14 #include "llvm/Support/Errc.h" 15 #include <queue> 16 17 namespace llvm { 18 namespace bolt { 19 20 /// Represents a given program point as viewed by a dataflow analysis. This 21 /// point is a location that may be either an instruction or a basic block. 22 /// Example: 23 /// 24 /// BB1: --> ProgramPoint 1 (stored as bb *) 25 /// add --> ProgramPoint 2 (stored as inst *) 26 /// sub --> ProgramPoint 3 (stored as inst *) 27 /// jmp --> ProgramPoint 4 (stored as inst *) 28 /// 29 /// ProgramPoints allow us to attach a state to any location in the program 30 /// and is a core concept used in the dataflow analysis engine. 31 /// 32 /// A dataflow analysis will associate a state with a program point. In 33 /// analyses whose direction is forward, this state tracks what happened after 34 /// the execution of an instruction, and the BB tracks the state of what 35 /// happened before the execution of the first instruction in this BB. For 36 /// backwards dataflow analyses, state tracks what happened before the 37 /// execution of a given instruction, while the state associated with a BB 38 /// tracks what happened after the execution of the last instruction of a BB. 39 class ProgramPoint { 40 enum IDTy : bool { BB = 0, Inst } ID; 41 42 union DataU { 43 BinaryBasicBlock *BB; 44 MCInst *Inst; DataU(BinaryBasicBlock * BB)45 DataU(BinaryBasicBlock *BB) : BB(BB) {} DataU(MCInst * Inst)46 DataU(MCInst *Inst) : Inst(Inst) {} 47 } Data; 48 49 public: ProgramPoint()50 ProgramPoint() : ID(IDTy::BB), Data((MCInst *)nullptr) {} ProgramPoint(BinaryBasicBlock * BB)51 ProgramPoint(BinaryBasicBlock *BB) : ID(IDTy::BB), Data(BB) {} ProgramPoint(MCInst * Inst)52 ProgramPoint(MCInst *Inst) : ID(IDTy::Inst), Data(Inst) {} 53 54 /// Convenience function to access the last program point of a basic block, 55 /// which is equal to its last instruction. If it is empty, it is equal to 56 /// itself. getLastPointAt(BinaryBasicBlock & BB)57 static ProgramPoint getLastPointAt(BinaryBasicBlock &BB) { 58 auto Last = BB.rbegin(); 59 if (Last != BB.rend()) 60 return ProgramPoint(&*Last); 61 return ProgramPoint(&BB); 62 } 63 64 /// Similar to getLastPointAt. getFirstPointAt(BinaryBasicBlock & BB)65 static ProgramPoint getFirstPointAt(BinaryBasicBlock &BB) { 66 auto First = BB.begin(); 67 if (First != BB.end()) 68 return ProgramPoint(&*First); 69 return ProgramPoint(&BB); 70 } 71 72 bool operator<(const ProgramPoint &PP) const { return Data.BB < PP.Data.BB; } 73 bool operator==(const ProgramPoint &PP) const { 74 return Data.BB == PP.Data.BB; 75 } 76 isBB()77 bool isBB() const { return ID == IDTy::BB; } isInst()78 bool isInst() const { return ID == IDTy::Inst; } 79 getBB()80 BinaryBasicBlock *getBB() const { 81 assert(isBB()); 82 return Data.BB; 83 } getInst()84 MCInst *getInst() const { 85 assert(isInst()); 86 return Data.Inst; 87 } 88 89 friend DenseMapInfo<ProgramPoint>; 90 }; 91 92 /// Convenience function to operate on all predecessors of a BB, as viewed 93 /// by a dataflow analysis. This includes throw sites if it is a landing pad. 94 void doForAllPreds(const BinaryBasicBlock &BB, 95 std::function<void(ProgramPoint)> Task); 96 97 /// Operates on all successors of a basic block. 98 void doForAllSuccs(const BinaryBasicBlock &BB, 99 std::function<void(ProgramPoint)> Task); 100 101 /// Default printer for State data. 102 template <typename StateTy> class StatePrinter { 103 public: print(raw_ostream & OS,const StateTy & State)104 void print(raw_ostream &OS, const StateTy &State) const { OS << State; } StatePrinter(const BinaryContext &)105 explicit StatePrinter(const BinaryContext &) {} 106 }; 107 108 /// Printer for State data that is a BitVector of registers. 109 class RegStatePrinter { 110 public: 111 void print(raw_ostream &OS, const BitVector &State) const; RegStatePrinter(const BinaryContext & BC)112 explicit RegStatePrinter(const BinaryContext &BC) : BC(BC) {} 113 114 private: 115 const BinaryContext &BC; 116 }; 117 118 /// Base class for dataflow analyses. Depends on the type of whatever object is 119 /// stored as the state (StateTy) at each program point. The dataflow then 120 /// updates the state at each program point depending on the instruction being 121 /// processed, iterating until all points converge and agree on a state value. 122 /// Remember that depending on how you formulate your dataflow equation, this 123 /// may not converge and will loop indefinitely. 124 /// /p Backward indicates the direction of the dataflow. If false, direction is 125 /// forward. 126 /// 127 /// Example: Compute the set of live registers at each program point. 128 /// 129 /// Modelling: 130 /// Let State be the set of registers that are live. The kill set of a 131 /// point is the set of all registers clobbered by the instruction at this 132 /// program point. The gen set is the set of all registers read by it. 133 /// 134 /// out{b} = Union (s E succs{b}) {in{s}} 135 /// in{b} = (out{b} - kill{b}) U gen{b} 136 /// 137 /// Template parameters: 138 /// StateTy = BitVector, where each index corresponds to a machine register 139 /// Backward = true (live reg operates in reverse order) 140 /// 141 /// Subclass implementation notes: 142 /// Confluence operator = union (if a reg is alive in any succ, it is alive 143 /// in the current block). 144 /// 145 template <typename Derived, typename StateTy, bool Backward = false, 146 typename StatePrinterTy = StatePrinter<StateTy>> 147 class DataflowAnalysis { 148 /// CRTP convenience methods derived()149 Derived &derived() { return *static_cast<Derived *>(this); } 150 const_derived()151 const Derived &const_derived() const { 152 return *static_cast<const Derived *>(this); 153 } 154 155 mutable Optional<unsigned> AnnotationIndex; 156 157 protected: 158 const BinaryContext &BC; 159 /// Reference to the function being analysed 160 BinaryFunction &Func; 161 162 /// The id of the annotation allocator to be used 163 MCPlusBuilder::AllocatorIdTy AllocatorId = 0; 164 165 /// Tracks the state at basic block start (end) if direction of the dataflow 166 /// is forward (backward). 167 std::unordered_map<const BinaryBasicBlock *, StateTy> StateAtBBEntry; 168 /// Map a point to its previous (succeeding) point if the direction of the 169 /// dataflow is forward (backward). This is used to support convenience 170 /// methods to access the resulting state before (after) a given instruction, 171 /// otherwise our clients need to keep "prev" pointers themselves. 172 DenseMap<const MCInst *, ProgramPoint> PrevPoint; 173 174 /// Perform any bookkeeping before dataflow starts preflight()175 void preflight() { llvm_unreachable("Unimplemented method"); } 176 177 /// Sets initial state for each BB getStartingStateAtBB(const BinaryBasicBlock & BB)178 StateTy getStartingStateAtBB(const BinaryBasicBlock &BB) { 179 llvm_unreachable("Unimplemented method"); 180 } 181 182 /// Sets initial state for each instruction (out set) getStartingStateAtPoint(const MCInst & Point)183 StateTy getStartingStateAtPoint(const MCInst &Point) { 184 llvm_unreachable("Unimplemented method"); 185 } 186 187 /// Computes the in set for the first instruction in a BB by applying the 188 /// confluence operator to the out sets of the last instruction of each pred 189 /// (in case of a backwards dataflow, we will operate on the in sets of each 190 /// successor to determine the starting state of the last instruction of the 191 /// current BB) doConfluence(StateTy & StateOut,const StateTy & StateIn)192 void doConfluence(StateTy &StateOut, const StateTy &StateIn) { 193 llvm_unreachable("Unimplemented method"); 194 } 195 196 /// In case of a forwards dataflow, compute the in set for the first 197 /// instruction in a Landing Pad considering all out sets for associated 198 /// throw sites. 199 /// In case of a backwards dataflow, compute the in set of a invoke 200 /// instruction considering in sets for the first instructions of its 201 /// landing pads. doConfluenceWithLP(StateTy & StateOut,const StateTy & StateIn,const MCInst & Invoke)202 void doConfluenceWithLP(StateTy &StateOut, const StateTy &StateIn, 203 const MCInst &Invoke) { 204 return derived().doConfluence(StateOut, StateIn); 205 } 206 207 /// Returns the out set of an instruction given its in set. 208 /// If backwards, computes the in set given its out set. computeNext(const MCInst & Point,const StateTy & Cur)209 StateTy computeNext(const MCInst &Point, const StateTy &Cur) { 210 llvm_unreachable("Unimplemented method"); 211 return StateTy(); 212 } 213 214 /// Returns the MCAnnotation name getAnnotationName()215 StringRef getAnnotationName() const { 216 llvm_unreachable("Unimplemented method"); 217 return StringRef(""); 218 } 219 getAnnotationIndex()220 unsigned getAnnotationIndex() const { 221 if (AnnotationIndex) 222 return *AnnotationIndex; 223 AnnotationIndex = 224 BC.MIB->getOrCreateAnnotationIndex(const_derived().getAnnotationName()); 225 return *AnnotationIndex; 226 } 227 228 /// Private getter methods accessing state in a read-write fashion getOrCreateStateAt(const BinaryBasicBlock & BB)229 StateTy &getOrCreateStateAt(const BinaryBasicBlock &BB) { 230 return StateAtBBEntry[&BB]; 231 } 232 getOrCreateStateAt(MCInst & Point)233 StateTy &getOrCreateStateAt(MCInst &Point) { 234 return BC.MIB->getOrCreateAnnotationAs<StateTy>( 235 Point, derived().getAnnotationIndex(), AllocatorId); 236 } 237 getOrCreateStateAt(ProgramPoint Point)238 StateTy &getOrCreateStateAt(ProgramPoint Point) { 239 if (Point.isBB()) 240 return getOrCreateStateAt(*Point.getBB()); 241 return getOrCreateStateAt(*Point.getInst()); 242 } 243 244 public: 245 /// Return the allocator id getAllocatorId()246 unsigned getAllocatorId() { return AllocatorId; } 247 248 /// If the direction of the dataflow is forward, operates on the last 249 /// instruction of all predecessors when performing an iteration of the 250 /// dataflow equation for the start of this BB. If backwards, operates on 251 /// the first instruction of all successors. doForAllSuccsOrPreds(const BinaryBasicBlock & BB,std::function<void (ProgramPoint)> Task)252 void doForAllSuccsOrPreds(const BinaryBasicBlock &BB, 253 std::function<void(ProgramPoint)> Task) { 254 if (!Backward) 255 return doForAllPreds(BB, Task); 256 return doForAllSuccs(BB, Task); 257 } 258 259 /// We need the current binary context and the function that will be processed 260 /// in this dataflow analysis. 261 DataflowAnalysis(BinaryFunction &BF, 262 MCPlusBuilder::AllocatorIdTy AllocatorId = 0) 263 : BC(BF.getBinaryContext()), Func(BF), AllocatorId(AllocatorId) {} 264 ~DataflowAnalysis()265 virtual ~DataflowAnalysis() { cleanAnnotations(); } 266 267 /// Track the state at basic block start (end) if direction of the dataflow 268 /// is forward (backward). getStateAt(const BinaryBasicBlock & BB)269 ErrorOr<const StateTy &> getStateAt(const BinaryBasicBlock &BB) const { 270 auto Iter = StateAtBBEntry.find(&BB); 271 if (Iter == StateAtBBEntry.end()) 272 return make_error_code(errc::result_out_of_range); 273 return Iter->second; 274 } 275 276 /// Track the state at the end (start) of each MCInst in this function if 277 /// the direction of the dataflow is forward (backward). getStateAt(const MCInst & Point)278 ErrorOr<const StateTy &> getStateAt(const MCInst &Point) const { 279 return BC.MIB->tryGetAnnotationAs<StateTy>( 280 Point, const_derived().getAnnotationIndex()); 281 } 282 283 /// Return the out set (in set) of a given program point if the direction of 284 /// the dataflow is forward (backward). getStateAt(ProgramPoint Point)285 ErrorOr<const StateTy &> getStateAt(ProgramPoint Point) const { 286 if (Point.isBB()) 287 return getStateAt(*Point.getBB()); 288 return getStateAt(*Point.getInst()); 289 } 290 291 /// Relies on a ptr map to fetch the previous instruction and then retrieve 292 /// state. WARNING: Watch out for invalidated pointers. Do not use this 293 /// function if you invalidated pointers after the analysis has been completed getStateBefore(const MCInst & Point)294 ErrorOr<const StateTy &> getStateBefore(const MCInst &Point) { 295 return getStateAt(PrevPoint[&Point]); 296 } 297 getStateBefore(ProgramPoint Point)298 ErrorOr<const StateTy &> getStateBefore(ProgramPoint Point) { 299 if (Point.isBB()) 300 return getStateAt(*Point.getBB()); 301 return getStateAt(PrevPoint[Point.getInst()]); 302 } 303 304 /// Remove any state annotations left by this analysis cleanAnnotations()305 void cleanAnnotations() { 306 for (BinaryBasicBlock &BB : Func) { 307 for (MCInst &Inst : BB) { 308 BC.MIB->removeAnnotation(Inst, derived().getAnnotationIndex()); 309 } 310 } 311 } 312 313 /// Public entry point that will perform the entire analysis form start to 314 /// end. run()315 void run() { 316 derived().preflight(); 317 318 if (Func.begin() == Func.end()) 319 return; 320 // Initialize state for all points of the function 321 for (BinaryBasicBlock &BB : Func) { 322 StateTy &St = getOrCreateStateAt(BB); 323 St = derived().getStartingStateAtBB(BB); 324 for (MCInst &Inst : BB) { 325 StateTy &St = getOrCreateStateAt(Inst); 326 St = derived().getStartingStateAtPoint(Inst); 327 } 328 } 329 330 std::queue<BinaryBasicBlock *> Worklist; 331 // TODO: Pushing this in a DFS ordering will greatly speed up the dataflow 332 // performance. 333 if (!Backward) { 334 for (BinaryBasicBlock &BB : Func) { 335 Worklist.push(&BB); 336 MCInst *Prev = nullptr; 337 for (MCInst &Inst : BB) { 338 PrevPoint[&Inst] = Prev ? ProgramPoint(Prev) : ProgramPoint(&BB); 339 Prev = &Inst; 340 } 341 } 342 } else { 343 for (auto I = Func.rbegin(), E = Func.rend(); I != E; ++I) { 344 Worklist.push(&*I); 345 MCInst *Prev = nullptr; 346 for (auto J = (*I).rbegin(), E2 = (*I).rend(); J != E2; ++J) { 347 MCInst &Inst = *J; 348 PrevPoint[&Inst] = Prev ? ProgramPoint(Prev) : ProgramPoint(&*I); 349 Prev = &Inst; 350 } 351 } 352 } 353 354 // Main dataflow loop 355 while (!Worklist.empty()) { 356 BinaryBasicBlock *BB = Worklist.front(); 357 Worklist.pop(); 358 359 // Calculate state at the entry of first instruction in BB 360 StateTy StateAtEntry = getOrCreateStateAt(*BB); 361 if (BB->isLandingPad()) { 362 doForAllSuccsOrPreds(*BB, [&](ProgramPoint P) { 363 if (P.isInst() && BC.MIB->isInvoke(*P.getInst())) 364 derived().doConfluenceWithLP(StateAtEntry, *getStateAt(P), 365 *P.getInst()); 366 else 367 derived().doConfluence(StateAtEntry, *getStateAt(P)); 368 }); 369 } else { 370 doForAllSuccsOrPreds(*BB, [&](ProgramPoint P) { 371 derived().doConfluence(StateAtEntry, *getStateAt(P)); 372 }); 373 } 374 375 bool Changed = false; 376 StateTy &St = getOrCreateStateAt(*BB); 377 if (St != StateAtEntry) { 378 Changed = true; 379 St = std::move(StateAtEntry); 380 } 381 382 // Propagate information from first instruction down to the last one 383 StateTy *PrevState = &St; 384 const MCInst *LAST = nullptr; 385 if (!Backward) 386 LAST = &*BB->rbegin(); 387 else 388 LAST = &*BB->begin(); 389 390 auto doNext = [&](MCInst &Inst, const BinaryBasicBlock &BB) { 391 StateTy CurState = derived().computeNext(Inst, *PrevState); 392 393 if (Backward && BC.MIB->isInvoke(Inst)) { 394 BinaryBasicBlock *LBB = Func.getLandingPadBBFor(BB, Inst); 395 if (LBB) { 396 auto First = LBB->begin(); 397 if (First != LBB->end()) 398 derived().doConfluenceWithLP(CurState, 399 getOrCreateStateAt(&*First), Inst); 400 else 401 derived().doConfluenceWithLP(CurState, getOrCreateStateAt(LBB), 402 Inst); 403 } 404 } 405 406 StateTy &St = getOrCreateStateAt(Inst); 407 if (St != CurState) { 408 St = CurState; 409 if (&Inst == LAST) 410 Changed = true; 411 } 412 PrevState = &St; 413 }; 414 415 if (!Backward) 416 for (MCInst &Inst : *BB) 417 doNext(Inst, *BB); 418 else 419 for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) 420 doNext(*I, *BB); 421 422 if (Changed) { 423 if (!Backward) { 424 for (BinaryBasicBlock *Succ : BB->successors()) 425 Worklist.push(Succ); 426 for (BinaryBasicBlock *LandingPad : BB->landing_pads()) 427 Worklist.push(LandingPad); 428 } else { 429 for (BinaryBasicBlock *Pred : BB->predecessors()) 430 Worklist.push(Pred); 431 for (BinaryBasicBlock *Thrower : BB->throwers()) 432 Worklist.push(Thrower); 433 } 434 } 435 } // end while (!Worklist.empty()) 436 } 437 }; 438 439 /// Define an iterator for navigating the expressions calculated by a 440 /// dataflow analysis at each program point, when they are backed by a 441 /// BitVector. 442 class ExprIterator 443 : public std::iterator<std::forward_iterator_tag, const MCInst *> { 444 const BitVector *BV; 445 const std::vector<MCInst *> &Expressions; 446 int Idx; 447 448 public: 449 ExprIterator &operator++() { 450 assert(Idx != -1 && "Iterator already at the end"); 451 Idx = BV->find_next(Idx); 452 return *this; 453 } 454 ExprIterator operator++(int) { 455 assert(Idx != -1 && "Iterator already at the end"); 456 ExprIterator Ret = *this; 457 ++(*this); 458 return Ret; 459 } 460 bool operator==(const ExprIterator &Other) const { return Idx == Other.Idx; } 461 bool operator!=(const ExprIterator &Other) const { return Idx != Other.Idx; } 462 MCInst *operator*() { 463 assert(Idx != -1 && "Invalid access to end iterator"); 464 return Expressions[Idx]; 465 } ExprIterator(const BitVector * BV,const std::vector<MCInst * > & Exprs)466 ExprIterator(const BitVector *BV, const std::vector<MCInst *> &Exprs) 467 : BV(BV), Expressions(Exprs) { 468 Idx = BV->find_first(); 469 } ExprIterator(const BitVector * BV,const std::vector<MCInst * > & Exprs,int Idx)470 ExprIterator(const BitVector *BV, const std::vector<MCInst *> &Exprs, int Idx) 471 : BV(BV), Expressions(Exprs), Idx(Idx) {} 472 getBitVectorIndex()473 int getBitVectorIndex() const { return Idx; } 474 }; 475 476 /// Specialization of DataflowAnalysis whose state specifically stores 477 /// a set of instructions. 478 template <typename Derived, bool Backward = false, 479 typename StatePrinterTy = StatePrinter<BitVector>> 480 class InstrsDataflowAnalysis 481 : public DataflowAnalysis<Derived, BitVector, Backward, StatePrinterTy> { 482 public: 483 /// These iterator functions offer access to the set of pointers to 484 /// instructions in a given program point expr_begin(const T & Point)485 template <typename T> ExprIterator expr_begin(const T &Point) const { 486 if (auto State = this->getStateAt(Point)) 487 return ExprIterator(&*State, Expressions); 488 return expr_end(); 489 } expr_begin(const BitVector & BV)490 ExprIterator expr_begin(const BitVector &BV) const { 491 return ExprIterator(&BV, Expressions); 492 } expr_end()493 ExprIterator expr_end() const { 494 return ExprIterator(nullptr, Expressions, -1); 495 } 496 497 /// Used to size the set of expressions/definitions being tracked by the 498 /// dataflow analysis 499 uint64_t NumInstrs{0}; 500 /// We put every MCInst we want to track (which one representing an 501 /// expression/def) into a vector because we need to associate them with 502 /// small numbers. They will be tracked via BitVectors throughout the 503 /// dataflow analysis. 504 std::vector<MCInst *> Expressions; 505 /// Maps expressions defs (MCInsts) to its index in the Expressions vector 506 std::unordered_map<const MCInst *, uint64_t> ExprToIdx; 507 508 /// Return whether \p Expr is in the state set at \p Point count(ProgramPoint Point,const MCInst & Expr)509 bool count(ProgramPoint Point, const MCInst &Expr) const { 510 auto IdxIter = ExprToIdx.find(&Expr); 511 assert(IdxIter != ExprToIdx.end() && "Invalid Expr"); 512 return (*this->getStateAt(Point))[IdxIter->second]; 513 } 514 count(const MCInst & Point,const MCInst & Expr)515 bool count(const MCInst &Point, const MCInst &Expr) const { 516 auto IdxIter = ExprToIdx.find(&Expr); 517 assert(IdxIter != ExprToIdx.end() && "Invalid Expr"); 518 return (*this->getStateAt(Point))[IdxIter->second]; 519 } 520 521 /// Return whether \p Expr is in the state set at the instr of index 522 /// \p PointIdx count(unsigned PointIdx,const MCInst & Expr)523 bool count(unsigned PointIdx, const MCInst &Expr) const { 524 return count(*Expressions[PointIdx], Expr); 525 } 526 527 InstrsDataflowAnalysis(BinaryFunction &BF, 528 MCPlusBuilder::AllocatorIdTy AllocId = 0) 529 : DataflowAnalysis<Derived, BitVector, Backward, StatePrinterTy>( 530 BF, AllocId) {} ~InstrsDataflowAnalysis()531 virtual ~InstrsDataflowAnalysis() {} 532 }; 533 534 } // namespace bolt 535 536 /// DenseMapInfo allows us to use the DenseMap LLVM data structure to store 537 /// ProgramPoints. 538 template <> struct DenseMapInfo<bolt::ProgramPoint> { 539 static inline bolt::ProgramPoint getEmptyKey() { 540 uintptr_t Val = static_cast<uintptr_t>(-1); 541 Val <<= PointerLikeTypeTraits<MCInst *>::NumLowBitsAvailable; 542 return bolt::ProgramPoint(reinterpret_cast<MCInst *>(Val)); 543 } 544 static inline bolt::ProgramPoint getTombstoneKey() { 545 uintptr_t Val = static_cast<uintptr_t>(-2); 546 Val <<= PointerLikeTypeTraits<MCInst *>::NumLowBitsAvailable; 547 return bolt::ProgramPoint(reinterpret_cast<MCInst *>(Val)); 548 } 549 static unsigned getHashValue(const bolt::ProgramPoint &PP) { 550 return (unsigned((uintptr_t)PP.Data.BB) >> 4) ^ 551 (unsigned((uintptr_t)PP.Data.BB) >> 9); 552 } 553 static bool isEqual(const bolt::ProgramPoint &LHS, 554 const bolt::ProgramPoint &RHS) { 555 return LHS.Data.BB == RHS.Data.BB; 556 } 557 }; 558 559 raw_ostream &operator<<(raw_ostream &OS, const BitVector &Val); 560 561 } // namespace llvm 562 563 #endif 564