1 //===- bolt/Profile/BoltAddressTranslation.h --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef BOLT_PROFILE_BOLTADDRESSTRANSLATION_H 10 #define BOLT_PROFILE_BOLTADDRESSTRANSLATION_H 11 12 #include "llvm/ADT/Optional.h" 13 #include "llvm/ADT/SmallVector.h" 14 #include "llvm/ADT/StringRef.h" 15 #include <cstdint> 16 #include <map> 17 #include <system_error> 18 19 namespace llvm { 20 class raw_ostream; 21 22 namespace object { 23 class ELFObjectFileBase; 24 } // namespace object 25 26 namespace bolt { 27 class BinaryBasicBlock; 28 class BinaryContext; 29 class BinaryFunction; 30 31 /// The map of output addresses to input ones to be used when translating 32 /// samples collected in a binary that was already processed by BOLT. We do not 33 /// support reoptimizing a binary already processed by BOLT, but we do support 34 /// collecting samples in a binary processed by BOLT. We then translate samples 35 /// back to addresses from the input (original) binary, one that can be 36 /// optimized. The goal is to avoid special deployments of non-bolted binaries 37 /// just for the purposes of data collection. 38 /// 39 /// The in-memory representation of the map is as follows. Each function has its 40 /// own map. A function is identified by its output address. This is the key to 41 /// retrieve a translation map. The translation map is a collection of ordered 42 /// keys identifying the start of a region (relative to the function start) in 43 /// the output address space (addresses in the binary processed by BOLT). 44 /// 45 /// A translation then happens when perf2bolt needs to convert sample addresses 46 /// in the output address space back to input addresses, valid to run BOLT in 47 /// the original input binary. To convert, perf2bolt first needs to fetch the 48 /// translation map for a sample recorded in a given function. It then finds 49 /// the largest key that is still smaller or equal than the recorded address. 50 /// It then converts this address to use the value of this key. 51 /// 52 /// Example translation Map for function foo 53 /// KEY VALUE BB? 54 /// Output offset1 (first BB) Original input offset1 Y 55 /// ... 56 /// Output offsetN (last branch) Original input offsetN N 57 /// 58 /// The information on whether a given entry is a BB start or an instruction 59 /// that changes control flow is encoded in the last (highest) bit of VALUE. 60 /// 61 /// Notes: 62 /// Instructions that will never appear in LBR because they do not cause control 63 /// flow change are omitted from this map. Basic block locations are recorded 64 /// because they can be a target of a jump (To address in the LBR) and also to 65 /// recreate the BB layout of this function. We use the BB layout map to 66 /// recreate fall-through jumps in the profile, given an LBR trace. 67 class BoltAddressTranslation { 68 public: 69 // In-memory representation of the address translation table 70 using MapTy = std::map<uint32_t, uint32_t>; 71 72 // List of taken fall-throughs 73 using FallthroughListTy = SmallVector<std::pair<uint64_t, uint64_t>, 16>; 74 75 /// Name of the ELF section where the table will be serialized to in the 76 /// output binary 77 static const char *SECTION_NAME; 78 BoltAddressTranslation(BinaryContext & BC)79 BoltAddressTranslation(BinaryContext &BC) : BC(BC) {} 80 81 /// Write the serialized address translation tables for each reordered 82 /// function 83 void write(raw_ostream &OS); 84 85 /// Read the serialized address translation tables and load them internally 86 /// in memory. Return a parse error if failed. 87 std::error_code parse(StringRef Buf); 88 89 /// If the maps are loaded in memory, perform the lookup to translate LBR 90 /// addresses in \p Func. 91 uint64_t translate(const BinaryFunction &Func, uint64_t Offset, 92 bool IsBranchSrc) const; 93 94 /// Use the map keys containing basic block addresses to infer fall-throughs 95 /// taken in the path started at FirstLBR.To and ending at SecondLBR.From. 96 /// Return NoneType if trace is invalid or the list of fall-throughs 97 /// otherwise. 98 Optional<FallthroughListTy> getFallthroughsInTrace(const BinaryFunction &Func, 99 uint64_t From, 100 uint64_t To) const; 101 102 /// If available, fetch the address of the hot part linked to the cold part 103 /// at \p Address. Return 0 otherwise. 104 uint64_t fetchParentAddress(uint64_t Address) const; 105 106 /// True if the input binary has a translation table we can use to convert 107 /// addresses when aggregating profile 108 bool enabledFor(llvm::object::ELFObjectFileBase *InputFile) const; 109 110 private: 111 /// Helper to update \p Map by inserting one or more BAT entries reflecting 112 /// \p BB for function located at \p FuncAddress. At least one entry will be 113 /// emitted for the start of the BB. More entries may be emitted to cover 114 /// the location of calls or any instruction that may change control flow. 115 void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB, 116 uint64_t FuncAddress); 117 118 BinaryContext &BC; 119 120 std::map<uint64_t, MapTy> Maps; 121 122 /// Links outlined cold bocks to their original function 123 std::map<uint64_t, uint64_t> ColdPartSource; 124 125 /// Identifies the address of a control-flow changing instructions in a 126 /// translation map entry 127 const static uint32_t BRANCHENTRY = 0x80000000; 128 }; 129 } // namespace bolt 130 131 } // namespace llvm 132 133 #endif 134