1 //===- bolt/Profile/BoltAddressTranslation.h --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef BOLT_PROFILE_BOLTADDRESSTRANSLATION_H
10 #define BOLT_PROFILE_BOLTADDRESSTRANSLATION_H
11 
12 #include "llvm/ADT/Optional.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/ADT/StringRef.h"
15 #include <cstdint>
16 #include <map>
17 #include <system_error>
18 
19 namespace llvm {
20 class raw_ostream;
21 
22 namespace object {
23 class ELFObjectFileBase;
24 } // namespace object
25 
26 namespace bolt {
27 class BinaryBasicBlock;
28 class BinaryContext;
29 class BinaryFunction;
30 
31 /// The map of output addresses to input ones to be used when translating
32 /// samples collected in a binary that was already processed by BOLT. We do not
33 /// support reoptimizing a binary already processed by BOLT, but we do support
34 /// collecting samples in a binary processed by BOLT. We then translate samples
35 /// back to addresses from the input (original) binary, one that can be
36 /// optimized. The goal is to avoid special deployments of non-bolted binaries
37 /// just for the purposes of data collection.
38 ///
39 /// The in-memory representation of the map is as follows. Each function has its
40 /// own map. A function is identified by its output address. This is the key to
41 /// retrieve a translation map. The translation map is a collection of ordered
42 /// keys identifying the start of a region (relative to the function start) in
43 /// the output address space (addresses in the binary processed by BOLT).
44 ///
45 /// A translation then happens when perf2bolt needs to convert sample addresses
46 /// in the output address space back to input addresses, valid to run BOLT in
47 /// the original input binary. To convert, perf2bolt first needs to fetch the
48 /// translation map for a sample recorded in a given function. It then finds
49 /// the largest key that is still smaller or equal than the recorded address.
50 /// It then converts this address to use the value of this key.
51 ///
52 ///   Example translation Map for function foo
53 ///      KEY                             VALUE                    BB?
54 ///    Output offset1 (first BB)         Original input offset1   Y
55 ///    ...
56 ///    Output offsetN (last branch)      Original input offsetN   N
57 ///
58 /// The information on whether a given entry is a BB start or an instruction
59 /// that changes control flow is encoded in the last (highest) bit of VALUE.
60 ///
61 /// Notes:
62 /// Instructions that will never appear in LBR because they do not cause control
63 /// flow change are omitted from this map. Basic block locations are recorded
64 /// because they can be a target of a jump (To address in the LBR) and also to
65 /// recreate the BB layout of this function. We use the BB layout map to
66 /// recreate fall-through jumps in the profile, given an LBR trace.
67 class BoltAddressTranslation {
68 public:
69   // In-memory representation of the address translation table
70   using MapTy = std::map<uint32_t, uint32_t>;
71 
72   // List of taken fall-throughs
73   using FallthroughListTy = SmallVector<std::pair<uint64_t, uint64_t>, 16>;
74 
75   /// Name of the ELF section where the table will be serialized to in the
76   /// output binary
77   static const char *SECTION_NAME;
78 
BoltAddressTranslation(BinaryContext & BC)79   BoltAddressTranslation(BinaryContext &BC) : BC(BC) {}
80 
81   /// Write the serialized address translation tables for each reordered
82   /// function
83   void write(raw_ostream &OS);
84 
85   /// Read the serialized address translation tables and load them internally
86   /// in memory. Return a parse error if failed.
87   std::error_code parse(StringRef Buf);
88 
89   /// If the maps are loaded in memory, perform the lookup to translate LBR
90   /// addresses in \p Func.
91   uint64_t translate(const BinaryFunction &Func, uint64_t Offset,
92                      bool IsBranchSrc) const;
93 
94   /// Use the map keys containing basic block addresses to infer fall-throughs
95   /// taken in the path started at FirstLBR.To and ending at SecondLBR.From.
96   /// Return NoneType if trace is invalid or the list of fall-throughs
97   /// otherwise.
98   Optional<FallthroughListTy> getFallthroughsInTrace(const BinaryFunction &Func,
99                                                      uint64_t From,
100                                                      uint64_t To) const;
101 
102   /// If available, fetch the address of the hot part linked to the cold part
103   /// at \p Address. Return 0 otherwise.
104   uint64_t fetchParentAddress(uint64_t Address) const;
105 
106   /// True if the input binary has a translation table we can use to convert
107   /// addresses when aggregating profile
108   bool enabledFor(llvm::object::ELFObjectFileBase *InputFile) const;
109 
110 private:
111   /// Helper to update \p Map by inserting one or more BAT entries reflecting
112   /// \p BB for function located at \p FuncAddress. At least one entry will be
113   /// emitted for the start of the BB. More entries may be emitted to cover
114   /// the location of calls or any instruction that may change control flow.
115   void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB,
116                          uint64_t FuncAddress);
117 
118   BinaryContext &BC;
119 
120   std::map<uint64_t, MapTy> Maps;
121 
122   /// Links outlined cold bocks to their original function
123   std::map<uint64_t, uint64_t> ColdPartSource;
124 
125   /// Identifies the address of a control-flow changing instructions in a
126   /// translation map entry
127   const static uint32_t BRANCHENTRY = 0x80000000;
128 };
129 } // namespace bolt
130 
131 } // namespace llvm
132 
133 #endif
134