1 //===- bolt/Profile/BoltAddressTranslation.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "bolt/Profile/BoltAddressTranslation.h"
10 #include "bolt/Core/BinaryFunction.h"
11 #include "llvm/Support/DataExtractor.h"
12 #include "llvm/Support/Errc.h"
13
14 #define DEBUG_TYPE "bolt-bat"
15
16 namespace llvm {
17 namespace bolt {
18
19 const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";
20
writeEntriesForBB(MapTy & Map,const BinaryBasicBlock & BB,uint64_t FuncAddress)21 void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
22 const BinaryBasicBlock &BB,
23 uint64_t FuncAddress) {
24 const uint64_t BBOutputOffset =
25 BB.getOutputAddressRange().first - FuncAddress;
26 const uint32_t BBInputOffset = BB.getInputOffset();
27
28 assert(BBInputOffset != BinaryBasicBlock::INVALID_OFFSET &&
29 "Every output BB must track back to an input BB for profile "
30 "collection in bolted binaries");
31
32 LLVM_DEBUG(dbgs() << "BB " << BB.getName() << "\n");
33 LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(BBOutputOffset)
34 << " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
35 // In case of conflicts (same Key mapping to different Vals), the last
36 // update takes precedence. Of course it is not ideal to have conflicts and
37 // those happen when we have an empty BB that either contained only
38 // NOPs or a jump to the next block (successor). Either way, the successor
39 // and this deleted block will both share the same output address (the same
40 // key), and we need to map back. We choose here to privilege the successor by
41 // allowing it to overwrite the previously inserted key in the map.
42 Map[BBOutputOffset] = BBInputOffset;
43
44 for (const auto &IOPair : BB.getOffsetTranslationTable()) {
45 const uint64_t OutputOffset = IOPair.first + BBOutputOffset;
46 const uint32_t InputOffset = IOPair.second;
47
48 // Is this the first instruction in the BB? No need to duplicate the entry.
49 if (OutputOffset == BBOutputOffset)
50 continue;
51
52 LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(OutputOffset) << " Val: "
53 << Twine::utohexstr(InputOffset) << " (branch)\n");
54 Map.insert(
55 std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset | BRANCHENTRY));
56 }
57 }
58
write(raw_ostream & OS)59 void BoltAddressTranslation::write(raw_ostream &OS) {
60 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
61 for (auto &BFI : BC.getBinaryFunctions()) {
62 BinaryFunction &Function = BFI.second;
63 // We don't need a translation table if the body of the function hasn't
64 // changed
65 if (!BC.HasRelocations && !Function.isSimple())
66 continue;
67
68 LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
69 LLVM_DEBUG(dbgs() << " Address reference: 0x"
70 << Twine::utohexstr(Function.getOutputAddress()) << "\n");
71 MapTy Map;
72 const bool IsSplit = Function.isSplit();
73 for (const BinaryBasicBlock *BB : Function.getLayout().blocks()) {
74 if (IsSplit && BB->isCold())
75 break;
76 writeEntriesForBB(Map, *BB, Function.getOutputAddress());
77 }
78 Maps.insert(std::pair<uint64_t, MapTy>(Function.getOutputAddress(), Map));
79
80 if (!IsSplit)
81 continue;
82
83 // Cold map
84 Map.clear();
85 LLVM_DEBUG(dbgs() << " Cold part\n");
86 for (const BinaryBasicBlock *BB : Function.getLayout().blocks()) {
87 if (!BB->isCold())
88 continue;
89 writeEntriesForBB(Map, *BB, Function.cold().getAddress());
90 }
91 Maps.insert(std::pair<uint64_t, MapTy>(Function.cold().getAddress(), Map));
92 ColdPartSource.insert(std::pair<uint64_t, uint64_t>(
93 Function.cold().getAddress(), Function.getOutputAddress()));
94 }
95
96 const uint32_t NumFuncs = Maps.size();
97 OS.write(reinterpret_cast<const char *>(&NumFuncs), 4);
98 LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << " functions for BAT.\n");
99 for (auto &MapEntry : Maps) {
100 const uint64_t Address = MapEntry.first;
101 MapTy &Map = MapEntry.second;
102 const uint32_t NumEntries = Map.size();
103 LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
104 << Twine::utohexstr(Address) << ".\n");
105 OS.write(reinterpret_cast<const char *>(&Address), 8);
106 OS.write(reinterpret_cast<const char *>(&NumEntries), 4);
107 for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
108 OS.write(reinterpret_cast<const char *>(&KeyVal.first), 4);
109 OS.write(reinterpret_cast<const char *>(&KeyVal.second), 4);
110 }
111 }
112 const uint32_t NumColdEntries = ColdPartSource.size();
113 LLVM_DEBUG(dbgs() << "Writing " << NumColdEntries
114 << " cold part mappings.\n");
115 OS.write(reinterpret_cast<const char *>(&NumColdEntries), 4);
116 for (std::pair<const uint64_t, uint64_t> &ColdEntry : ColdPartSource) {
117 OS.write(reinterpret_cast<const char *>(&ColdEntry.first), 8);
118 OS.write(reinterpret_cast<const char *>(&ColdEntry.second), 8);
119 LLVM_DEBUG(dbgs() << " " << Twine::utohexstr(ColdEntry.first) << " -> "
120 << Twine::utohexstr(ColdEntry.second) << "\n");
121 }
122
123 outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
124 outs() << "BOLT-INFO: Wrote " << NumColdEntries
125 << " BAT cold-to-hot entries\n";
126 }
127
parse(StringRef Buf)128 std::error_code BoltAddressTranslation::parse(StringRef Buf) {
129 DataExtractor DE = DataExtractor(Buf, true, 8);
130 uint64_t Offset = 0;
131 if (Buf.size() < 12)
132 return make_error_code(llvm::errc::io_error);
133
134 const uint32_t NameSz = DE.getU32(&Offset);
135 const uint32_t DescSz = DE.getU32(&Offset);
136 const uint32_t Type = DE.getU32(&Offset);
137
138 if (Type != BinarySection::NT_BOLT_BAT ||
139 Buf.size() + Offset < alignTo(NameSz, 4) + DescSz)
140 return make_error_code(llvm::errc::io_error);
141
142 StringRef Name = Buf.slice(Offset, Offset + NameSz);
143 Offset = alignTo(Offset + NameSz, 4);
144 if (Name.substr(0, 4) != "BOLT")
145 return make_error_code(llvm::errc::io_error);
146
147 if (Buf.size() - Offset < 4)
148 return make_error_code(llvm::errc::io_error);
149
150 const uint32_t NumFunctions = DE.getU32(&Offset);
151 LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << " functions\n");
152 for (uint32_t I = 0; I < NumFunctions; ++I) {
153 if (Buf.size() - Offset < 12)
154 return make_error_code(llvm::errc::io_error);
155
156 const uint64_t Address = DE.getU64(&Offset);
157 const uint32_t NumEntries = DE.getU32(&Offset);
158 MapTy Map;
159
160 LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
161 << Twine::utohexstr(Address) << "\n");
162 if (Buf.size() - Offset < 8 * NumEntries)
163 return make_error_code(llvm::errc::io_error);
164 for (uint32_t J = 0; J < NumEntries; ++J) {
165 const uint32_t OutputAddr = DE.getU32(&Offset);
166 const uint32_t InputAddr = DE.getU32(&Offset);
167 Map.insert(std::pair<uint32_t, uint32_t>(OutputAddr, InputAddr));
168 LLVM_DEBUG(dbgs() << Twine::utohexstr(OutputAddr) << " -> "
169 << Twine::utohexstr(InputAddr) << "\n");
170 }
171 Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
172 }
173
174 if (Buf.size() - Offset < 4)
175 return make_error_code(llvm::errc::io_error);
176
177 const uint32_t NumColdEntries = DE.getU32(&Offset);
178 LLVM_DEBUG(dbgs() << "Parsing " << NumColdEntries << " cold part mappings\n");
179 for (uint32_t I = 0; I < NumColdEntries; ++I) {
180 if (Buf.size() - Offset < 16)
181 return make_error_code(llvm::errc::io_error);
182 const uint32_t ColdAddress = DE.getU64(&Offset);
183 const uint32_t HotAddress = DE.getU64(&Offset);
184 ColdPartSource.insert(
185 std::pair<uint64_t, uint64_t>(ColdAddress, HotAddress));
186 LLVM_DEBUG(dbgs() << Twine::utohexstr(ColdAddress) << " -> "
187 << Twine::utohexstr(HotAddress) << "\n");
188 }
189 outs() << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
190 outs() << "BOLT-INFO: Parsed " << NumColdEntries
191 << " BAT cold-to-hot entries\n";
192
193 return std::error_code();
194 }
195
translate(const BinaryFunction & Func,uint64_t Offset,bool IsBranchSrc) const196 uint64_t BoltAddressTranslation::translate(const BinaryFunction &Func,
197 uint64_t Offset,
198 bool IsBranchSrc) const {
199 auto Iter = Maps.find(Func.getAddress());
200 if (Iter == Maps.end())
201 return Offset;
202
203 const MapTy &Map = Iter->second;
204 auto KeyVal = Map.upper_bound(Offset);
205 if (KeyVal == Map.begin())
206 return Offset;
207
208 --KeyVal;
209
210 const uint32_t Val = KeyVal->second & ~BRANCHENTRY;
211 // Branch source addresses are translated to the first instruction of the
212 // source BB to avoid accounting for modifications BOLT may have made in the
213 // BB regarding deletion/addition of instructions.
214 if (IsBranchSrc)
215 return Val;
216 return Offset - KeyVal->first + Val;
217 }
218
219 Optional<BoltAddressTranslation::FallthroughListTy>
getFallthroughsInTrace(const BinaryFunction & Func,uint64_t From,uint64_t To) const220 BoltAddressTranslation::getFallthroughsInTrace(const BinaryFunction &Func,
221 uint64_t From,
222 uint64_t To) const {
223 SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
224
225 // Filter out trivial case
226 if (From >= To)
227 return Res;
228
229 From -= Func.getAddress();
230 To -= Func.getAddress();
231
232 auto Iter = Maps.find(Func.getAddress());
233 if (Iter == Maps.end())
234 return NoneType();
235
236 const MapTy &Map = Iter->second;
237 auto FromIter = Map.upper_bound(From);
238 if (FromIter == Map.begin())
239 return Res;
240 // Skip instruction entries, to create fallthroughs we are only interested in
241 // BB boundaries
242 do {
243 if (FromIter == Map.begin())
244 return Res;
245 --FromIter;
246 } while (FromIter->second & BRANCHENTRY);
247
248 auto ToIter = Map.upper_bound(To);
249 if (ToIter == Map.begin())
250 return Res;
251 --ToIter;
252 if (FromIter->first >= ToIter->first)
253 return Res;
254
255 for (auto Iter = FromIter; Iter != ToIter;) {
256 const uint32_t Src = Iter->first;
257 if (Iter->second & BRANCHENTRY) {
258 ++Iter;
259 continue;
260 }
261
262 ++Iter;
263 while (Iter->second & BRANCHENTRY && Iter != ToIter)
264 ++Iter;
265 if (Iter->second & BRANCHENTRY)
266 break;
267 Res.emplace_back(Src, Iter->first);
268 }
269
270 return Res;
271 }
272
fetchParentAddress(uint64_t Address) const273 uint64_t BoltAddressTranslation::fetchParentAddress(uint64_t Address) const {
274 auto Iter = ColdPartSource.find(Address);
275 if (Iter == ColdPartSource.end())
276 return 0;
277 return Iter->second;
278 }
279
enabledFor(llvm::object::ELFObjectFileBase * InputFile) const280 bool BoltAddressTranslation::enabledFor(
281 llvm::object::ELFObjectFileBase *InputFile) const {
282 for (const SectionRef &Section : InputFile->sections()) {
283 Expected<StringRef> SectionNameOrErr = Section.getName();
284 if (Error E = SectionNameOrErr.takeError())
285 continue;
286
287 if (SectionNameOrErr.get() == SECTION_NAME)
288 return true;
289 }
290 return false;
291 }
292 } // namespace bolt
293 } // namespace llvm
294