1 //===- Profile.cpp - XRay Profile Abstraction -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Defines the XRay Profile class representing the latency profile generated by 10 // XRay's profiling mode. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "llvm/XRay/Profile.h" 14 15 #include "llvm/Support/DataExtractor.h" 16 #include "llvm/Support/Error.h" 17 #include "llvm/Support/FileSystem.h" 18 #include "llvm/XRay/Trace.h" 19 #include <deque> 20 #include <memory> 21 22 namespace llvm { 23 namespace xray { 24 25 Profile::Profile(const Profile &O) { 26 // We need to re-create all the tries from the original (O), into the current 27 // Profile being initialized, through the Block instances we see. 28 for (const auto &Block : O) { 29 Blocks.push_back({Block.Thread, {}}); 30 auto &B = Blocks.back(); 31 for (const auto &PathData : Block.PathData) 32 B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))), 33 PathData.second}); 34 } 35 } 36 37 Profile &Profile::operator=(const Profile &O) { 38 Profile P = O; 39 *this = std::move(P); 40 return *this; 41 } 42 43 namespace { 44 45 struct BlockHeader { 46 uint32_t Size; 47 uint32_t Number; 48 uint64_t Thread; 49 }; 50 51 static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor, 52 uint32_t &Offset) { 53 BlockHeader H; 54 uint32_t CurrentOffset = Offset; 55 H.Size = Extractor.getU32(&Offset); 56 if (Offset == CurrentOffset) 57 return make_error<StringError>( 58 Twine("Error parsing block header size at offset '") + 59 Twine(CurrentOffset) + "'", 60 std::make_error_code(std::errc::invalid_argument)); 61 CurrentOffset = Offset; 62 H.Number = Extractor.getU32(&Offset); 63 if (Offset == CurrentOffset) 64 return make_error<StringError>( 65 Twine("Error parsing block header number at offset '") + 66 Twine(CurrentOffset) + "'", 67 std::make_error_code(std::errc::invalid_argument)); 68 CurrentOffset = Offset; 69 H.Thread = Extractor.getU64(&Offset); 70 if (Offset == CurrentOffset) 71 return make_error<StringError>( 72 Twine("Error parsing block header thread id at offset '") + 73 Twine(CurrentOffset) + "'", 74 std::make_error_code(std::errc::invalid_argument)); 75 return H; 76 } 77 78 static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor, 79 uint32_t &Offset) { 80 // We're reading a sequence of int32_t's until we find a 0. 81 std::vector<Profile::FuncID> Path; 82 auto CurrentOffset = Offset; 83 int32_t FuncId; 84 do { 85 FuncId = Extractor.getSigned(&Offset, 4); 86 if (CurrentOffset == Offset) 87 return make_error<StringError>( 88 Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'", 89 std::make_error_code(std::errc::invalid_argument)); 90 CurrentOffset = Offset; 91 Path.push_back(FuncId); 92 } while (FuncId != 0); 93 return std::move(Path); 94 } 95 96 static Expected<Profile::Data> readData(DataExtractor &Extractor, 97 uint32_t &Offset) { 98 // We expect a certain number of elements for Data: 99 // - A 64-bit CallCount 100 // - A 64-bit CumulativeLocalTime counter 101 Profile::Data D; 102 auto CurrentOffset = Offset; 103 D.CallCount = Extractor.getU64(&Offset); 104 if (CurrentOffset == Offset) 105 return make_error<StringError>( 106 Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) + 107 "'", 108 std::make_error_code(std::errc::invalid_argument)); 109 CurrentOffset = Offset; 110 D.CumulativeLocalTime = Extractor.getU64(&Offset); 111 if (CurrentOffset == Offset) 112 return make_error<StringError>( 113 Twine("Error parsing cumulative local time at offset '") + 114 Twine(CurrentOffset) + "'", 115 std::make_error_code(std::errc::invalid_argument)); 116 return D; 117 } 118 119 } // namespace 120 121 Error Profile::addBlock(Block &&B) { 122 if (B.PathData.empty()) 123 return make_error<StringError>( 124 "Block may not have empty path data.", 125 std::make_error_code(std::errc::invalid_argument)); 126 127 Blocks.emplace_back(std::move(B)); 128 return Error::success(); 129 } 130 131 Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const { 132 auto It = PathIDMap.find(P); 133 if (It == PathIDMap.end()) 134 return make_error<StringError>( 135 Twine("PathID not found: ") + Twine(P), 136 std::make_error_code(std::errc::invalid_argument)); 137 std::vector<Profile::FuncID> Path; 138 for (auto Node = It->second; Node; Node = Node->Caller) 139 Path.push_back(Node->Func); 140 return std::move(Path); 141 } 142 143 Profile::PathID Profile::internPath(ArrayRef<FuncID> P) { 144 if (P.empty()) 145 return 0; 146 147 auto RootToLeafPath = reverse(P); 148 149 // Find the root. 150 auto It = RootToLeafPath.begin(); 151 auto PathRoot = *It++; 152 auto RootIt = 153 find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; }); 154 155 // If we've not seen this root before, remember it. 156 TrieNode *Node = nullptr; 157 if (RootIt == Roots.end()) { 158 NodeStorage.emplace_back(); 159 Node = &NodeStorage.back(); 160 Node->Func = PathRoot; 161 Roots.push_back(Node); 162 } else { 163 Node = *RootIt; 164 } 165 166 // Now traverse the path, re-creating if necessary. 167 while (It != RootToLeafPath.end()) { 168 auto NodeFuncID = *It++; 169 auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) { 170 return N->Func == NodeFuncID; 171 }); 172 if (CalleeIt == Node->Callees.end()) { 173 NodeStorage.emplace_back(); 174 auto NewNode = &NodeStorage.back(); 175 NewNode->Func = NodeFuncID; 176 NewNode->Caller = Node; 177 Node->Callees.push_back(NewNode); 178 Node = NewNode; 179 } else { 180 Node = *CalleeIt; 181 } 182 } 183 184 // At this point, Node *must* be pointing at the leaf. 185 assert(Node->Func == P.front()); 186 if (Node->ID == 0) { 187 Node->ID = NextID++; 188 PathIDMap.insert({Node->ID, Node}); 189 } 190 return Node->ID; 191 } 192 193 Profile mergeProfilesByThread(const Profile &L, const Profile &R) { 194 Profile Merged; 195 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; 196 using PathDataMapPtr = std::unique_ptr<PathDataMap>; 197 using PathDataVector = decltype(Profile::Block::PathData); 198 using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>; 199 ThreadProfileIndexMap ThreadProfileIndex; 200 201 for (const auto &P : {std::ref(L), std::ref(R)}) 202 for (const auto &Block : P.get()) { 203 ThreadProfileIndexMap::iterator It; 204 std::tie(It, std::ignore) = ThreadProfileIndex.insert( 205 {Block.Thread, PathDataMapPtr{new PathDataMap()}}); 206 for (const auto &PathAndData : Block.PathData) { 207 auto &PathID = PathAndData.first; 208 auto &Data = PathAndData.second; 209 auto NewPathID = 210 Merged.internPath(cantFail(P.get().expandPath(PathID))); 211 PathDataMap::iterator PathDataIt; 212 bool Inserted; 213 std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data}); 214 if (!Inserted) { 215 auto &ExistingData = PathDataIt->second; 216 ExistingData.CallCount += Data.CallCount; 217 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; 218 } 219 } 220 } 221 222 for (const auto &IndexedThreadBlock : ThreadProfileIndex) { 223 PathDataVector PathAndData; 224 PathAndData.reserve(IndexedThreadBlock.second->size()); 225 copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData)); 226 cantFail( 227 Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)})); 228 } 229 return Merged; 230 } 231 232 Profile mergeProfilesByStack(const Profile &L, const Profile &R) { 233 Profile Merged; 234 using PathDataMap = DenseMap<Profile::PathID, Profile::Data>; 235 PathDataMap PathData; 236 using PathDataVector = decltype(Profile::Block::PathData); 237 for (const auto &P : {std::ref(L), std::ref(R)}) 238 for (const auto &Block : P.get()) 239 for (const auto &PathAndData : Block.PathData) { 240 auto &PathId = PathAndData.first; 241 auto &Data = PathAndData.second; 242 auto NewPathID = 243 Merged.internPath(cantFail(P.get().expandPath(PathId))); 244 PathDataMap::iterator PathDataIt; 245 bool Inserted; 246 std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data}); 247 if (!Inserted) { 248 auto &ExistingData = PathDataIt->second; 249 ExistingData.CallCount += Data.CallCount; 250 ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime; 251 } 252 } 253 254 // In the end there's a single Block, for thread 0. 255 PathDataVector Block; 256 Block.reserve(PathData.size()); 257 copy(PathData, std::back_inserter(Block)); 258 cantFail(Merged.addBlock({0, std::move(Block)})); 259 return Merged; 260 } 261 262 Expected<Profile> loadProfile(StringRef Filename) { 263 int Fd; 264 if (auto EC = sys::fs::openFileForRead(Filename, Fd)) 265 return make_error<StringError>( 266 Twine("Cannot read profile from '") + Filename + "'", EC); 267 268 uint64_t FileSize; 269 if (auto EC = sys::fs::file_size(Filename, FileSize)) 270 return make_error<StringError>( 271 Twine("Cannot get filesize of '") + Filename + "'", EC); 272 273 std::error_code EC; 274 sys::fs::mapped_file_region MappedFile( 275 Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC); 276 if (EC) 277 return make_error<StringError>( 278 Twine("Cannot mmap profile '") + Filename + "'", EC); 279 StringRef Data(MappedFile.data(), MappedFile.size()); 280 281 Profile P; 282 uint32_t Offset = 0; 283 DataExtractor Extractor(Data, true, 8); 284 285 // For each block we get from the file: 286 while (Offset != MappedFile.size()) { 287 auto HeaderOrError = readBlockHeader(Extractor, Offset); 288 if (!HeaderOrError) 289 return HeaderOrError.takeError(); 290 291 // TODO: Maybe store this header information for each block, even just for 292 // debugging? 293 const auto &Header = HeaderOrError.get(); 294 295 // Read in the path data. 296 auto PathOrError = readPath(Extractor, Offset); 297 if (!PathOrError) 298 return PathOrError.takeError(); 299 const auto &Path = PathOrError.get(); 300 301 // For each path we encounter, we should intern it to get a PathID. 302 auto DataOrError = readData(Extractor, Offset); 303 if (!DataOrError) 304 return DataOrError.takeError(); 305 auto &Data = DataOrError.get(); 306 307 if (auto E = 308 P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread}, 309 {{P.internPath(Path), std::move(Data)}}})) 310 return std::move(E); 311 } 312 313 return P; 314 } 315 316 namespace { 317 318 struct StackEntry { 319 uint64_t Timestamp; 320 Profile::FuncID FuncId; 321 }; 322 323 } // namespace 324 325 Expected<Profile> profileFromTrace(const Trace &T) { 326 Profile P; 327 328 // The implementation of the algorithm re-creates the execution of 329 // the functions based on the trace data. To do this, we set up a number of 330 // data structures to track the execution context of every thread in the 331 // Trace. 332 DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks; 333 DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>> 334 ThreadPathData; 335 336 // We then do a pass through the Trace to account data on a per-thread-basis. 337 for (const auto &E : T) { 338 auto &TSD = ThreadStacks[E.TId]; 339 switch (E.Type) { 340 case RecordTypes::ENTER: 341 case RecordTypes::ENTER_ARG: 342 343 // Push entries into the function call stack. 344 TSD.push_back({E.TSC, E.FuncId}); 345 break; 346 347 case RecordTypes::EXIT: 348 case RecordTypes::TAIL_EXIT: 349 350 // Exits cause some accounting to happen, based on the state of the stack. 351 // For each function we pop off the stack, we take note of the path and 352 // record the cumulative state for this path. As we're doing this, we 353 // intern the path into the Profile. 354 while (!TSD.empty()) { 355 auto Top = TSD.back(); 356 auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC); 357 SmallVector<Profile::FuncID, 16> Path; 358 transform(reverse(TSD), std::back_inserter(Path), 359 std::mem_fn(&StackEntry::FuncId)); 360 auto InternedPath = P.internPath(Path); 361 auto &TPD = ThreadPathData[E.TId][InternedPath]; 362 ++TPD.CallCount; 363 TPD.CumulativeLocalTime += FunctionLocalTime; 364 TSD.pop_back(); 365 366 // If we've matched the corresponding entry event for this function, 367 // then we exit the loop. 368 if (Top.FuncId == E.FuncId) 369 break; 370 371 // FIXME: Consider the intermediate times and the cumulative tree time 372 // as well. 373 } 374 375 break; 376 377 case RecordTypes::CUSTOM_EVENT: 378 case RecordTypes::TYPED_EVENT: 379 // TODO: Support an extension point to allow handling of custom and typed 380 // events in profiles. 381 break; 382 } 383 } 384 385 // Once we've gone through the Trace, we now create one Block per thread in 386 // the Profile. 387 for (const auto &ThreadPaths : ThreadPathData) { 388 const auto &TID = ThreadPaths.first; 389 const auto &PathsData = ThreadPaths.second; 390 if (auto E = P.addBlock({ 391 TID, 392 std::vector<std::pair<Profile::PathID, Profile::Data>>( 393 PathsData.begin(), PathsData.end()), 394 })) 395 return std::move(E); 396 } 397 398 return P; 399 } 400 401 } // namespace xray 402 } // namespace llvm 403