//===-- function_call_trie_test.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a function call tracing system.
//
//===----------------------------------------------------------------------===//
#include "xray_function_call_trie.h"
#include "gtest/gtest.h"
#include <cstdint>
#include <limits>
#include <new>
#include <type_traits>

namespace __xray {

namespace {

TEST(FunctionCallTrieTest, ConstructWithTLSAllocators) {
  profilingFlags()->setDefaults();
  FunctionCallTrie::Allocators Allocators = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocators);
}

TEST(FunctionCallTrieTest, EnterAndExitFunction) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  uint64_t TSC = 1;
  uint16_t CPU = 0;
  Trie.enterFunction(1, TSC++, CPU++);
  Trie.exitFunction(1, TSC++, CPU++);
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime, 1u);
}

TEST(FunctionCallTrieTest, HandleTSCOverflow) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, std::numeric_limits<uint64_t>::max(), 0);
  Trie.exitFunction(1, 1, 0);
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime, 1u);
}

TEST(FunctionCallTrieTest, MaximalCumulativeTime) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, 1, 0);
  Trie.exitFunction(1, 0, 0);
  const auto &R = Trie.getRoots();

  ASSERT_EQ(R.size(), 1u);
  ASSERT_EQ(R.front()->FId, 1);
  ASSERT_EQ(R.front()->CallCount, 1u);
  ASSERT_EQ(R.front()->CumulativeLocalTime,
            std::numeric_limits<uint64_t>::max() - 1);
}

TEST(FunctionCallTrieTest, MissingFunctionEntry) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.exitFunction(1, 1, 0);
  const auto &R = Trie.getRoots();

  ASSERT_TRUE(R.empty());
}

TEST(FunctionCallTrieTest, NoMatchingEntersForExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(2, 1, 0);
  Trie.enterFunction(3, 3, 0);
  Trie.exitFunction(1, 5, 0);
  const auto &R = Trie.getRoots();

  ASSERT_FALSE(R.empty());
  EXPECT_EQ(R.size(), size_t{1});
}

TEST(FunctionCallTrieTest, MissingFunctionExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  Trie.enterFunction(1, 1, 0);
  const auto &R = Trie.getRoots();

  ASSERT_FALSE(R.empty());
  EXPECT_EQ(R.size(), size_t{1});
}
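
// NOTE: The following test is an illustrative sketch, not part of the
// original suite. It assumes that a direct recursive call creates a child
// node keyed by the same FId under the root (i.e. nodes are keyed by call
// path, not merely by function), and that a frame whose exit event is seen
// directly is charged the full entry-to-exit span, consistent with the
// expectations in the other tests in this file.
TEST(FunctionCallTrieTest, RecursiveCallSketch) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // FId 1 calls itself once: enter twice, then exit twice.
  Trie.enterFunction(1, 0, 0);
  Trie.enterFunction(1, 1, 0);
  Trie.exitFunction(1, 2, 0); // Matches the innermost frame.
  Trie.exitFunction(1, 3, 0); // Matches the outermost frame.

  const auto &R = Trie.getRoots();
  ASSERT_EQ(R.size(), 1u);
  const auto &Root = *R.front();
  EXPECT_EQ(Root.FId, 1);
  EXPECT_EQ(Root.CallCount, 1u);
  // Assumed: entry-to-exit span for the outer frame (0 -> 3).
  EXPECT_EQ(Root.CumulativeLocalTime, 3u);

  ASSERT_EQ(Root.Callees.size(), 1u);
  const auto &Inner = *Root.Callees[0].NodePtr;
  EXPECT_EQ(Inner.FId, 1);
  EXPECT_EQ(Inner.CallCount, 1u);
  EXPECT_EQ(Inner.CumulativeLocalTime, 1u);
}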

TEST(FunctionCallTrieTest, MultipleRoots) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // Enter and exit FId = 1.
  Trie.enterFunction(1, 1, 0);
  Trie.exitFunction(1, 2, 0);

  // Enter and exit FId = 2.
  Trie.enterFunction(2, 3, 0);
  Trie.exitFunction(2, 4, 0);

  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 2u);

  const auto R0 = R[0];
  const auto R1 = R[1];
  ASSERT_NE(R0, nullptr);
  ASSERT_NE(R1, nullptr);

  // Make sure the roots have different IDs.
  ASSERT_NE(R0->FId, R1->FId);

  // Inspect the roots to make sure they have the right data.
  EXPECT_EQ(R0->CallCount, 1u);
  EXPECT_EQ(R0->CumulativeLocalTime, 1u);

  EXPECT_EQ(R1->CallCount, 1u);
  EXPECT_EQ(R1->CumulativeLocalTime, 1u);
}

// While missing an intermediary exit may be rare in practice, we still enforce
// that we can handle the case where we've somehow missed an exit event in
// between call entries/exits. To illustrate, imagine the following shadow call
// stack:
//
//   f0@t0 -> f1@t1 -> f2@t2
//
// If for whatever reason we see an exit for `f2` @ t3, followed by an exit for
// `f0` @ t4 (i.e. no `f1` exit in between), then we need to account local time
// to `f2` as d = (t3 - t2); then, when unwinding at `f0`'s exit, local time to
// `f1` as d' = (t4 - t1), and local time to `f0` as d'' = (t4 - t0) - d'.
TEST(FunctionCallTrieTest, MissingIntermediaryExit) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, 0, 0);
  Trie.enterFunction(2, 100, 0);
  Trie.enterFunction(3, 200, 0);
  Trie.exitFunction(3, 300, 0);
  Trie.exitFunction(1, 400, 0);

  // What we should see at this point is all the functions in the trie in a
  // specific order (1 -> 2 -> 3) with the appropriate count(s) and local
  // latencies.
  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 1u);

  const auto &F1 = *R[0];
  ASSERT_EQ(F1.FId, 1);
  ASSERT_FALSE(F1.Callees.empty());

  const auto &F2 = *F1.Callees[0].NodePtr;
  ASSERT_EQ(F2.FId, 2);
  ASSERT_FALSE(F2.Callees.empty());

  const auto &F3 = *F2.Callees[0].NodePtr;
  ASSERT_EQ(F3.FId, 3);
  ASSERT_TRUE(F3.Callees.empty());

  // Now that we've established the preconditions, we check for specific
  // aspects of the nodes.
  EXPECT_EQ(F3.CallCount, 1u);
  EXPECT_EQ(F2.CallCount, 1u);
  EXPECT_EQ(F1.CallCount, 1u);
  EXPECT_EQ(F3.CumulativeLocalTime, 100u);
  EXPECT_EQ(F2.CumulativeLocalTime, 300u);
  EXPECT_EQ(F1.CumulativeLocalTime, 100u);
}
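
// Mapping the formulas from the comment above onto the concrete TSCs used in
// MissingIntermediaryExit: t0 = 0, t1 = 100, t2 = 200, t3 = 300, t4 = 400,
// giving d = 100, d' = 300, and d'' = 100; exactly the CumulativeLocalTime
// values asserted for F3, F2, and F1.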

TEST(FunctionCallTrieTest, DeepCallStack) {
  // Simulate a relatively deep call stack (32 levels) and ensure that we can
  // properly pop all the way up the stack.
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
  for (int i = 0; i < 32; ++i)
    Trie.enterFunction(i + 1, i, 0);
  Trie.exitFunction(1, 33, 0);

  // Here, validate that we have a 32-level deep function call path from the
  // root (FId 1) down to the leaf (FId 32).
  const auto &R = Trie.getRoots();
  ASSERT_EQ(R.size(), 1u);
  auto F = R[0];
  for (int i = 0; i < 32; ++i) {
    EXPECT_EQ(F->FId, i + 1);
    EXPECT_EQ(F->CallCount, 1u);
    if (F->Callees.empty() && i != 31)
      FAIL() << "Empty callees for FId " << F->FId;
    if (i != 31)
      F = F->Callees[0].NodePtr;
  }
}

// TODO: Test that we can handle cross-CPU migrations, where TSCs are not
// guaranteed to be synchronised.
TEST(FunctionCallTrieTest, DeepCopy) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  Trie.enterFunction(1, 0, 0);
  Trie.enterFunction(2, 1, 0);
  Trie.exitFunction(2, 2, 0);
  Trie.enterFunction(3, 3, 0);
  Trie.exitFunction(3, 4, 0);
  Trie.exitFunction(1, 5, 0);

  // We want to make a deep copy and compare notes.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Copy(B);
  Trie.deepCopyInto(Copy);

  ASSERT_NE(Trie.getRoots().size(), 0u);
  ASSERT_EQ(Trie.getRoots().size(), Copy.getRoots().size());
  const auto &R0Orig = *Trie.getRoots()[0];
  const auto &R0Copy = *Copy.getRoots()[0];
  EXPECT_EQ(R0Orig.FId, 1);
  EXPECT_EQ(R0Orig.FId, R0Copy.FId);

  ASSERT_EQ(R0Orig.Callees.size(), 2u);
  ASSERT_EQ(R0Copy.Callees.size(), 2u);

  const auto &F1Orig =
      *R0Orig.Callees
           .find_element(
               [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
           ->NodePtr;
  const auto &F1Copy =
      *R0Copy.Callees
           .find_element(
               [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; })
           ->NodePtr;
  EXPECT_EQ(&R0Orig, F1Orig.Parent);
  EXPECT_EQ(&R0Copy, F1Copy.Parent);
}

TEST(FunctionCallTrieTest, MergeInto) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie T0(A);
  FunctionCallTrie T1(A);

  // 1 -> 2 -> 3
  T0.enterFunction(1, 0, 0);
  T0.enterFunction(2, 1, 0);
  T0.enterFunction(3, 2, 0);
  T0.exitFunction(3, 3, 0);
  T0.exitFunction(2, 4, 0);
  T0.exitFunction(1, 5, 0);

  // 1 -> 2 -> 3
  T1.enterFunction(1, 0, 0);
  T1.enterFunction(2, 1, 0);
  T1.enterFunction(3, 2, 0);
  T1.exitFunction(3, 3, 0);
  T1.exitFunction(2, 4, 0);
  T1.exitFunction(1, 5, 0);

  // We use a different allocator here to make sure that we're able to
  // transfer data into a FunctionCallTrie which uses a different allocator.
  // This reflects the intended usage scenario for when we're collecting
  // profiles that aggregate across threads.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Merged(B);

  T0.mergeInto(Merged);
  T1.mergeInto(Merged);

  ASSERT_EQ(Merged.getRoots().size(), 1u);
  const auto &R0 = *Merged.getRoots()[0];
  EXPECT_EQ(R0.FId, 1);
  EXPECT_EQ(R0.CallCount, 2u);
  EXPECT_EQ(R0.CumulativeLocalTime, 10u);
  EXPECT_EQ(R0.Callees.size(), 1u);

  const auto &F1 = *R0.Callees[0].NodePtr;
  EXPECT_EQ(F1.FId, 2);
  EXPECT_EQ(F1.CallCount, 2u);
  EXPECT_EQ(F1.CumulativeLocalTime, 6u);
  EXPECT_EQ(F1.Callees.size(), 1u);

  const auto &F2 = *F1.Callees[0].NodePtr;
  EXPECT_EQ(F2.FId, 3);
  EXPECT_EQ(F2.CallCount, 2u);
  EXPECT_EQ(F2.CumulativeLocalTime, 2u);
  EXPECT_EQ(F2.Callees.size(), 0u);
}
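
// NOTE: The following test is an illustrative sketch, not part of the
// original suite. It assumes that mergeInto matches roots by FId (as the
// matching-path case above demonstrates), so tries rooted at different
// functions should merge into distinct roots rather than being combined.
TEST(FunctionCallTrieTest, MergeIntoDisjointRootsSketch) {
  profilingFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie T0(A);
  FunctionCallTrie T1(A);

  // T0 only ever sees FId 1; T1 only ever sees FId 2.
  T0.enterFunction(1, 0, 0);
  T0.exitFunction(1, 2, 0);
  T1.enterFunction(2, 0, 0);
  T1.exitFunction(2, 3, 0);

  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Merged(B);
  T0.mergeInto(Merged);
  T1.mergeInto(Merged);

  // Each source trie contributes its own root; neither merges into the other.
  const auto &R = Merged.getRoots();
  ASSERT_EQ(R.size(), 2u);
  ASSERT_NE(R[0]->FId, R[1]->FId);
  EXPECT_EQ(R[0]->CallCount, 1u);
  EXPECT_EQ(R[1]->CallCount, 1u);
}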

TEST(FunctionCallTrieTest, PlacementNewOnAlignedStorage) {
  profilingFlags()->setDefaults();
  typename std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
                                alignof(FunctionCallTrie::Allocators)>::type
      AllocatorsStorage;
  new (&AllocatorsStorage)
      FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  auto *A =
      reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage);

  typename std::aligned_storage<sizeof(FunctionCallTrie),
                                alignof(FunctionCallTrie)>::type FCTStorage;
  new (&FCTStorage) FunctionCallTrie(*A);
  auto *T = reinterpret_cast<FunctionCallTrie *>(&FCTStorage);

  // Put some data into it.
  T->enterFunction(1, 0, 0);
  T->exitFunction(1, 1, 0);

  // Re-initialize the objects in storage.
  T->~FunctionCallTrie();
  A->~Allocators();
  new (A) FunctionCallTrie::Allocators(FunctionCallTrie::InitAllocators());
  new (T) FunctionCallTrie(*A);

  // Then put some data into it again.
  T->enterFunction(1, 0, 0);
  T->exitFunction(1, 1, 0);
}

} // namespace

} // namespace __xray
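
// NOTE (not part of the original file): as a compiler-rt unit test, this file
// is built and run through the project's usual CMake/lit harness rather than
// standalone; from a compiler-rt build tree, a target along the lines of
// `ninja check-xray` is the typical entry point, though the exact target name
// is an assumption and may vary by checkout.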