1 //===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Instrumentation-based profile-guided optimization 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "CodeGenPGO.h" 15 #include "CodeGenFunction.h" 16 #include "CoverageMappingGen.h" 17 #include "clang/AST/RecursiveASTVisitor.h" 18 #include "clang/AST/StmtVisitor.h" 19 #include "llvm/IR/Intrinsics.h" 20 #include "llvm/IR/MDBuilder.h" 21 #include "llvm/Support/Endian.h" 22 #include "llvm/Support/FileSystem.h" 23 #include "llvm/Support/MD5.h" 24 25 static llvm::cl::opt<bool> EnableValueProfiling( 26 "enable-value-profiling", llvm::cl::ZeroOrMore, 27 llvm::cl::desc("Enable value profiling"), llvm::cl::init(false)); 28 29 using namespace clang; 30 using namespace CodeGen; 31 32 void CodeGenPGO::setFuncName(StringRef Name, 33 llvm::GlobalValue::LinkageTypes Linkage) { 34 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); 35 FuncName = llvm::getPGOFuncName( 36 Name, Linkage, CGM.getCodeGenOpts().MainFileName, 37 PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version); 38 39 // If we're generating a profile, create a variable for the name. 40 if (CGM.getCodeGenOpts().hasProfileClangInstr()) 41 FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName); 42 } 43 44 void CodeGenPGO::setFuncName(llvm::Function *Fn) { 45 setFuncName(Fn->getName(), Fn->getLinkage()); 46 } 47 48 namespace { 49 /// \brief Stable hasher for PGO region counters. 50 /// 51 /// PGOHash produces a stable hash of a given function's control flow. 52 /// 53 /// Changing the output of this hash will invalidate all previously generated 54 /// profiles -- i.e., don't do it. 55 /// 56 /// \note When this hash does eventually change (years?), we still need to 57 /// support old hashes. We'll need to pull in the version number from the 58 /// profile data format and use the matching hash function. 59 class PGOHash { 60 uint64_t Working; 61 unsigned Count; 62 llvm::MD5 MD5; 63 64 static const int NumBitsPerType = 6; 65 static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType; 66 static const unsigned TooBig = 1u << NumBitsPerType; 67 68 public: 69 /// \brief Hash values for AST nodes. 70 /// 71 /// Distinct values for AST nodes that have region counters attached. 72 /// 73 /// These values must be stable. All new members must be added at the end, 74 /// and no members should be removed. Changing the enumeration value for an 75 /// AST node will affect the hash of every function that contains that node. 76 enum HashType : unsigned char { 77 None = 0, 78 LabelStmt = 1, 79 WhileStmt, 80 DoStmt, 81 ForStmt, 82 CXXForRangeStmt, 83 ObjCForCollectionStmt, 84 SwitchStmt, 85 CaseStmt, 86 DefaultStmt, 87 IfStmt, 88 CXXTryStmt, 89 CXXCatchStmt, 90 ConditionalOperator, 91 BinaryOperatorLAnd, 92 BinaryOperatorLOr, 93 BinaryConditionalOperator, 94 95 // Keep this last. It's for the static assert that follows. 96 LastHashType 97 }; 98 static_assert(LastHashType <= TooBig, "Too many types in HashType"); 99 100 // TODO: When this format changes, take in a version number here, and use the 101 // old hash calculation for file formats that used the old hash. 102 PGOHash() : Working(0), Count(0) {} 103 void combine(HashType Type); 104 uint64_t finalize(); 105 }; 106 const int PGOHash::NumBitsPerType; 107 const unsigned PGOHash::NumTypesPerWord; 108 const unsigned PGOHash::TooBig; 109 110 /// A RecursiveASTVisitor that fills a map of statements to PGO counters. 111 struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { 112 /// The next counter value to assign. 113 unsigned NextCounter; 114 /// The function hash. 115 PGOHash Hash; 116 /// The map of statements to counters. 117 llvm::DenseMap<const Stmt *, unsigned> &CounterMap; 118 119 MapRegionCounters(llvm::DenseMap<const Stmt *, unsigned> &CounterMap) 120 : NextCounter(0), CounterMap(CounterMap) {} 121 122 // Blocks and lambdas are handled as separate functions, so we need not 123 // traverse them in the parent context. 124 bool TraverseBlockExpr(BlockExpr *BE) { return true; } 125 bool TraverseLambdaBody(LambdaExpr *LE) { return true; } 126 bool TraverseCapturedStmt(CapturedStmt *CS) { return true; } 127 128 bool VisitDecl(const Decl *D) { 129 switch (D->getKind()) { 130 default: 131 break; 132 case Decl::Function: 133 case Decl::CXXMethod: 134 case Decl::CXXConstructor: 135 case Decl::CXXDestructor: 136 case Decl::CXXConversion: 137 case Decl::ObjCMethod: 138 case Decl::Block: 139 case Decl::Captured: 140 CounterMap[D->getBody()] = NextCounter++; 141 break; 142 } 143 return true; 144 } 145 146 bool VisitStmt(const Stmt *S) { 147 auto Type = getHashType(S); 148 if (Type == PGOHash::None) 149 return true; 150 151 CounterMap[S] = NextCounter++; 152 Hash.combine(Type); 153 return true; 154 } 155 PGOHash::HashType getHashType(const Stmt *S) { 156 switch (S->getStmtClass()) { 157 default: 158 break; 159 case Stmt::LabelStmtClass: 160 return PGOHash::LabelStmt; 161 case Stmt::WhileStmtClass: 162 return PGOHash::WhileStmt; 163 case Stmt::DoStmtClass: 164 return PGOHash::DoStmt; 165 case Stmt::ForStmtClass: 166 return PGOHash::ForStmt; 167 case Stmt::CXXForRangeStmtClass: 168 return PGOHash::CXXForRangeStmt; 169 case Stmt::ObjCForCollectionStmtClass: 170 return PGOHash::ObjCForCollectionStmt; 171 case Stmt::SwitchStmtClass: 172 return PGOHash::SwitchStmt; 173 case Stmt::CaseStmtClass: 174 return PGOHash::CaseStmt; 175 case Stmt::DefaultStmtClass: 176 return PGOHash::DefaultStmt; 177 case Stmt::IfStmtClass: 178 return PGOHash::IfStmt; 179 case Stmt::CXXTryStmtClass: 180 return PGOHash::CXXTryStmt; 181 case Stmt::CXXCatchStmtClass: 182 return PGOHash::CXXCatchStmt; 183 case Stmt::ConditionalOperatorClass: 184 return PGOHash::ConditionalOperator; 185 case Stmt::BinaryConditionalOperatorClass: 186 return PGOHash::BinaryConditionalOperator; 187 case Stmt::BinaryOperatorClass: { 188 const BinaryOperator *BO = cast<BinaryOperator>(S); 189 if (BO->getOpcode() == BO_LAnd) 190 return PGOHash::BinaryOperatorLAnd; 191 if (BO->getOpcode() == BO_LOr) 192 return PGOHash::BinaryOperatorLOr; 193 break; 194 } 195 } 196 return PGOHash::None; 197 } 198 }; 199 200 /// A StmtVisitor that propagates the raw counts through the AST and 201 /// records the count at statements where the value may change. 202 struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> { 203 /// PGO state. 204 CodeGenPGO &PGO; 205 206 /// A flag that is set when the current count should be recorded on the 207 /// next statement, such as at the exit of a loop. 208 bool RecordNextStmtCount; 209 210 /// The count at the current location in the traversal. 211 uint64_t CurrentCount; 212 213 /// The map of statements to count values. 214 llvm::DenseMap<const Stmt *, uint64_t> &CountMap; 215 216 /// BreakContinueStack - Keep counts of breaks and continues inside loops. 217 struct BreakContinue { 218 uint64_t BreakCount; 219 uint64_t ContinueCount; 220 BreakContinue() : BreakCount(0), ContinueCount(0) {} 221 }; 222 SmallVector<BreakContinue, 8> BreakContinueStack; 223 224 ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap, 225 CodeGenPGO &PGO) 226 : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {} 227 228 void RecordStmtCount(const Stmt *S) { 229 if (RecordNextStmtCount) { 230 CountMap[S] = CurrentCount; 231 RecordNextStmtCount = false; 232 } 233 } 234 235 /// Set and return the current count. 236 uint64_t setCount(uint64_t Count) { 237 CurrentCount = Count; 238 return Count; 239 } 240 241 void VisitStmt(const Stmt *S) { 242 RecordStmtCount(S); 243 for (const Stmt *Child : S->children()) 244 if (Child) 245 this->Visit(Child); 246 } 247 248 void VisitFunctionDecl(const FunctionDecl *D) { 249 // Counter tracks entry to the function body. 250 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody())); 251 CountMap[D->getBody()] = BodyCount; 252 Visit(D->getBody()); 253 } 254 255 // Skip lambda expressions. We visit these as FunctionDecls when we're 256 // generating them and aren't interested in the body when generating a 257 // parent context. 258 void VisitLambdaExpr(const LambdaExpr *LE) {} 259 260 void VisitCapturedDecl(const CapturedDecl *D) { 261 // Counter tracks entry to the capture body. 262 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody())); 263 CountMap[D->getBody()] = BodyCount; 264 Visit(D->getBody()); 265 } 266 267 void VisitObjCMethodDecl(const ObjCMethodDecl *D) { 268 // Counter tracks entry to the method body. 269 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody())); 270 CountMap[D->getBody()] = BodyCount; 271 Visit(D->getBody()); 272 } 273 274 void VisitBlockDecl(const BlockDecl *D) { 275 // Counter tracks entry to the block body. 276 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody())); 277 CountMap[D->getBody()] = BodyCount; 278 Visit(D->getBody()); 279 } 280 281 void VisitReturnStmt(const ReturnStmt *S) { 282 RecordStmtCount(S); 283 if (S->getRetValue()) 284 Visit(S->getRetValue()); 285 CurrentCount = 0; 286 RecordNextStmtCount = true; 287 } 288 289 void VisitCXXThrowExpr(const CXXThrowExpr *E) { 290 RecordStmtCount(E); 291 if (E->getSubExpr()) 292 Visit(E->getSubExpr()); 293 CurrentCount = 0; 294 RecordNextStmtCount = true; 295 } 296 297 void VisitGotoStmt(const GotoStmt *S) { 298 RecordStmtCount(S); 299 CurrentCount = 0; 300 RecordNextStmtCount = true; 301 } 302 303 void VisitLabelStmt(const LabelStmt *S) { 304 RecordNextStmtCount = false; 305 // Counter tracks the block following the label. 306 uint64_t BlockCount = setCount(PGO.getRegionCount(S)); 307 CountMap[S] = BlockCount; 308 Visit(S->getSubStmt()); 309 } 310 311 void VisitBreakStmt(const BreakStmt *S) { 312 RecordStmtCount(S); 313 assert(!BreakContinueStack.empty() && "break not in a loop or switch!"); 314 BreakContinueStack.back().BreakCount += CurrentCount; 315 CurrentCount = 0; 316 RecordNextStmtCount = true; 317 } 318 319 void VisitContinueStmt(const ContinueStmt *S) { 320 RecordStmtCount(S); 321 assert(!BreakContinueStack.empty() && "continue stmt not in a loop!"); 322 BreakContinueStack.back().ContinueCount += CurrentCount; 323 CurrentCount = 0; 324 RecordNextStmtCount = true; 325 } 326 327 void VisitWhileStmt(const WhileStmt *S) { 328 RecordStmtCount(S); 329 uint64_t ParentCount = CurrentCount; 330 331 BreakContinueStack.push_back(BreakContinue()); 332 // Visit the body region first so the break/continue adjustments can be 333 // included when visiting the condition. 334 uint64_t BodyCount = setCount(PGO.getRegionCount(S)); 335 CountMap[S->getBody()] = CurrentCount; 336 Visit(S->getBody()); 337 uint64_t BackedgeCount = CurrentCount; 338 339 // ...then go back and propagate counts through the condition. The count 340 // at the start of the condition is the sum of the incoming edges, 341 // the backedge from the end of the loop body, and the edges from 342 // continue statements. 343 BreakContinue BC = BreakContinueStack.pop_back_val(); 344 uint64_t CondCount = 345 setCount(ParentCount + BackedgeCount + BC.ContinueCount); 346 CountMap[S->getCond()] = CondCount; 347 Visit(S->getCond()); 348 setCount(BC.BreakCount + CondCount - BodyCount); 349 RecordNextStmtCount = true; 350 } 351 352 void VisitDoStmt(const DoStmt *S) { 353 RecordStmtCount(S); 354 uint64_t LoopCount = PGO.getRegionCount(S); 355 356 BreakContinueStack.push_back(BreakContinue()); 357 // The count doesn't include the fallthrough from the parent scope. Add it. 358 uint64_t BodyCount = setCount(LoopCount + CurrentCount); 359 CountMap[S->getBody()] = BodyCount; 360 Visit(S->getBody()); 361 uint64_t BackedgeCount = CurrentCount; 362 363 BreakContinue BC = BreakContinueStack.pop_back_val(); 364 // The count at the start of the condition is equal to the count at the 365 // end of the body, plus any continues. 366 uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount); 367 CountMap[S->getCond()] = CondCount; 368 Visit(S->getCond()); 369 setCount(BC.BreakCount + CondCount - LoopCount); 370 RecordNextStmtCount = true; 371 } 372 373 void VisitForStmt(const ForStmt *S) { 374 RecordStmtCount(S); 375 if (S->getInit()) 376 Visit(S->getInit()); 377 378 uint64_t ParentCount = CurrentCount; 379 380 BreakContinueStack.push_back(BreakContinue()); 381 // Visit the body region first. (This is basically the same as a while 382 // loop; see further comments in VisitWhileStmt.) 383 uint64_t BodyCount = setCount(PGO.getRegionCount(S)); 384 CountMap[S->getBody()] = BodyCount; 385 Visit(S->getBody()); 386 uint64_t BackedgeCount = CurrentCount; 387 BreakContinue BC = BreakContinueStack.pop_back_val(); 388 389 // The increment is essentially part of the body but it needs to include 390 // the count for all the continue statements. 391 if (S->getInc()) { 392 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount); 393 CountMap[S->getInc()] = IncCount; 394 Visit(S->getInc()); 395 } 396 397 // ...then go back and propagate counts through the condition. 398 uint64_t CondCount = 399 setCount(ParentCount + BackedgeCount + BC.ContinueCount); 400 if (S->getCond()) { 401 CountMap[S->getCond()] = CondCount; 402 Visit(S->getCond()); 403 } 404 setCount(BC.BreakCount + CondCount - BodyCount); 405 RecordNextStmtCount = true; 406 } 407 408 void VisitCXXForRangeStmt(const CXXForRangeStmt *S) { 409 RecordStmtCount(S); 410 Visit(S->getLoopVarStmt()); 411 Visit(S->getRangeStmt()); 412 Visit(S->getBeginEndStmt()); 413 414 uint64_t ParentCount = CurrentCount; 415 BreakContinueStack.push_back(BreakContinue()); 416 // Visit the body region first. (This is basically the same as a while 417 // loop; see further comments in VisitWhileStmt.) 418 uint64_t BodyCount = setCount(PGO.getRegionCount(S)); 419 CountMap[S->getBody()] = BodyCount; 420 Visit(S->getBody()); 421 uint64_t BackedgeCount = CurrentCount; 422 BreakContinue BC = BreakContinueStack.pop_back_val(); 423 424 // The increment is essentially part of the body but it needs to include 425 // the count for all the continue statements. 426 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount); 427 CountMap[S->getInc()] = IncCount; 428 Visit(S->getInc()); 429 430 // ...then go back and propagate counts through the condition. 431 uint64_t CondCount = 432 setCount(ParentCount + BackedgeCount + BC.ContinueCount); 433 CountMap[S->getCond()] = CondCount; 434 Visit(S->getCond()); 435 setCount(BC.BreakCount + CondCount - BodyCount); 436 RecordNextStmtCount = true; 437 } 438 439 void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) { 440 RecordStmtCount(S); 441 Visit(S->getElement()); 442 uint64_t ParentCount = CurrentCount; 443 BreakContinueStack.push_back(BreakContinue()); 444 // Counter tracks the body of the loop. 445 uint64_t BodyCount = setCount(PGO.getRegionCount(S)); 446 CountMap[S->getBody()] = BodyCount; 447 Visit(S->getBody()); 448 uint64_t BackedgeCount = CurrentCount; 449 BreakContinue BC = BreakContinueStack.pop_back_val(); 450 451 setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount - 452 BodyCount); 453 RecordNextStmtCount = true; 454 } 455 456 void VisitSwitchStmt(const SwitchStmt *S) { 457 RecordStmtCount(S); 458 Visit(S->getCond()); 459 CurrentCount = 0; 460 BreakContinueStack.push_back(BreakContinue()); 461 Visit(S->getBody()); 462 // If the switch is inside a loop, add the continue counts. 463 BreakContinue BC = BreakContinueStack.pop_back_val(); 464 if (!BreakContinueStack.empty()) 465 BreakContinueStack.back().ContinueCount += BC.ContinueCount; 466 // Counter tracks the exit block of the switch. 467 setCount(PGO.getRegionCount(S)); 468 RecordNextStmtCount = true; 469 } 470 471 void VisitSwitchCase(const SwitchCase *S) { 472 RecordNextStmtCount = false; 473 // Counter for this particular case. This counts only jumps from the 474 // switch header and does not include fallthrough from the case before 475 // this one. 476 uint64_t CaseCount = PGO.getRegionCount(S); 477 setCount(CurrentCount + CaseCount); 478 // We need the count without fallthrough in the mapping, so it's more useful 479 // for branch probabilities. 480 CountMap[S] = CaseCount; 481 RecordNextStmtCount = true; 482 Visit(S->getSubStmt()); 483 } 484 485 void VisitIfStmt(const IfStmt *S) { 486 RecordStmtCount(S); 487 uint64_t ParentCount = CurrentCount; 488 Visit(S->getCond()); 489 490 // Counter tracks the "then" part of an if statement. The count for 491 // the "else" part, if it exists, will be calculated from this counter. 492 uint64_t ThenCount = setCount(PGO.getRegionCount(S)); 493 CountMap[S->getThen()] = ThenCount; 494 Visit(S->getThen()); 495 uint64_t OutCount = CurrentCount; 496 497 uint64_t ElseCount = ParentCount - ThenCount; 498 if (S->getElse()) { 499 setCount(ElseCount); 500 CountMap[S->getElse()] = ElseCount; 501 Visit(S->getElse()); 502 OutCount += CurrentCount; 503 } else 504 OutCount += ElseCount; 505 setCount(OutCount); 506 RecordNextStmtCount = true; 507 } 508 509 void VisitCXXTryStmt(const CXXTryStmt *S) { 510 RecordStmtCount(S); 511 Visit(S->getTryBlock()); 512 for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I) 513 Visit(S->getHandler(I)); 514 // Counter tracks the continuation block of the try statement. 515 setCount(PGO.getRegionCount(S)); 516 RecordNextStmtCount = true; 517 } 518 519 void VisitCXXCatchStmt(const CXXCatchStmt *S) { 520 RecordNextStmtCount = false; 521 // Counter tracks the catch statement's handler block. 522 uint64_t CatchCount = setCount(PGO.getRegionCount(S)); 523 CountMap[S] = CatchCount; 524 Visit(S->getHandlerBlock()); 525 } 526 527 void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) { 528 RecordStmtCount(E); 529 uint64_t ParentCount = CurrentCount; 530 Visit(E->getCond()); 531 532 // Counter tracks the "true" part of a conditional operator. The 533 // count in the "false" part will be calculated from this counter. 534 uint64_t TrueCount = setCount(PGO.getRegionCount(E)); 535 CountMap[E->getTrueExpr()] = TrueCount; 536 Visit(E->getTrueExpr()); 537 uint64_t OutCount = CurrentCount; 538 539 uint64_t FalseCount = setCount(ParentCount - TrueCount); 540 CountMap[E->getFalseExpr()] = FalseCount; 541 Visit(E->getFalseExpr()); 542 OutCount += CurrentCount; 543 544 setCount(OutCount); 545 RecordNextStmtCount = true; 546 } 547 548 void VisitBinLAnd(const BinaryOperator *E) { 549 RecordStmtCount(E); 550 uint64_t ParentCount = CurrentCount; 551 Visit(E->getLHS()); 552 // Counter tracks the right hand side of a logical and operator. 553 uint64_t RHSCount = setCount(PGO.getRegionCount(E)); 554 CountMap[E->getRHS()] = RHSCount; 555 Visit(E->getRHS()); 556 setCount(ParentCount + RHSCount - CurrentCount); 557 RecordNextStmtCount = true; 558 } 559 560 void VisitBinLOr(const BinaryOperator *E) { 561 RecordStmtCount(E); 562 uint64_t ParentCount = CurrentCount; 563 Visit(E->getLHS()); 564 // Counter tracks the right hand side of a logical or operator. 565 uint64_t RHSCount = setCount(PGO.getRegionCount(E)); 566 CountMap[E->getRHS()] = RHSCount; 567 Visit(E->getRHS()); 568 setCount(ParentCount + RHSCount - CurrentCount); 569 RecordNextStmtCount = true; 570 } 571 }; 572 } // end anonymous namespace 573 574 void PGOHash::combine(HashType Type) { 575 // Check that we never combine 0 and only have six bits. 576 assert(Type && "Hash is invalid: unexpected type 0"); 577 assert(unsigned(Type) < TooBig && "Hash is invalid: too many types"); 578 579 // Pass through MD5 if enough work has built up. 580 if (Count && Count % NumTypesPerWord == 0) { 581 using namespace llvm::support; 582 uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working); 583 MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped))); 584 Working = 0; 585 } 586 587 // Accumulate the current type. 588 ++Count; 589 Working = Working << NumBitsPerType | Type; 590 } 591 592 uint64_t PGOHash::finalize() { 593 // Use Working as the hash directly if we never used MD5. 594 if (Count <= NumTypesPerWord) 595 // No need to byte swap here, since none of the math was endian-dependent. 596 // This number will be byte-swapped as required on endianness transitions, 597 // so we will see the same value on the other side. 598 return Working; 599 600 // Check for remaining work in Working. 601 if (Working) 602 MD5.update(Working); 603 604 // Finalize the MD5 and return the hash. 605 llvm::MD5::MD5Result Result; 606 MD5.final(Result); 607 using namespace llvm::support; 608 return endian::read<uint64_t, little, unaligned>(Result); 609 } 610 611 void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) { 612 const Decl *D = GD.getDecl(); 613 bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr(); 614 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); 615 if (!InstrumentRegions && !PGOReader) 616 return; 617 if (D->isImplicit()) 618 return; 619 // Constructors and destructors may be represented by several functions in IR. 620 // If so, instrument only base variant, others are implemented by delegation 621 // to the base one, it would be counted twice otherwise. 622 if (CGM.getTarget().getCXXABI().hasConstructorVariants() && 623 ((isa<CXXConstructorDecl>(GD.getDecl()) && 624 GD.getCtorType() != Ctor_Base) || 625 (isa<CXXDestructorDecl>(GD.getDecl()) && 626 GD.getDtorType() != Dtor_Base))) { 627 return; 628 } 629 CGM.ClearUnusedCoverageMapping(D); 630 setFuncName(Fn); 631 632 mapRegionCounters(D); 633 if (CGM.getCodeGenOpts().CoverageMapping) 634 emitCounterRegionMapping(D); 635 if (PGOReader) { 636 SourceManager &SM = CGM.getContext().getSourceManager(); 637 loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation())); 638 computeRegionCounts(D); 639 applyFunctionAttributes(PGOReader, Fn); 640 } 641 } 642 643 void CodeGenPGO::mapRegionCounters(const Decl *D) { 644 RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>); 645 MapRegionCounters Walker(*RegionCounterMap); 646 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) 647 Walker.TraverseDecl(const_cast<FunctionDecl *>(FD)); 648 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D)) 649 Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD)); 650 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D)) 651 Walker.TraverseDecl(const_cast<BlockDecl *>(BD)); 652 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D)) 653 Walker.TraverseDecl(const_cast<CapturedDecl *>(CD)); 654 assert(Walker.NextCounter > 0 && "no entry counter mapped for decl"); 655 NumRegionCounters = Walker.NextCounter; 656 FunctionHash = Walker.Hash.finalize(); 657 } 658 659 void CodeGenPGO::emitCounterRegionMapping(const Decl *D) { 660 if (SkipCoverageMapping) 661 return; 662 // Don't map the functions inside the system headers 663 auto Loc = D->getBody()->getLocStart(); 664 if (CGM.getContext().getSourceManager().isInSystemHeader(Loc)) 665 return; 666 667 std::string CoverageMapping; 668 llvm::raw_string_ostream OS(CoverageMapping); 669 CoverageMappingGen MappingGen(*CGM.getCoverageMapping(), 670 CGM.getContext().getSourceManager(), 671 CGM.getLangOpts(), RegionCounterMap.get()); 672 MappingGen.emitCounterMapping(D, OS); 673 OS.flush(); 674 675 if (CoverageMapping.empty()) 676 return; 677 678 CGM.getCoverageMapping()->addFunctionMappingRecord( 679 FuncNameVar, FuncName, FunctionHash, CoverageMapping); 680 } 681 682 void 683 CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name, 684 llvm::GlobalValue::LinkageTypes Linkage) { 685 if (SkipCoverageMapping) 686 return; 687 // Don't map the functions inside the system headers 688 auto Loc = D->getBody()->getLocStart(); 689 if (CGM.getContext().getSourceManager().isInSystemHeader(Loc)) 690 return; 691 692 std::string CoverageMapping; 693 llvm::raw_string_ostream OS(CoverageMapping); 694 CoverageMappingGen MappingGen(*CGM.getCoverageMapping(), 695 CGM.getContext().getSourceManager(), 696 CGM.getLangOpts()); 697 MappingGen.emitEmptyMapping(D, OS); 698 OS.flush(); 699 700 if (CoverageMapping.empty()) 701 return; 702 703 setFuncName(Name, Linkage); 704 CGM.getCoverageMapping()->addFunctionMappingRecord( 705 FuncNameVar, FuncName, FunctionHash, CoverageMapping, false); 706 } 707 708 void CodeGenPGO::computeRegionCounts(const Decl *D) { 709 StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>); 710 ComputeRegionCounts Walker(*StmtCountMap, *this); 711 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) 712 Walker.VisitFunctionDecl(FD); 713 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D)) 714 Walker.VisitObjCMethodDecl(MD); 715 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D)) 716 Walker.VisitBlockDecl(BD); 717 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D)) 718 Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD)); 719 } 720 721 void 722 CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader, 723 llvm::Function *Fn) { 724 if (!haveRegionCounts()) 725 return; 726 727 uint64_t FunctionCount = getRegionCount(nullptr); 728 Fn->setEntryCount(FunctionCount); 729 } 730 731 void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S) { 732 if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap) 733 return; 734 if (!Builder.GetInsertBlock()) 735 return; 736 737 unsigned Counter = (*RegionCounterMap)[S]; 738 auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext()); 739 Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment), 740 {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 741 Builder.getInt64(FunctionHash), 742 Builder.getInt32(NumRegionCounters), 743 Builder.getInt32(Counter)}); 744 } 745 746 // This method either inserts a call to the profile run-time during 747 // instrumentation or puts profile data into metadata for PGO use. 748 void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind, 749 llvm::Instruction *ValueSite, llvm::Value *ValuePtr) { 750 751 if (!EnableValueProfiling) 752 return; 753 754 if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock()) 755 return; 756 757 bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr(); 758 if (InstrumentValueSites && RegionCounterMap) { 759 llvm::LLVMContext &Ctx = CGM.getLLVMContext(); 760 auto *I8PtrTy = llvm::Type::getInt8PtrTy(Ctx); 761 llvm::Value *Args[5] = { 762 llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), 763 Builder.getInt64(FunctionHash), 764 Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()), 765 Builder.getInt32(ValueKind), 766 Builder.getInt32(NumValueSites[ValueKind]++) 767 }; 768 Builder.CreateCall( 769 CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args); 770 return; 771 } 772 773 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader(); 774 if (PGOReader && haveRegionCounts()) { 775 // We record the top most called three functions at each call site. 776 // Profile metadata contains "VP" string identifying this metadata 777 // as value profiling data, then a uint32_t value for the value profiling 778 // kind, a uint64_t value for the total number of times the call is 779 // executed, followed by the function hash and execution count (uint64_t) 780 // pairs for each function. 781 if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind)) 782 return; 783 784 llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord, 785 (llvm::InstrProfValueKind)ValueKind, 786 NumValueSites[ValueKind]); 787 788 NumValueSites[ValueKind]++; 789 } 790 } 791 792 void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader, 793 bool IsInMainFile) { 794 CGM.getPGOStats().addVisited(IsInMainFile); 795 RegionCounts.clear(); 796 llvm::ErrorOr<llvm::InstrProfRecord> RecordErrorOr = 797 PGOReader->getInstrProfRecord(FuncName, FunctionHash); 798 if (std::error_code EC = RecordErrorOr.getError()) { 799 if (EC == llvm::instrprof_error::unknown_function) 800 CGM.getPGOStats().addMissing(IsInMainFile); 801 else if (EC == llvm::instrprof_error::hash_mismatch) 802 CGM.getPGOStats().addMismatched(IsInMainFile); 803 else if (EC == llvm::instrprof_error::malformed) 804 // TODO: Consider a more specific warning for this case. 805 CGM.getPGOStats().addMismatched(IsInMainFile); 806 return; 807 } 808 ProfRecord = 809 llvm::make_unique<llvm::InstrProfRecord>(std::move(RecordErrorOr.get())); 810 RegionCounts = ProfRecord->Counts; 811 } 812 813 /// \brief Calculate what to divide by to scale weights. 814 /// 815 /// Given the maximum weight, calculate a divisor that will scale all the 816 /// weights to strictly less than UINT32_MAX. 817 static uint64_t calculateWeightScale(uint64_t MaxWeight) { 818 return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1; 819 } 820 821 /// \brief Scale an individual branch weight (and add 1). 822 /// 823 /// Scale a 64-bit weight down to 32-bits using \c Scale. 824 /// 825 /// According to Laplace's Rule of Succession, it is better to compute the 826 /// weight based on the count plus 1, so universally add 1 to the value. 827 /// 828 /// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no 829 /// greater than \c Weight. 830 static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) { 831 assert(Scale && "scale by 0?"); 832 uint64_t Scaled = Weight / Scale + 1; 833 assert(Scaled <= UINT32_MAX && "overflow 32-bits"); 834 return Scaled; 835 } 836 837 llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount, 838 uint64_t FalseCount) { 839 // Check for empty weights. 840 if (!TrueCount && !FalseCount) 841 return nullptr; 842 843 // Calculate how to scale down to 32-bits. 844 uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount)); 845 846 llvm::MDBuilder MDHelper(CGM.getLLVMContext()); 847 return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale), 848 scaleBranchWeight(FalseCount, Scale)); 849 } 850 851 llvm::MDNode * 852 CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) { 853 // We need at least two elements to create meaningful weights. 854 if (Weights.size() < 2) 855 return nullptr; 856 857 // Check for empty weights. 858 uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end()); 859 if (MaxWeight == 0) 860 return nullptr; 861 862 // Calculate how to scale down to 32-bits. 863 uint64_t Scale = calculateWeightScale(MaxWeight); 864 865 SmallVector<uint32_t, 16> ScaledWeights; 866 ScaledWeights.reserve(Weights.size()); 867 for (uint64_t W : Weights) 868 ScaledWeights.push_back(scaleBranchWeight(W, Scale)); 869 870 llvm::MDBuilder MDHelper(CGM.getLLVMContext()); 871 return MDHelper.createBranchWeights(ScaledWeights); 872 } 873 874 llvm::MDNode *CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond, 875 uint64_t LoopCount) { 876 if (!PGO.haveRegionCounts()) 877 return nullptr; 878 Optional<uint64_t> CondCount = PGO.getStmtCount(Cond); 879 assert(CondCount.hasValue() && "missing expected loop condition count"); 880 if (*CondCount == 0) 881 return nullptr; 882 return createProfileWeights(LoopCount, 883 std::max(*CondCount, LoopCount) - LoopCount); 884 } 885