1 //===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the ShrinkWrapping class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/ShrinkWrapping.h" 14 #include "bolt/Core/MCPlus.h" 15 #include "bolt/Passes/DataflowInfoManager.h" 16 #include "bolt/Passes/MCF.h" 17 #include "bolt/Utils/CommandLineOpts.h" 18 #include <numeric> 19 #include <stack> 20 21 #define DEBUG_TYPE "shrinkwrapping" 22 23 using namespace llvm; 24 25 namespace opts { 26 27 extern cl::opt<bool> TimeOpts; 28 extern cl::OptionCategory BoltOptCategory; 29 30 static cl::opt<unsigned> ShrinkWrappingThreshold( 31 "shrink-wrapping-threshold", 32 cl::desc("Percentage of prologue execution count to use as threshold when" 33 " evaluating whether a block is cold enough to be profitable to" 34 " move eligible spills there"), 35 cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 36 } // namespace opts 37 38 namespace llvm { 39 namespace bolt { 40 41 void CalleeSavedAnalysis::analyzeSaves() { 42 ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs(); 43 StackReachingUses &SRU = Info.getStackReachingUses(); 44 auto &InsnToBB = Info.getInsnToBBMap(); 45 BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false); 46 47 LLVM_DEBUG(dbgs() << "Checking spill locations\n"); 48 for (BinaryBasicBlock &BB : BF) { 49 LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n"); 50 const MCInst *Prev = nullptr; 51 for (MCInst &Inst : BB) { 52 if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) { 53 // Blacklist weird stores we don't understand 54 if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore && 55 FIE->IsStoreFromReg) { 56 BlacklistedRegs.set(FIE->RegOrImm); 57 CalleeSaved.reset(FIE->RegOrImm); 58 Prev = &Inst; 59 continue; 60 } 61 62 if (!FIE->IsStore || !FIE->IsStoreFromReg || 63 BlacklistedRegs[FIE->RegOrImm]) { 64 Prev = &Inst; 65 continue; 66 } 67 68 // If this reg is defined locally, it is not a callee-saved reg 69 if (RD.isReachedBy(FIE->RegOrImm, 70 Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) { 71 BlacklistedRegs.set(FIE->RegOrImm); 72 CalleeSaved.reset(FIE->RegOrImm); 73 Prev = &Inst; 74 continue; 75 } 76 77 // If this stack position is accessed in another function, we are 78 // probably dealing with a parameter passed in a stack -- do not mess 79 // with it 80 if (SRU.isStoreUsed(*FIE, 81 Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB)), 82 /*IncludeLocalAccesses=*/false) { 83 BlacklistedRegs.set(FIE->RegOrImm); 84 CalleeSaved.reset(FIE->RegOrImm); 85 Prev = &Inst; 86 continue; 87 } 88 89 // If this stack position is loaded elsewhere in another reg, we can't 90 // update it, so blacklist it. 91 if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev) 92 : SRU.expr_begin(BB))) { 93 BlacklistedRegs.set(FIE->RegOrImm); 94 CalleeSaved.reset(FIE->RegOrImm); 95 Prev = &Inst; 96 continue; 97 } 98 99 // Ignore regs with multiple saves 100 if (CalleeSaved[FIE->RegOrImm]) { 101 BlacklistedRegs.set(FIE->RegOrImm); 102 CalleeSaved.reset(FIE->RegOrImm); 103 Prev = &Inst; 104 continue; 105 } 106 107 CalleeSaved.set(FIE->RegOrImm); 108 SaveFIEByReg[FIE->RegOrImm] = &*FIE; 109 SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount(); 110 BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId); 111 OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset; 112 LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: " 113 << FIE->RegOrImm << "\n"); 114 } 115 Prev = &Inst; 116 } 117 } 118 } 119 120 void CalleeSavedAnalysis::analyzeRestores() { 121 ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses(); 122 123 // Now compute all restores of these callee-saved regs 124 for (BinaryBasicBlock &BB : BF) { 125 const MCInst *Prev = nullptr; 126 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 127 MCInst &Inst = *I; 128 if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) { 129 if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) { 130 Prev = &Inst; 131 continue; 132 } 133 134 // If this reg is used locally after a restore, then we are probably 135 // not dealing with a callee-saved reg. Except if this use is by 136 // another store, but we don't cover this case yet. 137 // Also not callee-saved if this load accesses caller stack or isn't 138 // simple. 139 if (!FIE->IsSimple || FIE->StackOffset >= 0 || 140 RU.isReachedBy(FIE->RegOrImm, 141 Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) { 142 CalleeSaved.reset(FIE->RegOrImm); 143 Prev = &Inst; 144 continue; 145 } 146 // If stack offsets between saves/store don't agree with each other, 147 // we don't completely understand what's happening here 148 if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) { 149 CalleeSaved.reset(FIE->RegOrImm); 150 LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a " 151 "mismatching restore: " 152 << FIE->RegOrImm << "\n"); 153 Prev = &Inst; 154 continue; 155 } 156 157 LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm 158 << "\n"); 159 if (LoadFIEByReg[FIE->RegOrImm] == nullptr) 160 LoadFIEByReg[FIE->RegOrImm] = &*FIE; 161 BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm, 162 AllocatorId); 163 HasRestores.set(FIE->RegOrImm); 164 } 165 Prev = &Inst; 166 } 167 } 168 } 169 170 std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) { 171 std::vector<MCInst *> Results; 172 for (BinaryBasicBlock &BB : BF) 173 for (MCInst &Inst : BB) 174 if (getSavedReg(Inst) == Reg) 175 Results.push_back(&Inst); 176 return Results; 177 } 178 179 std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) { 180 std::vector<MCInst *> Results; 181 for (BinaryBasicBlock &BB : BF) 182 for (MCInst &Inst : BB) 183 if (getRestoredReg(Inst) == Reg) 184 Results.push_back(&Inst); 185 return Results; 186 } 187 188 CalleeSavedAnalysis::~CalleeSavedAnalysis() { 189 for (BinaryBasicBlock &BB : BF) { 190 for (MCInst &Inst : BB) { 191 BC.MIB->removeAnnotation(Inst, getSaveTag()); 192 BC.MIB->removeAnnotation(Inst, getRestoreTag()); 193 } 194 } 195 } 196 197 void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) { 198 if (BlacklistedRegions[Offset] < Size) 199 BlacklistedRegions[Offset] = Size; 200 } 201 202 bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) { 203 for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions) 204 if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second) 205 return true; 206 return false; 207 } 208 209 bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset, 210 int64_t Size) { 211 bool HasConflict = false; 212 for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) { 213 std::pair<const int64_t, int64_t> &Elem = *Iter; 214 if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second && 215 (Offset != Elem.first || Size != Elem.second)) { 216 Iter = AvailableRegions.erase(Iter); 217 HasConflict = true; 218 continue; 219 } 220 ++Iter; 221 } 222 if (HasConflict) { 223 blacklistRegion(Offset, Size); 224 return true; 225 } 226 return false; 227 } 228 229 void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) { 230 StackPointerTracking &SPT = Info.getStackPointerTracking(); 231 if (!BC.MII->get(Point.getOpcode()) 232 .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI)) 233 return; 234 235 int SPVal, FPVal; 236 std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point); 237 std::pair<MCPhysReg, int64_t> FP; 238 239 if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION) 240 FP = std::make_pair(BC.MIB->getFramePointer(), FPVal); 241 else 242 FP = std::make_pair(0, 0); 243 std::pair<MCPhysReg, int64_t> SP; 244 245 if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION) 246 SP = std::make_pair(BC.MIB->getStackPointer(), SPVal); 247 else 248 SP = std::make_pair(0, 0); 249 250 int64_t Output; 251 if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) 252 return; 253 254 // Not your regular frame pointer initialization... bail 255 if (Output != SPVal) 256 blacklistRegion(0, 0); 257 } 258 259 void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) { 260 StackPointerTracking &SPT = Info.getStackPointerTracking(); 261 if (!BC.MII->get(Point.getOpcode()) 262 .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI)) 263 return; 264 // Check if the definition of SP comes from FP -- in this case, this 265 // value may need to be updated depending on our stack layout changes 266 const MCInstrDesc &InstInfo = BC.MII->get(Point.getOpcode()); 267 unsigned NumDefs = InstInfo.getNumDefs(); 268 bool UsesFP = false; 269 for (unsigned I = NumDefs, E = MCPlus::getNumPrimeOperands(Point); I < E; 270 ++I) { 271 MCOperand &Operand = Point.getOperand(I); 272 if (!Operand.isReg()) 273 continue; 274 if (Operand.getReg() == BC.MIB->getFramePointer()) { 275 UsesFP = true; 276 break; 277 } 278 } 279 if (!UsesFP) 280 return; 281 282 // Setting up evaluation 283 int SPVal, FPVal; 284 std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point); 285 std::pair<MCPhysReg, int64_t> FP; 286 287 if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION) 288 FP = std::make_pair(BC.MIB->getFramePointer(), FPVal); 289 else 290 FP = std::make_pair(0, 0); 291 std::pair<MCPhysReg, int64_t> SP; 292 293 if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION) 294 SP = std::make_pair(BC.MIB->getStackPointer(), SPVal); 295 else 296 SP = std::make_pair(0, 0); 297 298 int64_t Output; 299 if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) 300 return; 301 302 // If the value is the same of FP, no need to adjust it 303 if (Output == FPVal) 304 return; 305 306 // If an allocation happened through FP, bail 307 if (Output <= SPVal) { 308 blacklistRegion(0, 0); 309 return; 310 } 311 312 // We are restoring SP to an old value based on FP. Mark it as a stack 313 // access to be fixed later. 314 BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId); 315 } 316 317 void StackLayoutModifier::classifyStackAccesses() { 318 // Understand when stack slots are being used non-locally 319 StackReachingUses &SRU = Info.getStackReachingUses(); 320 321 for (BinaryBasicBlock &BB : BF) { 322 const MCInst *Prev = nullptr; 323 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 324 MCInst &Inst = *I; 325 checkFramePointerInitialization(Inst); 326 checkStackPointerRestore(Inst); 327 ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst); 328 if (!FIEX) { 329 Prev = &Inst; 330 continue; 331 } 332 if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) { 333 blacklistRegion(FIEX->StackOffset, FIEX->Size); 334 Prev = &Inst; 335 continue; 336 } 337 // If this stack position is accessed in another function, we are 338 // probably dealing with a parameter passed in a stack -- do not mess 339 // with it 340 if (SRU.isStoreUsed(*FIEX, 341 Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB), 342 /*IncludeLocalAccesses=*/false)) { 343 blacklistRegion(FIEX->StackOffset, FIEX->Size); 344 Prev = &Inst; 345 continue; 346 } 347 // Now we have a clear stack slot access. Check if its blacklisted or if 348 // it conflicts with another chunk. 349 if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) || 350 blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) { 351 Prev = &Inst; 352 continue; 353 } 354 // We are free to go. Add it as available stack slot which we know how 355 // to move it. 356 AvailableRegions[FIEX->StackOffset] = FIEX->Size; 357 BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId); 358 RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm); 359 RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset); 360 LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size " 361 << (int)FIEX->Size << "\n"); 362 } 363 } 364 } 365 366 void StackLayoutModifier::classifyCFIs() { 367 std::stack<std::pair<int64_t, uint16_t>> CFIStack; 368 int64_t CfaOffset = -8; 369 uint16_t CfaReg = 7; 370 371 auto recordAccess = [&](MCInst *Inst, int64_t Offset) { 372 const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false); 373 if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) { 374 BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId); 375 LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n"); 376 } else { 377 IsSimple = false; 378 return; 379 } 380 }; 381 382 for (BinaryBasicBlock *&BB : BF.layout()) { 383 for (MCInst &Inst : *BB) { 384 if (!BC.MIB->isCFI(Inst)) 385 continue; 386 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 387 switch (CFI->getOperation()) { 388 case MCCFIInstruction::OpDefCfa: 389 CfaOffset = -CFI->getOffset(); 390 recordAccess(&Inst, CfaOffset); 391 LLVM_FALLTHROUGH; 392 case MCCFIInstruction::OpDefCfaRegister: 393 CfaReg = CFI->getRegister(); 394 break; 395 case MCCFIInstruction::OpDefCfaOffset: 396 CfaOffset = -CFI->getOffset(); 397 recordAccess(&Inst, CfaOffset); 398 break; 399 case MCCFIInstruction::OpOffset: 400 recordAccess(&Inst, CFI->getOffset()); 401 BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(), 402 BC.MRI->getLLVMRegNum(CFI->getRegister(), 403 /*isEH=*/false), 404 AllocatorId); 405 break; 406 case MCCFIInstruction::OpSameValue: 407 BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(), 408 BC.MRI->getLLVMRegNum(CFI->getRegister(), 409 /*isEH=*/false), 410 AllocatorId); 411 break; 412 case MCCFIInstruction::OpRememberState: 413 CFIStack.push(std::make_pair(CfaOffset, CfaReg)); 414 break; 415 case MCCFIInstruction::OpRestoreState: { 416 assert(!CFIStack.empty() && "Corrupt CFI stack"); 417 std::pair<int64_t, uint16_t> &Elem = CFIStack.top(); 418 CFIStack.pop(); 419 CfaOffset = Elem.first; 420 CfaReg = Elem.second; 421 break; 422 } 423 case MCCFIInstruction::OpRelOffset: 424 case MCCFIInstruction::OpAdjustCfaOffset: 425 llvm_unreachable("Unhandled AdjustCfaOffset"); 426 break; 427 default: 428 break; 429 } 430 } 431 } 432 } 433 434 void StackLayoutModifier::scheduleChange( 435 MCInst &Inst, StackLayoutModifier::WorklistItem Item) { 436 auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>( 437 Inst, getTodoTag(), AllocatorId); 438 WList.push_back(Item); 439 } 440 441 bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) { 442 if (!IsSimple || !BC.MIB->isPush(*DeletedPush)) 443 return false; 444 445 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush); 446 if (!FIE) 447 return false; 448 449 return canCollapseRegion(FIE->StackOffset); 450 } 451 452 bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) { 453 if (!IsInitialized) 454 initialize(); 455 if (!IsSimple) 456 return false; 457 458 if (CollapsedRegions.count(RegionAddr)) 459 return true; 460 461 // Check if it is possible to readjust all accesses below RegionAddr 462 if (!BlacklistedRegions.empty()) 463 return false; 464 465 return true; 466 } 467 468 bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) { 469 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush); 470 if (!FIE) 471 return false; 472 int64_t RegionAddr = FIE->StackOffset; 473 int64_t RegionSz = FIE->Size; 474 return collapseRegion(DeletedPush, RegionAddr, RegionSz); 475 } 476 477 bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr, 478 int64_t RegionSz) { 479 if (!canCollapseRegion(RegionAddr)) 480 return false; 481 482 assert(IsInitialized); 483 StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis(); 484 485 for (BinaryBasicBlock &BB : BF) { 486 for (MCInst &Inst : BB) { 487 if (!BC.MIB->hasAnnotation(Inst, getSlotTag())) 488 continue; 489 auto Slot = 490 BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>( 491 Inst, getSlotTag()); 492 if (!AvailableRegions.count(Slot)) 493 continue; 494 // We need to ensure this access is affected by the deleted push 495 if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]]) 496 continue; 497 498 if (BC.MIB->isCFI(Inst)) { 499 if (Slot > RegionAddr) 500 continue; 501 scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz)); 502 continue; 503 } 504 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst); 505 if (!FIE) { 506 if (Slot > RegionAddr) 507 continue; 508 // SP update based on frame pointer 509 scheduleChange( 510 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz)); 511 continue; 512 } 513 514 if (Slot == RegionAddr) { 515 BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId); 516 continue; 517 } 518 if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) 519 continue; 520 521 if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr) 522 continue; 523 524 if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr) 525 continue; 526 527 scheduleChange( 528 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz)); 529 } 530 } 531 532 CollapsedRegions.insert(RegionAddr); 533 return true; 534 } 535 536 void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) { 537 for (BinaryBasicBlock &BB : BF) { 538 for (MCInst &Inst : BB) { 539 if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) 540 continue; 541 BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos"); 542 scheduleChange( 543 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset)); 544 } 545 } 546 } 547 548 bool StackLayoutModifier::canInsertRegion(ProgramPoint P) { 549 if (!IsInitialized) 550 initialize(); 551 if (!IsSimple) 552 return false; 553 554 StackPointerTracking &SPT = Info.getStackPointerTracking(); 555 int64_t RegionAddr = SPT.getStateBefore(P)->first; 556 if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY) 557 return false; 558 559 if (InsertedRegions.count(RegionAddr)) 560 return true; 561 562 // Check if we are going to screw up stack accesses at call sites that 563 // pass parameters via stack 564 if (!BlacklistedRegions.empty()) 565 return false; 566 567 return true; 568 } 569 570 bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) { 571 if (!canInsertRegion(P)) 572 return false; 573 574 assert(IsInitialized); 575 StackPointerTracking &SPT = Info.getStackPointerTracking(); 576 // This RegionAddr is slightly different from the one seen in collapseRegion 577 // This is the value of SP before the allocation the user wants to make. 578 int64_t RegionAddr = SPT.getStateBefore(P)->first; 579 if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY) 580 return false; 581 582 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 583 584 for (BinaryBasicBlock &BB : BF) { 585 for (MCInst &Inst : BB) { 586 if (!BC.MIB->hasAnnotation(Inst, getSlotTag())) 587 continue; 588 auto Slot = 589 BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>( 590 Inst, getSlotTag()); 591 if (!AvailableRegions.count(Slot)) 592 continue; 593 594 if (!(DA.doesADominateB(P, Inst))) 595 continue; 596 597 if (BC.MIB->isCFI(Inst)) { 598 if (Slot >= RegionAddr) 599 continue; 600 scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz)); 601 continue; 602 } 603 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst); 604 if (!FIE) { 605 if (Slot >= RegionAddr) 606 continue; 607 scheduleChange( 608 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz)); 609 continue; 610 } 611 612 if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr) 613 continue; 614 if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr) 615 continue; 616 if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) 617 continue; 618 scheduleChange( 619 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz)); 620 } 621 } 622 623 InsertedRegions.insert(RegionAddr); 624 return true; 625 } 626 627 void StackLayoutModifier::performChanges() { 628 std::set<uint32_t> ModifiedCFIIndices; 629 for (BinaryBasicBlock &BB : BF) { 630 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 631 MCInst &Inst = *I; 632 if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) { 633 assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst)); 634 BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos"); 635 } 636 if (!BC.MIB->hasAnnotation(Inst, getTodoTag())) 637 continue; 638 auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>( 639 Inst, getTodoTag()); 640 int64_t Adjustment = 0; 641 WorklistItem::ActionType AdjustmentType = WorklistItem::None; 642 for (WorklistItem &WI : WList) { 643 if (WI.Action == WorklistItem::None) 644 continue; 645 assert(WI.Action == WorklistItem::AdjustLoadStoreOffset || 646 WI.Action == WorklistItem::AdjustCFI); 647 assert((AdjustmentType == WorklistItem::None || 648 AdjustmentType == WI.Action) && 649 "Conflicting actions requested at the same program point"); 650 AdjustmentType = WI.Action; 651 Adjustment += WI.OffsetUpdate; 652 } 653 if (!Adjustment) 654 continue; 655 if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) { 656 assert(BC.MIB->isCFI(Inst)); 657 uint32_t CFINum = Inst.getOperand(0).getImm(); 658 if (ModifiedCFIIndices.count(CFINum)) 659 continue; 660 ModifiedCFIIndices.insert(CFINum); 661 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 662 const MCCFIInstruction::OpType Operation = CFI->getOperation(); 663 if (Operation == MCCFIInstruction::OpDefCfa || 664 Operation == MCCFIInstruction::OpDefCfaOffset) 665 Adjustment = 0 - Adjustment; 666 LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset() 667 << " to " << (CFI->getOffset() + Adjustment) << "\n"); 668 BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment); 669 continue; 670 } 671 int32_t SrcImm = 0; 672 MCPhysReg Reg = 0; 673 MCPhysReg StackPtrReg = 0; 674 int64_t StackOffset = 0; 675 bool IsIndexed = false; 676 bool IsLoad = false; 677 bool IsStore = false; 678 bool IsSimple = false; 679 bool IsStoreFromReg = false; 680 uint8_t Size = 0; 681 bool Success = false; 682 Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, 683 Reg, SrcImm, StackPtrReg, StackOffset, 684 Size, IsSimple, IsIndexed); 685 if (!Success) { 686 // SP update based on FP value 687 Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx); 688 assert(Success); 689 continue; 690 } 691 assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg)); 692 if (StackPtrReg != BC.MIB->getFramePointer()) 693 Adjustment = -Adjustment; 694 if (IsLoad) 695 Success = BC.MIB->createRestoreFromStack( 696 Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size); 697 else if (IsStore) 698 Success = BC.MIB->createSaveToStack( 699 Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size); 700 LLVM_DEBUG({ 701 dbgs() << "Adjusted instruction: "; 702 Inst.dump(); 703 }); 704 assert(Success); 705 } 706 } 707 } 708 709 void StackLayoutModifier::initialize() { 710 classifyStackAccesses(); 711 classifyCFIs(); 712 IsInitialized = true; 713 } 714 715 std::atomic_uint64_t ShrinkWrapping::SpillsMovedRegularMode{0}; 716 std::atomic_uint64_t ShrinkWrapping::SpillsMovedPushPopMode{0}; 717 std::atomic_uint64_t ShrinkWrapping::SpillsMovedDynamicCount{0}; 718 std::atomic_uint64_t ShrinkWrapping::SpillsFailedDynamicCount{0}; 719 std::atomic_uint64_t ShrinkWrapping::InstrDynamicCount{0}; 720 std::atomic_uint64_t ShrinkWrapping::StoreDynamicCount{0}; 721 722 using BBIterTy = BinaryBasicBlock::iterator; 723 724 void ShrinkWrapping::classifyCSRUses() { 725 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 726 StackPointerTracking &SPT = Info.getStackPointerTracking(); 727 UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(), 728 BitVector(DA.NumInstrs, false)); 729 730 const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer()); 731 for (BinaryBasicBlock &BB : BF) { 732 for (MCInst &Inst : BB) { 733 if (BC.MIB->isCFI(Inst)) 734 continue; 735 BitVector BV = BitVector(BC.MRI->getNumRegs(), false); 736 BC.MIB->getTouchedRegs(Inst, BV); 737 BV &= CSA.CalleeSaved; 738 for (int I : BV.set_bits()) { 739 if (I == 0) 740 continue; 741 if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I) 742 UsesByReg[I].set(DA.ExprToIdx[&Inst]); 743 } 744 if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst)) 745 continue; 746 BV = CSA.CalleeSaved; 747 BV &= FPAliases; 748 for (int I : BV.set_bits()) 749 UsesByReg[I].set(DA.ExprToIdx[&Inst]); 750 } 751 } 752 } 753 754 void ShrinkWrapping::pruneUnwantedCSRs() { 755 BitVector ParamRegs = BC.MIB->getRegsUsedAsParams(); 756 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 757 if (!CSA.CalleeSaved[I]) 758 continue; 759 if (ParamRegs[I]) { 760 CSA.CalleeSaved.reset(I); 761 continue; 762 } 763 if (UsesByReg[I].empty()) { 764 LLVM_DEBUG( 765 dbgs() 766 << "Dismissing Callee-Saved Reg because we found no uses of it:" << I 767 << "\n"); 768 CSA.CalleeSaved.reset(I); 769 continue; 770 } 771 if (!CSA.HasRestores[I]) { 772 LLVM_DEBUG( 773 dbgs() << "Dismissing Callee-Saved Reg because it does not have " 774 "restores:" 775 << I << "\n"); 776 CSA.CalleeSaved.reset(I); 777 } 778 } 779 } 780 781 void ShrinkWrapping::computeSaveLocations() { 782 BestSavePos = std::vector<std::vector<MCInst *>>(BC.MRI->getNumRegs()); 783 ReachingInsns<true> &RI = Info.getReachingInsnsBackwards(); 784 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 785 StackPointerTracking &SPT = Info.getStackPointerTracking(); 786 787 LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n"); 788 for (BinaryBasicBlock &BB : BF) { 789 LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n"); 790 791 MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr; 792 if (!First) 793 continue; 794 795 // Use reaching instructions to detect if we are inside a loop - if we 796 // are, do not consider this BB as valid placement for saves. 797 if (RI.isInLoop(BB)) 798 continue; 799 800 const std::pair<int, int> SPFP = *SPT.getStateBefore(*First); 801 // If we don't know stack state at this point, bail 802 if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) && 803 (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) 804 continue; 805 806 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 807 if (!CSA.CalleeSaved[I]) 808 continue; 809 810 BitVector BBDominatedUses = BitVector(DA.NumInstrs, false); 811 for (int J : UsesByReg[I].set_bits()) 812 if (DA.doesADominateB(*First, J)) 813 BBDominatedUses.set(J); 814 LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates " 815 << BBDominatedUses.count() << " uses for reg " << I 816 << ". Total uses for reg is " << UsesByReg[I].count() 817 << "\n"); 818 BBDominatedUses &= UsesByReg[I]; 819 if (BBDominatedUses == UsesByReg[I]) { 820 LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName() 821 << " as a save pos for " << I << "\n"); 822 BestSavePos[I].push_back(First); 823 LLVM_DEBUG({ 824 dbgs() << "Dominated uses are:\n"; 825 for (int J : UsesByReg[I].set_bits()) { 826 dbgs() << "Idx " << J << ": "; 827 BC.printInstruction(dbgs(), *DA.Expressions[J]); 828 DA.Expressions[J]->dump(); 829 } 830 }); 831 } 832 } 833 } 834 835 BestSaveCount = std::vector<std::vector<uint64_t>>(BC.MRI->getNumRegs()); 836 837 auto &InsnToBB = Info.getInsnToBBMap(); 838 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 839 if (!CSA.CalleeSaved[I]) 840 continue; 841 842 std::stable_sort(BestSavePos[I].begin(), BestSavePos[I].end(), 843 [&](const MCInst *A, const MCInst *B) { 844 const BinaryBasicBlock *BBA = InsnToBB[A]; 845 const BinaryBasicBlock *BBB = InsnToBB[B]; 846 const uint64_t CountA = BBA->getKnownExecutionCount(); 847 const uint64_t CountB = BBB->getKnownExecutionCount(); 848 return CountB < CountA; 849 }); 850 851 for (MCInst *Pos : BestSavePos[I]) { 852 const BinaryBasicBlock *BB = InsnToBB[Pos]; 853 const uint64_t Count = BB->getKnownExecutionCount(); 854 BestSaveCount[I].push_back(Count); 855 } 856 } 857 } 858 859 void ShrinkWrapping::computeDomOrder() { 860 DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0); 861 std::vector<MCPhysReg> Order; 862 for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 863 Order.push_back(I); 864 } 865 866 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 867 auto &InsnToBB = Info.getInsnToBBMap(); 868 llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) { 869 BinaryBasicBlock *BBA = 870 BestSavePos[A].size() ? InsnToBB[BestSavePos[A].back()] : nullptr; 871 BinaryBasicBlock *BBB = 872 BestSavePos[B].size() ? InsnToBB[BestSavePos[B].back()] : nullptr; 873 if (BBA == BBB) 874 return A < B; 875 if (!BBA && BBB) 876 return false; 877 if (BBA && !BBB) 878 return true; 879 if (DA.doesADominateB(*BestSavePos[A].back(), *BestSavePos[B].back())) 880 return true; 881 if (DA.doesADominateB(*BestSavePos[B].back(), *BestSavePos[A].back())) 882 return false; 883 return A < B; 884 }); 885 886 for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) 887 DomOrder[Order[I]] = I; 888 } 889 890 bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave, 891 uint64_t &TotalEstimatedWin) { 892 const uint64_t CurSavingCost = CSA.SavingCost[CSR]; 893 if (!CSA.CalleeSaved[CSR]) 894 return false; 895 896 assert(BestSaveCount[CSR].size() == BestSavePos[CSR].size() && 897 "save position vectors out of sync"); 898 if (BestSaveCount[CSR].empty()) 899 return false; 900 901 const uint64_t BestCount = BestSaveCount[CSR].back(); 902 BestPosSave = BestSavePos[CSR].back(); 903 if (BestCount >= (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost) 904 return false; 905 906 LLVM_DEBUG({ 907 auto &InsnToBB = Info.getInsnToBBMap(); 908 dbgs() << "Better position for saves found in func " << BF.getPrintName() 909 << " count << " << BF.getKnownExecutionCount() << "\n"; 910 dbgs() << "Reg: " << CSR << "; New BB: " << InsnToBB[BestPosSave]->getName() 911 << " Freq reduction: " << (CurSavingCost - BestCount) << "\n"; 912 }); 913 914 TotalEstimatedWin = CurSavingCost - BestCount; 915 return true; 916 } 917 918 /// Auxiliar function used to create basic blocks for critical edges and update 919 /// the dominance frontier with these new locations 920 void ShrinkWrapping::splitFrontierCritEdges( 921 BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier, 922 const SmallVector<bool, 4> &IsCritEdge, 923 const SmallVector<BinaryBasicBlock *, 4> &From, 924 const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) { 925 LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func " 926 << BF.getPrintName() << "\n"); 927 // For every FromBB, there might be one or more critical edges, with 928 // To[I] containing destination BBs. It's important to memorize 929 // the original size of the Frontier as we may append to it while splitting 930 // critical edges originating with blocks with multiple destinations. 931 for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) { 932 if (!IsCritEdge[I]) 933 continue; 934 if (To[I].empty()) 935 continue; 936 BinaryBasicBlock *FromBB = From[I]; 937 LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName() 938 << "\n"); 939 // Split edge for every DestinationBBs 940 for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) { 941 BinaryBasicBlock *DestinationBB = To[I][DI]; 942 LLVM_DEBUG(dbgs() << " - Dest : " << DestinationBB->getName() << "\n"); 943 BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB); 944 // Insert dummy instruction so this BB is never empty (we need this for 945 // PredictiveStackPointerTracking to work, since it annotates instructions 946 // and not BBs). 947 if (NewBB->empty()) { 948 MCInst NewInst; 949 BC.MIB->createNoop(NewInst); 950 NewBB->addInstruction(std::move(NewInst)); 951 scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0)); 952 } 953 954 // Update frontier 955 ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB); 956 if (DI == 0) { 957 // Update frontier inplace 958 Frontier[I] = NewFrontierPP; 959 LLVM_DEBUG(dbgs() << " - Update frontier with " << NewBB->getName() 960 << '\n'); 961 } else { 962 // Append new frontier to the end of the list 963 Frontier.push_back(NewFrontierPP); 964 LLVM_DEBUG(dbgs() << " - Append frontier " << NewBB->getName() 965 << '\n'); 966 } 967 } 968 } 969 } 970 971 SmallVector<ProgramPoint, 4> 972 ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR, 973 uint64_t TotalEstimatedWin) { 974 SmallVector<ProgramPoint, 4> Frontier; 975 SmallVector<bool, 4> IsCritEdge; 976 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 977 978 SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom; 979 SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo; 980 // In case of a critical edge, we need to create extra BBs to host restores 981 // into edges transitioning to the dominance frontier, otherwise we pull these 982 // restores to inside the dominated area. 983 Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector(); 984 LLVM_DEBUG({ 985 dbgs() << "Dumping dominance frontier for "; 986 BC.printInstruction(dbgs(), *BestPosSave); 987 for (ProgramPoint &PP : Frontier) 988 if (PP.isInst()) 989 BC.printInstruction(dbgs(), *PP.getInst()); 990 else 991 dbgs() << PP.getBB()->getName() << "\n"; 992 }); 993 for (ProgramPoint &PP : Frontier) { 994 bool HasCritEdges = false; 995 if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) && 996 doesInstUsesCSR(*PP.getInst(), CSR)) { 997 Frontier.clear(); 998 return Frontier; 999 } 1000 BinaryBasicBlock *FrontierBB = Info.getParentBB(PP); 1001 CritEdgesFrom.emplace_back(FrontierBB); 1002 CritEdgesTo.emplace_back(0); 1003 SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back(); 1004 // Check for invoke instructions at the dominance frontier, which indicates 1005 // the landing pad is not dominated. 1006 if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) { 1007 Frontier.clear(); 1008 return Frontier; 1009 } 1010 doForAllSuccs(*FrontierBB, [&](ProgramPoint P) { 1011 if (!DA.doesADominateB(*BestPosSave, P)) { 1012 Dests.emplace_back(Info.getParentBB(P)); 1013 return; 1014 } 1015 HasCritEdges = true; 1016 }); 1017 IsCritEdge.push_back(HasCritEdges); 1018 } 1019 // Restores cannot be placed in empty BBs because we have a dataflow 1020 // analysis that depends on insertions happening before real instructions 1021 // (PredictiveStackPointerTracking). Detect now for empty BBs and add a 1022 // dummy nop that is scheduled to be removed later. 1023 bool InvalidateRequired = false; 1024 for (BinaryBasicBlock *&BB : BF.layout()) { 1025 if (BB->size() != 0) 1026 continue; 1027 MCInst NewInst; 1028 BC.MIB->createNoop(NewInst); 1029 auto II = BB->addInstruction(std::move(NewInst)); 1030 scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0)); 1031 InvalidateRequired = true; 1032 } 1033 if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) { 1034 LLVM_DEBUG({ 1035 dbgs() << "Now detected critical edges in the following frontier:\n"; 1036 for (ProgramPoint &PP : Frontier) { 1037 if (PP.isBB()) { 1038 dbgs() << " BB: " << PP.getBB()->getName() << "\n"; 1039 } else { 1040 dbgs() << " Inst: "; 1041 PP.getInst()->dump(); 1042 } 1043 } 1044 }); 1045 splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom, 1046 CritEdgesTo); 1047 InvalidateRequired = true; 1048 } 1049 if (InvalidateRequired) { 1050 // BitVectors that represent all insns of the function are invalid now 1051 // since we changed BBs/Insts. Re-run steps that depend on pointers being 1052 // valid 1053 Info.invalidateAll(); 1054 classifyCSRUses(); 1055 } 1056 return Frontier; 1057 } 1058 1059 bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave, 1060 int64_t SaveOffset) { 1061 if (FA.requiresAlignment(BF)) { 1062 LLVM_DEBUG({ 1063 dbgs() << "Reg " << CSR 1064 << " is not using push/pops due to function " 1065 "alignment requirements.\n"; 1066 }); 1067 return false; 1068 } 1069 if (FA.hasStackArithmetic(BF)) { 1070 LLVM_DEBUG({ 1071 dbgs() << "Reg " << CSR 1072 << " is not using push/pops due to function " 1073 "taking the address of a stack position.\n"; 1074 }); 1075 return false; 1076 } 1077 for (MCInst *Save : CSA.getSavesByReg(CSR)) { 1078 if (!SLM.canCollapseRegion(Save)) { 1079 LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n"); 1080 return false; 1081 } 1082 } 1083 // Abort if one of the restores for this CSR is not a POP. 1084 for (MCInst *Load : CSA.getRestoresByReg(CSR)) { 1085 if (!BC.MIB->isPop(*Load)) { 1086 LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n"); 1087 return false; 1088 } 1089 } 1090 1091 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1092 // Abort if we are inserting a push into an entry BB (offset -8) and this 1093 // func sets up a frame pointer. 1094 if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION || 1095 SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) { 1096 LLVM_DEBUG({ 1097 dbgs() << "Reg " << CSR 1098 << " cannot insert region or we are " 1099 "trying to insert a push into entry bb.\n"; 1100 }); 1101 return false; 1102 } 1103 return true; 1104 } 1105 1106 SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements( 1107 const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset, 1108 unsigned CSR) { 1109 SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints; 1110 // Moving pop locations to the correct sp offset 1111 ReachingInsns<true> &RI = Info.getReachingInsnsBackwards(); 1112 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1113 for (ProgramPoint &PP : FixedRestorePoints) { 1114 BinaryBasicBlock *BB = Info.getParentBB(PP); 1115 bool Found = false; 1116 if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first == 1117 SaveOffset) { 1118 BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB)); 1119 BV &= UsesByReg[CSR]; 1120 if (!BV.any()) { 1121 Found = true; 1122 PP = BB; 1123 continue; 1124 } 1125 } 1126 for (auto RIt = BB->rbegin(), End = BB->rend(); RIt != End; ++RIt) { 1127 if (SPT.getStateBefore(*RIt)->first == SaveOffset) { 1128 BitVector BV = *RI.getStateAt(*RIt); 1129 BV &= UsesByReg[CSR]; 1130 if (!BV.any()) { 1131 Found = true; 1132 PP = &*RIt; 1133 break; 1134 } 1135 } 1136 } 1137 if (!Found) { 1138 LLVM_DEBUG({ 1139 dbgs() << "Could not find restore insertion point for " << CSR 1140 << ", falling back to load/store mode\n"; 1141 }); 1142 FixedRestorePoints.clear(); 1143 return FixedRestorePoints; 1144 } 1145 } 1146 return FixedRestorePoints; 1147 } 1148 1149 void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR, 1150 bool UsePushPops) { 1151 1152 for (BinaryBasicBlock *&BB : BF.layout()) { 1153 std::vector<MCInst *> CFIs; 1154 for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) { 1155 MCInst &Inst = *I; 1156 if (BC.MIB->isCFI(Inst)) { 1157 // Delete all offset CFIs related to this CSR 1158 if (SLM.getOffsetCFIReg(Inst) == CSR) { 1159 HasDeletedOffsetCFIs[CSR] = true; 1160 scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR)); 1161 continue; 1162 } 1163 CFIs.push_back(&Inst); 1164 continue; 1165 } 1166 1167 uint16_t SavedReg = CSA.getSavedReg(Inst); 1168 uint16_t RestoredReg = CSA.getRestoredReg(Inst); 1169 if (SavedReg != CSR && RestoredReg != CSR) { 1170 CFIs.clear(); 1171 continue; 1172 } 1173 1174 scheduleChange(&Inst, WorklistItem(UsePushPops 1175 ? WorklistItem::Erase 1176 : WorklistItem::ChangeToAdjustment, 1177 CSR)); 1178 1179 // Delete associated CFIs 1180 const bool RecordDeletedPushCFIs = 1181 SavedReg == CSR && DeletedPushCFIs[CSR].empty(); 1182 const bool RecordDeletedPopCFIs = 1183 RestoredReg == CSR && DeletedPopCFIs[CSR].empty(); 1184 for (MCInst *CFI : CFIs) { 1185 const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI); 1186 // Do not touch these... 1187 if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState || 1188 MCCFI->getOperation() == MCCFIInstruction::OpRememberState) 1189 continue; 1190 scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR)); 1191 if (RecordDeletedPushCFIs) { 1192 // Do not record this to be replayed later because we are going to 1193 // rebuild it. 1194 if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) 1195 continue; 1196 DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm()); 1197 } 1198 if (RecordDeletedPopCFIs) { 1199 if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) 1200 continue; 1201 DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm()); 1202 } 1203 } 1204 CFIs.clear(); 1205 } 1206 } 1207 } 1208 1209 bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) { 1210 if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR || 1211 CSA.getRestoredReg(Inst) == CSR) 1212 return false; 1213 BitVector BV = BitVector(BC.MRI->getNumRegs(), false); 1214 BC.MIB->getTouchedRegs(Inst, BV); 1215 return BV[CSR]; 1216 } 1217 1218 void ShrinkWrapping::scheduleSaveRestoreInsertions( 1219 unsigned CSR, MCInst *BestPosSave, 1220 SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) { 1221 auto &InsnToBB = Info.getInsnToBBMap(); 1222 const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR]; 1223 const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR]; 1224 assert(FIESave && FIELoad && "Invalid CSR"); 1225 1226 LLVM_DEBUG({ 1227 dbgs() << "Scheduling save insertion at: "; 1228 BestPosSave->dump(); 1229 }); 1230 1231 scheduleChange(BestPosSave, 1232 UsePushPops ? WorklistItem::InsertPushOrPop 1233 : WorklistItem::InsertLoadOrStore, 1234 *FIESave, CSR); 1235 1236 for (ProgramPoint &PP : RestorePoints) { 1237 BinaryBasicBlock *FrontierBB = Info.getParentBB(PP); 1238 LLVM_DEBUG({ 1239 dbgs() << "Scheduling restore insertion at: "; 1240 if (PP.isInst()) 1241 PP.getInst()->dump(); 1242 else 1243 dbgs() << PP.getBB()->getName() << "\n"; 1244 }); 1245 MCInst *Term = 1246 FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr); 1247 if (Term) 1248 PP = Term; 1249 bool PrecededByPrefix = false; 1250 if (PP.isInst()) { 1251 auto Iter = FrontierBB->findInstruction(PP.getInst()); 1252 if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) { 1253 --Iter; 1254 PrecededByPrefix = BC.MIB->isPrefix(*Iter); 1255 } 1256 } 1257 if (PP.isInst() && 1258 (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) { 1259 assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) && 1260 "cannot move to end of bb"); 1261 scheduleChange(InsnToBB[PP.getInst()], 1262 UsePushPops ? WorklistItem::InsertPushOrPop 1263 : WorklistItem::InsertLoadOrStore, 1264 *FIELoad, CSR); 1265 continue; 1266 } 1267 scheduleChange(PP, 1268 UsePushPops ? WorklistItem::InsertPushOrPop 1269 : WorklistItem::InsertLoadOrStore, 1270 *FIELoad, CSR); 1271 } 1272 } 1273 1274 void ShrinkWrapping::moveSaveRestores() { 1275 bool DisablePushPopMode = false; 1276 bool UsedPushPopMode = false; 1277 // Keeps info about successfully moved regs: reg index, save position and 1278 // save size 1279 std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs; 1280 uint64_t TotalEstimatedWin = 0; 1281 1282 computeDomOrder(); 1283 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 1284 MCInst *BestPosSave = nullptr; 1285 uint64_t EstimatedWin = 0; 1286 SmallVector<ProgramPoint, 4> RestorePoints; 1287 while (RestorePoints.empty() && 1288 isBestSavePosCold(I, BestPosSave, EstimatedWin)) { 1289 RestorePoints = doRestorePlacement(BestPosSave, I, EstimatedWin); 1290 if (RestorePoints.empty()) { 1291 LLVM_DEBUG({ 1292 dbgs() << "Dropping opportunity because restore placement failed" 1293 " -- total est. freq reduc: " 1294 << EstimatedWin << ". Will try " 1295 << (BestSaveCount[I].size() - 1) << " more times.\n"; 1296 }); 1297 BestSaveCount[I].pop_back(); 1298 BestSavePos[I].pop_back(); 1299 computeDomOrder(); 1300 } 1301 } 1302 if (RestorePoints.empty()) { 1303 SpillsFailedDynamicCount += EstimatedWin; 1304 continue; 1305 } 1306 1307 const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I]; 1308 const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I]; 1309 (void)FIELoad; 1310 assert(FIESave && FIELoad); 1311 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1312 const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave); 1313 int SaveOffset = SPFP.first; 1314 uint8_t SaveSize = FIESave->Size; 1315 1316 // If we don't know stack state at this point, bail 1317 if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) && 1318 (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) { 1319 SpillsFailedDynamicCount += EstimatedWin; 1320 continue; 1321 } 1322 1323 // Operation mode: if true, will insert push/pops instead of loads/restores 1324 bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset); 1325 1326 if (UsePushPops) { 1327 SmallVector<ProgramPoint, 4> FixedRestorePoints = 1328 fixPopsPlacements(RestorePoints, SaveOffset, I); 1329 if (FixedRestorePoints.empty()) 1330 UsePushPops = false; 1331 else 1332 RestorePoints = FixedRestorePoints; 1333 } 1334 1335 // Disable push-pop mode for all CSRs in this function 1336 if (!UsePushPops) 1337 DisablePushPopMode = true; 1338 else 1339 UsedPushPopMode = true; 1340 1341 scheduleOldSaveRestoresRemoval(I, UsePushPops); 1342 scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops); 1343 MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize)); 1344 TotalEstimatedWin += EstimatedWin; 1345 } 1346 1347 // Revert push-pop mode if it failed for a single CSR 1348 if (DisablePushPopMode && UsedPushPopMode) { 1349 UsedPushPopMode = false; 1350 for (BinaryBasicBlock &BB : BF) { 1351 auto WRI = Todo.find(&BB); 1352 if (WRI != Todo.end()) { 1353 std::vector<WorklistItem> &TodoList = WRI->second; 1354 for (WorklistItem &Item : TodoList) 1355 if (Item.Action == WorklistItem::InsertPushOrPop) 1356 Item.Action = WorklistItem::InsertLoadOrStore; 1357 } 1358 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 1359 MCInst &Inst = *I; 1360 auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( 1361 Inst, getAnnotationIndex()); 1362 if (!TodoList) 1363 continue; 1364 bool isCFI = BC.MIB->isCFI(Inst); 1365 for (WorklistItem &Item : *TodoList) { 1366 if (Item.Action == WorklistItem::InsertPushOrPop) 1367 Item.Action = WorklistItem::InsertLoadOrStore; 1368 if (!isCFI && Item.Action == WorklistItem::Erase) 1369 Item.Action = WorklistItem::ChangeToAdjustment; 1370 } 1371 } 1372 } 1373 } 1374 SpillsMovedDynamicCount += TotalEstimatedWin; 1375 1376 // Update statistics 1377 if (!UsedPushPopMode) { 1378 SpillsMovedRegularMode += MovedRegs.size(); 1379 return; 1380 } 1381 1382 // Schedule modifications to stack-accessing instructions via 1383 // StackLayoutModifier. 1384 SpillsMovedPushPopMode += MovedRegs.size(); 1385 for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) { 1386 unsigned RegNdx; 1387 MCInst *SavePos; 1388 size_t SaveSize; 1389 std::tie(RegNdx, SavePos, SaveSize) = I; 1390 for (MCInst *Save : CSA.getSavesByReg(RegNdx)) 1391 SLM.collapseRegion(Save); 1392 SLM.insertRegion(SavePos, SaveSize); 1393 } 1394 } 1395 1396 namespace { 1397 /// Helper function to identify whether two basic blocks created by splitting 1398 /// a critical edge have the same contents. 1399 bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A, 1400 const BinaryBasicBlock &B) { 1401 if (A.succ_size() != B.succ_size()) 1402 return false; 1403 if (A.succ_size() != 1) 1404 return false; 1405 1406 if (*A.succ_begin() != *B.succ_begin()) 1407 return false; 1408 1409 if (A.size() != B.size()) 1410 return false; 1411 1412 // Compare instructions 1413 auto I = A.begin(), E = A.end(); 1414 auto OtherI = B.begin(), OtherE = B.end(); 1415 while (I != E && OtherI != OtherE) { 1416 if (I->getOpcode() != OtherI->getOpcode()) 1417 return false; 1418 if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) { 1419 return true; 1420 })) 1421 return false; 1422 ++I; 1423 ++OtherI; 1424 } 1425 return true; 1426 } 1427 } // namespace 1428 1429 bool ShrinkWrapping::foldIdenticalSplitEdges() { 1430 bool Changed = false; 1431 for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) { 1432 BinaryBasicBlock &BB = *Iter; 1433 if (!BB.getName().startswith(".LSplitEdge")) 1434 continue; 1435 for (auto RIter = BF.rbegin(); RIter != BF.rend(); ++RIter) { 1436 BinaryBasicBlock &RBB = *RIter; 1437 if (&RBB == &BB) 1438 break; 1439 if (!RBB.getName().startswith(".LSplitEdge") || !RBB.isValid() || 1440 !isIdenticalSplitEdgeBB(BC, *Iter, RBB)) 1441 continue; 1442 assert(RBB.pred_size() == 1 && "Invalid split edge BB"); 1443 BinaryBasicBlock *Pred = *RBB.pred_begin(); 1444 uint64_t OrigCount = Pred->branch_info_begin()->Count; 1445 uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount; 1446 BF.replaceJumpTableEntryIn(Pred, &RBB, &BB); 1447 Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds); 1448 Changed = true; 1449 // Remove the block from CFG 1450 RBB.markValid(false); 1451 } 1452 } 1453 1454 return Changed; 1455 } 1456 1457 namespace { 1458 1459 // A special StackPointerTracking that compensates for our future plans 1460 // in removing/adding insn. 1461 class PredictiveStackPointerTracking 1462 : public StackPointerTrackingBase<PredictiveStackPointerTracking> { 1463 friend class DataflowAnalysis<PredictiveStackPointerTracking, 1464 std::pair<int, int>>; 1465 decltype(ShrinkWrapping::Todo) &TodoMap; 1466 DataflowInfoManager &Info; 1467 1468 Optional<unsigned> AnnotationIndex; 1469 1470 protected: 1471 void compNextAux(const MCInst &Point, 1472 const std::vector<ShrinkWrapping::WorklistItem> &TodoItems, 1473 std::pair<int, int> &Res) { 1474 for (const ShrinkWrapping::WorklistItem &Item : TodoItems) { 1475 if (Item.Action == ShrinkWrapping::WorklistItem::Erase && 1476 BC.MIB->isPush(Point)) { 1477 Res.first += BC.MIB->getPushSize(Point); 1478 continue; 1479 } 1480 if (Item.Action == ShrinkWrapping::WorklistItem::Erase && 1481 BC.MIB->isPop(Point)) { 1482 Res.first -= BC.MIB->getPopSize(Point); 1483 continue; 1484 } 1485 if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop && 1486 Item.FIEToInsert.IsStore) { 1487 Res.first -= Item.FIEToInsert.Size; 1488 continue; 1489 } 1490 if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop && 1491 Item.FIEToInsert.IsLoad) { 1492 Res.first += Item.FIEToInsert.Size; 1493 continue; 1494 } 1495 } 1496 } 1497 1498 std::pair<int, int> computeNext(const MCInst &Point, 1499 const std::pair<int, int> &Cur) { 1500 std::pair<int, int> Res = 1501 StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext( 1502 Point, Cur); 1503 if (Res.first == StackPointerTracking::SUPERPOSITION || 1504 Res.first == StackPointerTracking::EMPTY) 1505 return Res; 1506 auto TodoItems = 1507 BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>( 1508 Point, ShrinkWrapping::getAnnotationName()); 1509 if (TodoItems) 1510 compNextAux(Point, *TodoItems, Res); 1511 auto &InsnToBBMap = Info.getInsnToBBMap(); 1512 if (&*InsnToBBMap[&Point]->rbegin() != &Point) 1513 return Res; 1514 auto WRI = TodoMap.find(InsnToBBMap[&Point]); 1515 if (WRI == TodoMap.end()) 1516 return Res; 1517 compNextAux(Point, WRI->second, Res); 1518 return Res; 1519 } 1520 1521 StringRef getAnnotationName() const { 1522 return StringRef("PredictiveStackPointerTracking"); 1523 } 1524 1525 public: 1526 PredictiveStackPointerTracking(BinaryFunction &BF, 1527 decltype(ShrinkWrapping::Todo) &TodoMap, 1528 DataflowInfoManager &Info, 1529 MCPlusBuilder::AllocatorIdTy AllocatorId = 0) 1530 : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF, 1531 AllocatorId), 1532 TodoMap(TodoMap), Info(Info) {} 1533 1534 void run() { 1535 StackPointerTrackingBase<PredictiveStackPointerTracking>::run(); 1536 } 1537 }; 1538 1539 } // end anonymous namespace 1540 1541 void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush, 1542 int SPValPop) { 1543 MCInst *SavePoint = nullptr; 1544 for (BinaryBasicBlock &BB : BF) { 1545 for (auto InstIter = BB.rbegin(), EndIter = BB.rend(); InstIter != EndIter; 1546 ++InstIter) { 1547 int32_t SrcImm = 0; 1548 MCPhysReg Reg = 0; 1549 MCPhysReg StackPtrReg = 0; 1550 int64_t StackOffset = 0; 1551 bool IsIndexed = false; 1552 bool IsLoad = false; 1553 bool IsStore = false; 1554 bool IsSimple = false; 1555 bool IsStoreFromReg = false; 1556 uint8_t Size = 0; 1557 if (!BC.MIB->isStackAccess(*InstIter, IsLoad, IsStore, IsStoreFromReg, 1558 Reg, SrcImm, StackPtrReg, StackOffset, Size, 1559 IsSimple, IsIndexed)) 1560 continue; 1561 if (Reg != CSR || !IsStore || !IsSimple) 1562 continue; 1563 SavePoint = &*InstIter; 1564 break; 1565 } 1566 if (SavePoint) 1567 break; 1568 } 1569 assert(SavePoint); 1570 LLVM_DEBUG({ 1571 dbgs() << "Now using as save point for reg " << CSR << " :"; 1572 SavePoint->dump(); 1573 }); 1574 bool PrevAffectedZone = false; 1575 BinaryBasicBlock *PrevBB = nullptr; 1576 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 1577 for (BinaryBasicBlock *BB : BF.layout()) { 1578 if (BB->size() == 0) 1579 continue; 1580 const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint); 1581 const bool InAffectedZoneAtBegin = 1582 (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]]; 1583 bool InAffectedZone = InAffectedZoneAtBegin; 1584 for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) { 1585 const bool CurZone = DA.count(*InstIter, *SavePoint); 1586 if (InAffectedZone != CurZone) { 1587 auto InsertionIter = InstIter; 1588 ++InsertionIter; 1589 InAffectedZone = CurZone; 1590 if (InAffectedZone) 1591 InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0, 1592 SPValPop); 1593 else 1594 InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0, 1595 SPValPush); 1596 --InstIter; 1597 } 1598 } 1599 // Are we at the first basic block or hot-cold split point? 1600 if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) { 1601 if (InAffectedZoneAtBegin) 1602 insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush); 1603 } else if (InAffectedZoneAtBegin != PrevAffectedZone) { 1604 if (InAffectedZoneAtBegin) 1605 insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush); 1606 else 1607 insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop); 1608 } 1609 PrevAffectedZone = InAffectedZoneAtEnd; 1610 PrevBB = BB; 1611 } 1612 } 1613 1614 void ShrinkWrapping::rebuildCFIForSP() { 1615 for (BinaryBasicBlock &BB : BF) { 1616 for (MCInst &Inst : BB) { 1617 if (!BC.MIB->isCFI(Inst)) 1618 continue; 1619 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 1620 if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) 1621 BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId); 1622 } 1623 } 1624 1625 int PrevSPVal = -8; 1626 BinaryBasicBlock *PrevBB = nullptr; 1627 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1628 for (BinaryBasicBlock *BB : BF.layout()) { 1629 if (BB->size() == 0) 1630 continue; 1631 const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first; 1632 const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first; 1633 int SPVal = SPValAtBegin; 1634 for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) { 1635 const int CurVal = SPT.getStateAt(*Iter)->first; 1636 if (SPVal != CurVal) { 1637 auto InsertionIter = Iter; 1638 ++InsertionIter; 1639 Iter = BF.addCFIInstruction( 1640 BB, InsertionIter, 1641 MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal)); 1642 SPVal = CurVal; 1643 } 1644 } 1645 if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold()) 1646 BF.addCFIInstruction( 1647 BB, BB->begin(), 1648 MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin)); 1649 else if (SPValAtBegin != PrevSPVal) 1650 BF.addCFIInstruction( 1651 PrevBB, PrevBB->end(), 1652 MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin)); 1653 PrevSPVal = SPValAtEnd; 1654 PrevBB = BB; 1655 } 1656 1657 for (BinaryBasicBlock &BB : BF) 1658 for (auto I = BB.begin(); I != BB.end();) 1659 if (BC.MIB->hasAnnotation(*I, "DeleteMe")) 1660 I = BB.eraseInstruction(I); 1661 else 1662 ++I; 1663 } 1664 1665 MCInst ShrinkWrapping::createStackAccess(int SPVal, int FPVal, 1666 const FrameIndexEntry &FIE, 1667 bool CreatePushOrPop) { 1668 MCInst NewInst; 1669 if (SPVal != StackPointerTracking::SUPERPOSITION && 1670 SPVal != StackPointerTracking::EMPTY) { 1671 if (FIE.IsLoad) { 1672 if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(), 1673 FIE.StackOffset - SPVal, FIE.RegOrImm, 1674 FIE.Size)) { 1675 errs() << "createRestoreFromStack: not supported on this platform\n"; 1676 abort(); 1677 } 1678 } else { 1679 if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(), 1680 FIE.StackOffset - SPVal, FIE.RegOrImm, 1681 FIE.Size)) { 1682 errs() << "createSaveToStack: not supported on this platform\n"; 1683 abort(); 1684 } 1685 } 1686 if (CreatePushOrPop) 1687 BC.MIB->changeToPushOrPop(NewInst); 1688 return NewInst; 1689 } 1690 assert(FPVal != StackPointerTracking::SUPERPOSITION && 1691 FPVal != StackPointerTracking::EMPTY); 1692 1693 if (FIE.IsLoad) { 1694 if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(), 1695 FIE.StackOffset - FPVal, FIE.RegOrImm, 1696 FIE.Size)) { 1697 errs() << "createRestoreFromStack: not supported on this platform\n"; 1698 abort(); 1699 } 1700 } else { 1701 if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(), 1702 FIE.StackOffset - FPVal, FIE.RegOrImm, 1703 FIE.Size)) { 1704 errs() << "createSaveToStack: not supported on this platform\n"; 1705 abort(); 1706 } 1707 } 1708 return NewInst; 1709 } 1710 1711 void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) { 1712 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 1713 if (UpdatedCFIs.count(CFI)) 1714 return; 1715 1716 switch (CFI->getOperation()) { 1717 case MCCFIInstruction::OpDefCfa: 1718 case MCCFIInstruction::OpDefCfaRegister: 1719 case MCCFIInstruction::OpDefCfaOffset: 1720 CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset); 1721 break; 1722 case MCCFIInstruction::OpOffset: 1723 default: 1724 break; 1725 } 1726 1727 UpdatedCFIs.insert(CFI); 1728 } 1729 1730 BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB, 1731 BBIterTy Pos, unsigned Reg, 1732 bool isPush, int Sz, 1733 int64_t NewOffset) { 1734 if (isPush) { 1735 for (uint32_t Idx : DeletedPushCFIs[Reg]) { 1736 Pos = BF.addCFIPseudo(&BB, Pos, Idx); 1737 updateCFIInstOffset(*Pos++, NewOffset); 1738 } 1739 if (HasDeletedOffsetCFIs[Reg]) { 1740 Pos = BF.addCFIInstruction( 1741 &BB, Pos, 1742 MCCFIInstruction::createOffset( 1743 nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset)); 1744 ++Pos; 1745 } 1746 } else { 1747 for (uint32_t Idx : DeletedPopCFIs[Reg]) { 1748 Pos = BF.addCFIPseudo(&BB, Pos, Idx); 1749 updateCFIInstOffset(*Pos++, NewOffset); 1750 } 1751 if (HasDeletedOffsetCFIs[Reg]) { 1752 Pos = BF.addCFIInstruction( 1753 &BB, Pos, 1754 MCCFIInstruction::createSameValue( 1755 nullptr, BC.MRI->getDwarfRegNum(Reg, false))); 1756 ++Pos; 1757 } 1758 } 1759 return Pos; 1760 } 1761 1762 BBIterTy ShrinkWrapping::processInsertion(BBIterTy InsertionPoint, 1763 BinaryBasicBlock *CurBB, 1764 const WorklistItem &Item, 1765 int64_t SPVal, int64_t FPVal) { 1766 // Trigger CFI reconstruction for this CSR if necessary - writing to 1767 // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update 1768 if ((Item.FIEToInsert.IsStore && 1769 !DeletedPushCFIs[Item.AffectedReg].empty()) || 1770 (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) || 1771 HasDeletedOffsetCFIs[Item.AffectedReg]) { 1772 if (Item.Action == WorklistItem::InsertPushOrPop) { 1773 if (Item.FIEToInsert.IsStore) 1774 PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size; 1775 else 1776 PopOffsetByReg[Item.AffectedReg] = SPVal; 1777 } else { 1778 if (Item.FIEToInsert.IsStore) 1779 PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset; 1780 else 1781 PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset; 1782 } 1783 } 1784 1785 LLVM_DEBUG({ 1786 dbgs() << "Creating stack access with SPVal = " << SPVal 1787 << "; stack offset = " << Item.FIEToInsert.StackOffset 1788 << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop) 1789 << "\n"; 1790 }); 1791 MCInst NewInst = 1792 createStackAccess(SPVal, FPVal, Item.FIEToInsert, 1793 Item.Action == WorklistItem::InsertPushOrPop); 1794 if (InsertionPoint != CurBB->end()) { 1795 LLVM_DEBUG({ 1796 dbgs() << "Adding before Inst: "; 1797 InsertionPoint->dump(); 1798 dbgs() << "the following inst: "; 1799 NewInst.dump(); 1800 }); 1801 BBIterTy Iter = 1802 CurBB->insertInstruction(InsertionPoint, std::move(NewInst)); 1803 return ++Iter; 1804 } 1805 CurBB->addInstruction(std::move(NewInst)); 1806 LLVM_DEBUG(dbgs() << "Adding to BB!\n"); 1807 return CurBB->end(); 1808 } 1809 1810 BBIterTy ShrinkWrapping::processInsertionsList( 1811 BBIterTy InsertionPoint, BinaryBasicBlock *CurBB, 1812 std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) { 1813 bool HasInsertions = false; 1814 for (WorklistItem &Item : TodoList) { 1815 if (Item.Action == WorklistItem::Erase || 1816 Item.Action == WorklistItem::ChangeToAdjustment) 1817 continue; 1818 HasInsertions = true; 1819 break; 1820 } 1821 1822 if (!HasInsertions) 1823 return InsertionPoint; 1824 1825 assert(((SPVal != StackPointerTracking::SUPERPOSITION && 1826 SPVal != StackPointerTracking::EMPTY) || 1827 (FPVal != StackPointerTracking::SUPERPOSITION && 1828 FPVal != StackPointerTracking::EMPTY)) && 1829 "Cannot insert if we have no idea of the stack state here"); 1830 1831 // Revert the effect of PSPT for this location, we want SP Value before 1832 // insertions 1833 if (InsertionPoint == CurBB->end()) { 1834 for (WorklistItem &Item : TodoList) { 1835 if (Item.Action != WorklistItem::InsertPushOrPop) 1836 continue; 1837 if (Item.FIEToInsert.IsStore) 1838 SPVal += Item.FIEToInsert.Size; 1839 if (Item.FIEToInsert.IsLoad) 1840 SPVal -= Item.FIEToInsert.Size; 1841 } 1842 } 1843 1844 // Reorder POPs to obey the correct dominance relation between them 1845 llvm::stable_sort(TodoList, [&](const WorklistItem &A, 1846 const WorklistItem &B) { 1847 if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) && 1848 (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad)) 1849 return false; 1850 if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad)) 1851 return true; 1852 if ((B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad)) 1853 return false; 1854 return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg]; 1855 }); 1856 1857 // Process insertions 1858 for (WorklistItem &Item : TodoList) { 1859 if (Item.Action == WorklistItem::Erase || 1860 Item.Action == WorklistItem::ChangeToAdjustment) 1861 continue; 1862 1863 InsertionPoint = 1864 processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal); 1865 if (Item.Action == WorklistItem::InsertPushOrPop && 1866 Item.FIEToInsert.IsStore) 1867 SPVal -= Item.FIEToInsert.Size; 1868 if (Item.Action == WorklistItem::InsertPushOrPop && 1869 Item.FIEToInsert.IsLoad) 1870 SPVal += Item.FIEToInsert.Size; 1871 } 1872 return InsertionPoint; 1873 } 1874 1875 bool ShrinkWrapping::processInsertions() { 1876 PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId); 1877 PSPT.run(); 1878 1879 bool Changes = false; 1880 for (BinaryBasicBlock &BB : BF) { 1881 // Process insertions before some inst. 1882 for (auto I = BB.begin(); I != BB.end(); ++I) { 1883 MCInst &Inst = *I; 1884 auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( 1885 Inst, getAnnotationIndex()); 1886 if (!TodoList) 1887 continue; 1888 Changes = true; 1889 std::vector<WorklistItem> List = *TodoList; 1890 LLVM_DEBUG({ 1891 dbgs() << "Now processing insertions in " << BB.getName() 1892 << " before inst: "; 1893 Inst.dump(); 1894 }); 1895 auto Iter = I; 1896 std::pair<int, int> SPTState = 1897 *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter)); 1898 I = processInsertionsList(I, &BB, List, SPTState.first, SPTState.second); 1899 } 1900 // Process insertions at the end of bb 1901 auto WRI = Todo.find(&BB); 1902 if (WRI != Todo.end()) { 1903 std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin()); 1904 processInsertionsList(BB.end(), &BB, WRI->second, SPTState.first, 1905 SPTState.second); 1906 Changes = true; 1907 } 1908 } 1909 return Changes; 1910 } 1911 1912 void ShrinkWrapping::processDeletions() { 1913 LivenessAnalysis &LA = Info.getLivenessAnalysis(); 1914 for (BinaryBasicBlock &BB : BF) { 1915 for (auto II = BB.begin(); II != BB.end(); ++II) { 1916 MCInst &Inst = *II; 1917 auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( 1918 Inst, getAnnotationIndex()); 1919 if (!TodoList) 1920 continue; 1921 // Process all deletions 1922 for (WorklistItem &Item : *TodoList) { 1923 if (Item.Action != WorklistItem::Erase && 1924 Item.Action != WorklistItem::ChangeToAdjustment) 1925 continue; 1926 1927 if (Item.Action == WorklistItem::ChangeToAdjustment) { 1928 // Is flag reg alive across this func? 1929 bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg()); 1930 if (int Sz = BC.MIB->getPushSize(Inst)) { 1931 BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags); 1932 continue; 1933 } 1934 if (int Sz = BC.MIB->getPopSize(Inst)) { 1935 BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags); 1936 continue; 1937 } 1938 } 1939 1940 LLVM_DEBUG({ 1941 dbgs() << "Erasing: "; 1942 BC.printInstruction(dbgs(), Inst); 1943 }); 1944 II = std::prev(BB.eraseInstruction(II)); 1945 break; 1946 } 1947 } 1948 } 1949 } 1950 1951 void ShrinkWrapping::rebuildCFI() { 1952 const bool FP = Info.getStackPointerTracking().HasFramePointer; 1953 Info.invalidateAll(); 1954 if (!FP) { 1955 rebuildCFIForSP(); 1956 Info.invalidateAll(); 1957 } 1958 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 1959 if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0) 1960 continue; 1961 const int64_t SPValPush = PushOffsetByReg[I]; 1962 const int64_t SPValPop = PopOffsetByReg[I]; 1963 insertUpdatedCFI(I, SPValPush, SPValPop); 1964 Info.invalidateAll(); 1965 } 1966 } 1967 1968 bool ShrinkWrapping::perform(bool HotOnly) { 1969 HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false); 1970 PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL); 1971 PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL); 1972 1973 // Update pass statistics 1974 uint64_t TotalInstrs = 0ULL; 1975 uint64_t TotalStoreInstrs = 0ULL; 1976 for (BinaryBasicBlock *BB : BF.layout()) { 1977 uint64_t BBExecCount = BB->getExecutionCount(); 1978 if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE) 1979 continue; 1980 for (const auto &Instr : *BB) { 1981 if (BC.MIB->isPseudo(Instr)) 1982 continue; 1983 if (BC.MIB->isStore(Instr)) 1984 TotalStoreInstrs += BBExecCount; 1985 TotalInstrs += BBExecCount; 1986 } 1987 } 1988 InstrDynamicCount += TotalInstrs; 1989 StoreDynamicCount += TotalStoreInstrs; 1990 1991 if (!FA.hasFrameInfo(BF)) 1992 return false; 1993 1994 if (HotOnly && (BF.getKnownExecutionCount() < BC.getHotThreshold())) 1995 return false; 1996 1997 if (opts::EqualizeBBCounts) 1998 equalizeBBCounts(Info, BF); 1999 2000 if (BF.checkForAmbiguousJumpTables()) { 2001 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName() 2002 << ".\n"); 2003 // We could call disambiguateJumpTables here, but it is probably not worth 2004 // the cost (of duplicating potentially large jump tables that could regress 2005 // dcache misses). Moreover, ambiguous JTs are rare and coming from code 2006 // written in assembly language. Just bail. 2007 return false; 2008 } 2009 SLM.initialize(); 2010 CSA.compute(); 2011 classifyCSRUses(); 2012 pruneUnwantedCSRs(); 2013 computeSaveLocations(); 2014 moveSaveRestores(); 2015 LLVM_DEBUG({ 2016 dbgs() << "Func before shrink-wrapping: \n"; 2017 BF.dump(); 2018 }); 2019 SLM.performChanges(); 2020 // Early exit if processInsertions doesn't detect any todo items 2021 if (!processInsertions()) 2022 return false; 2023 processDeletions(); 2024 if (foldIdenticalSplitEdges()) { 2025 const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs(); 2026 (void)Stats; 2027 LLVM_DEBUG(dbgs() << "Deleted " << Stats.first 2028 << " redundant split edge BBs (" << Stats.second 2029 << " bytes) for " << BF.getPrintName() << "\n"); 2030 } 2031 rebuildCFI(); 2032 // We may have split edges, creating BBs that need correct branching 2033 BF.fixBranches(); 2034 LLVM_DEBUG({ 2035 dbgs() << "Func after shrink-wrapping: \n"; 2036 BF.dump(); 2037 }); 2038 return true; 2039 } 2040 2041 void ShrinkWrapping::printStats() { 2042 outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode 2043 << " spills inserting load/stores and " << SpillsMovedPushPopMode 2044 << " spills inserting push/pops\n"; 2045 if (!InstrDynamicCount || !StoreDynamicCount) 2046 return; 2047 outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount 2048 << " store executions (" 2049 << format("%.1lf%%", 2050 (100.0 * SpillsMovedDynamicCount / InstrDynamicCount)) 2051 << " total instructions executed, " 2052 << format("%.1lf%%", 2053 (100.0 * SpillsMovedDynamicCount / StoreDynamicCount)) 2054 << " store instructions)\n"; 2055 outs() << "BOLT-INFO: Shrink wrapping failed at reducing " 2056 << SpillsFailedDynamicCount << " store executions (" 2057 << format("%.1lf%%", 2058 (100.0 * SpillsFailedDynamicCount / InstrDynamicCount)) 2059 << " total instructions executed, " 2060 << format("%.1lf%%", 2061 (100.0 * SpillsFailedDynamicCount / StoreDynamicCount)) 2062 << " store instructions)\n"; 2063 } 2064 2065 // Operators necessary as a result of using MCAnnotation 2066 raw_ostream &operator<<(raw_ostream &OS, 2067 const std::vector<ShrinkWrapping::WorklistItem> &Vec) { 2068 OS << "SWTodo["; 2069 const char *Sep = ""; 2070 for (const ShrinkWrapping::WorklistItem &Item : Vec) { 2071 OS << Sep; 2072 switch (Item.Action) { 2073 case ShrinkWrapping::WorklistItem::Erase: 2074 OS << "Erase"; 2075 break; 2076 case ShrinkWrapping::WorklistItem::ChangeToAdjustment: 2077 OS << "ChangeToAdjustment"; 2078 break; 2079 case ShrinkWrapping::WorklistItem::InsertLoadOrStore: 2080 OS << "InsertLoadOrStore"; 2081 break; 2082 case ShrinkWrapping::WorklistItem::InsertPushOrPop: 2083 OS << "InsertPushOrPop"; 2084 break; 2085 } 2086 Sep = ", "; 2087 } 2088 OS << "]"; 2089 return OS; 2090 } 2091 2092 raw_ostream & 2093 operator<<(raw_ostream &OS, 2094 const std::vector<StackLayoutModifier::WorklistItem> &Vec) { 2095 OS << "SLMTodo["; 2096 const char *Sep = ""; 2097 for (const StackLayoutModifier::WorklistItem &Item : Vec) { 2098 OS << Sep; 2099 switch (Item.Action) { 2100 case StackLayoutModifier::WorklistItem::None: 2101 OS << "None"; 2102 break; 2103 case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset: 2104 OS << "AdjustLoadStoreOffset"; 2105 break; 2106 case StackLayoutModifier::WorklistItem::AdjustCFI: 2107 OS << "AdjustCFI"; 2108 break; 2109 } 2110 Sep = ", "; 2111 } 2112 OS << "]"; 2113 return OS; 2114 } 2115 2116 bool operator==(const ShrinkWrapping::WorklistItem &A, 2117 const ShrinkWrapping::WorklistItem &B) { 2118 return (A.Action == B.Action && A.AffectedReg == B.AffectedReg && 2119 A.Adjustment == B.Adjustment && 2120 A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad && 2121 A.FIEToInsert.IsStore == B.FIEToInsert.IsStore && 2122 A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm && 2123 A.FIEToInsert.Size == B.FIEToInsert.Size && 2124 A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple && 2125 A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset); 2126 } 2127 2128 bool operator==(const StackLayoutModifier::WorklistItem &A, 2129 const StackLayoutModifier::WorklistItem &B) { 2130 return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate); 2131 } 2132 2133 } // end namespace bolt 2134 } // end namespace llvm 2135