1 //===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the ShrinkWrapping class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/ShrinkWrapping.h" 14 #include "bolt/Core/MCPlus.h" 15 #include "bolt/Passes/DataflowInfoManager.h" 16 #include <numeric> 17 #include <stack> 18 19 #define DEBUG_TYPE "shrinkwrapping" 20 21 using namespace llvm; 22 23 namespace opts { 24 25 extern cl::opt<bool> TimeOpts; 26 extern cl::OptionCategory BoltOptCategory; 27 28 static cl::opt<unsigned> ShrinkWrappingThreshold( 29 "shrink-wrapping-threshold", 30 cl::desc("Percentage of prologue execution count to use as threshold when" 31 " evaluating whether a block is cold enough to be profitable to" 32 " move eligible spills there"), 33 cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 34 } // namespace opts 35 36 namespace llvm { 37 namespace bolt { 38 39 void CalleeSavedAnalysis::analyzeSaves() { 40 ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs(); 41 StackReachingUses &SRU = Info.getStackReachingUses(); 42 auto &InsnToBB = Info.getInsnToBBMap(); 43 BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false); 44 45 LLVM_DEBUG(dbgs() << "Checking spill locations\n"); 46 for (BinaryBasicBlock &BB : BF) { 47 LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n"); 48 const MCInst *Prev = nullptr; 49 for (MCInst &Inst : BB) { 50 if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) { 51 // Blacklist weird stores we don't understand 52 if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore && 53 FIE->IsStoreFromReg) { 54 BlacklistedRegs.set(FIE->RegOrImm); 55 CalleeSaved.reset(FIE->RegOrImm); 56 Prev = &Inst; 57 continue; 58 } 59 60 if (!FIE->IsStore || !FIE->IsStoreFromReg || 61 BlacklistedRegs[FIE->RegOrImm]) { 62 Prev = &Inst; 63 continue; 64 } 65 66 // If this reg is defined locally, it is not a callee-saved reg 67 if (RD.isReachedBy(FIE->RegOrImm, 68 Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) { 69 BlacklistedRegs.set(FIE->RegOrImm); 70 CalleeSaved.reset(FIE->RegOrImm); 71 Prev = &Inst; 72 continue; 73 } 74 75 // If this stack position is accessed in another function, we are 76 // probably dealing with a parameter passed in a stack -- do not mess 77 // with it 78 if (SRU.isStoreUsed(*FIE, 79 Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB)), 80 /*IncludeLocalAccesses=*/false) { 81 BlacklistedRegs.set(FIE->RegOrImm); 82 CalleeSaved.reset(FIE->RegOrImm); 83 Prev = &Inst; 84 continue; 85 } 86 87 // If this stack position is loaded elsewhere in another reg, we can't 88 // update it, so blacklist it. 89 if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev) 90 : SRU.expr_begin(BB))) { 91 BlacklistedRegs.set(FIE->RegOrImm); 92 CalleeSaved.reset(FIE->RegOrImm); 93 Prev = &Inst; 94 continue; 95 } 96 97 // Ignore regs with multiple saves 98 if (CalleeSaved[FIE->RegOrImm]) { 99 BlacklistedRegs.set(FIE->RegOrImm); 100 CalleeSaved.reset(FIE->RegOrImm); 101 Prev = &Inst; 102 continue; 103 } 104 105 CalleeSaved.set(FIE->RegOrImm); 106 SaveFIEByReg[FIE->RegOrImm] = &*FIE; 107 SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount(); 108 BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId); 109 OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset; 110 LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: " 111 << FIE->RegOrImm << "\n"); 112 } 113 Prev = &Inst; 114 } 115 } 116 } 117 118 void CalleeSavedAnalysis::analyzeRestores() { 119 ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses(); 120 121 // Now compute all restores of these callee-saved regs 122 for (BinaryBasicBlock &BB : BF) { 123 const MCInst *Prev = nullptr; 124 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 125 MCInst &Inst = *I; 126 if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) { 127 if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) { 128 Prev = &Inst; 129 continue; 130 } 131 132 // If this reg is used locally after a restore, then we are probably 133 // not dealing with a callee-saved reg. Except if this use is by 134 // another store, but we don't cover this case yet. 135 // Also not callee-saved if this load accesses caller stack or isn't 136 // simple. 137 if (!FIE->IsSimple || FIE->StackOffset >= 0 || 138 RU.isReachedBy(FIE->RegOrImm, 139 Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) { 140 CalleeSaved.reset(FIE->RegOrImm); 141 Prev = &Inst; 142 continue; 143 } 144 // If stack offsets between saves/store don't agree with each other, 145 // we don't completely understand what's happening here 146 if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) { 147 CalleeSaved.reset(FIE->RegOrImm); 148 LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a " 149 "mismatching restore: " 150 << FIE->RegOrImm << "\n"); 151 Prev = &Inst; 152 continue; 153 } 154 155 LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm 156 << "\n"); 157 if (LoadFIEByReg[FIE->RegOrImm] == nullptr) 158 LoadFIEByReg[FIE->RegOrImm] = &*FIE; 159 BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm, 160 AllocatorId); 161 HasRestores.set(FIE->RegOrImm); 162 } 163 Prev = &Inst; 164 } 165 } 166 } 167 168 std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) { 169 std::vector<MCInst *> Results; 170 for (BinaryBasicBlock &BB : BF) 171 for (MCInst &Inst : BB) 172 if (getSavedReg(Inst) == Reg) 173 Results.push_back(&Inst); 174 return Results; 175 } 176 177 std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) { 178 std::vector<MCInst *> Results; 179 for (BinaryBasicBlock &BB : BF) 180 for (MCInst &Inst : BB) 181 if (getRestoredReg(Inst) == Reg) 182 Results.push_back(&Inst); 183 return Results; 184 } 185 186 CalleeSavedAnalysis::~CalleeSavedAnalysis() { 187 for (BinaryBasicBlock &BB : BF) { 188 for (MCInst &Inst : BB) { 189 BC.MIB->removeAnnotation(Inst, getSaveTag()); 190 BC.MIB->removeAnnotation(Inst, getRestoreTag()); 191 } 192 } 193 } 194 195 void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) { 196 if (BlacklistedRegions[Offset] < Size) 197 BlacklistedRegions[Offset] = Size; 198 } 199 200 bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) { 201 for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions) 202 if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second) 203 return true; 204 return false; 205 } 206 207 bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset, 208 int64_t Size) { 209 bool HasConflict = false; 210 for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) { 211 std::pair<const int64_t, int64_t> &Elem = *Iter; 212 if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second && 213 (Offset != Elem.first || Size != Elem.second)) { 214 Iter = AvailableRegions.erase(Iter); 215 HasConflict = true; 216 continue; 217 } 218 ++Iter; 219 } 220 if (HasConflict) { 221 blacklistRegion(Offset, Size); 222 return true; 223 } 224 return false; 225 } 226 227 void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) { 228 StackPointerTracking &SPT = Info.getStackPointerTracking(); 229 if (!BC.MII->get(Point.getOpcode()) 230 .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI)) 231 return; 232 233 int SPVal, FPVal; 234 std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point); 235 std::pair<MCPhysReg, int64_t> FP; 236 237 if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION) 238 FP = std::make_pair(BC.MIB->getFramePointer(), FPVal); 239 else 240 FP = std::make_pair(0, 0); 241 std::pair<MCPhysReg, int64_t> SP; 242 243 if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION) 244 SP = std::make_pair(BC.MIB->getStackPointer(), SPVal); 245 else 246 SP = std::make_pair(0, 0); 247 248 int64_t Output; 249 if (!BC.MIB->evaluateSimple(Point, Output, SP, FP)) 250 return; 251 252 // Not your regular frame pointer initialization... bail 253 if (Output != SPVal) 254 blacklistRegion(0, 0); 255 } 256 257 void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) { 258 StackPointerTracking &SPT = Info.getStackPointerTracking(); 259 if (!BC.MII->get(Point.getOpcode()) 260 .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI)) 261 return; 262 // Check if the definition of SP comes from FP -- in this case, this 263 // value may need to be updated depending on our stack layout changes 264 const MCInstrDesc &InstInfo = BC.MII->get(Point.getOpcode()); 265 unsigned NumDefs = InstInfo.getNumDefs(); 266 bool UsesFP = false; 267 for (unsigned I = NumDefs, E = MCPlus::getNumPrimeOperands(Point); I < E; 268 ++I) { 269 MCOperand &Operand = Point.getOperand(I); 270 if (!Operand.isReg()) 271 continue; 272 if (Operand.getReg() == BC.MIB->getFramePointer()) { 273 UsesFP = true; 274 break; 275 } 276 } 277 if (!UsesFP) 278 return; 279 280 // Setting up evaluation 281 int SPVal, FPVal; 282 std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point); 283 std::pair<MCPhysReg, int64_t> FP; 284 285 if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION) 286 FP = std::make_pair(BC.MIB->getFramePointer(), FPVal); 287 else 288 FP = std::make_pair(0, 0); 289 std::pair<MCPhysReg, int64_t> SP; 290 291 if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION) 292 SP = std::make_pair(BC.MIB->getStackPointer(), SPVal); 293 else 294 SP = std::make_pair(0, 0); 295 296 int64_t Output; 297 if (!BC.MIB->evaluateSimple(Point, Output, SP, FP)) 298 return; 299 300 // If the value is the same of FP, no need to adjust it 301 if (Output == FPVal) 302 return; 303 304 // If an allocation happened through FP, bail 305 if (Output <= SPVal) { 306 blacklistRegion(0, 0); 307 return; 308 } 309 310 // We are restoring SP to an old value based on FP. Mark it as a stack 311 // access to be fixed later. 312 BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId); 313 } 314 315 void StackLayoutModifier::classifyStackAccesses() { 316 // Understand when stack slots are being used non-locally 317 StackReachingUses &SRU = Info.getStackReachingUses(); 318 319 for (BinaryBasicBlock &BB : BF) { 320 const MCInst *Prev = nullptr; 321 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 322 MCInst &Inst = *I; 323 checkFramePointerInitialization(Inst); 324 checkStackPointerRestore(Inst); 325 ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst); 326 if (!FIEX) { 327 Prev = &Inst; 328 continue; 329 } 330 if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) { 331 blacklistRegion(FIEX->StackOffset, FIEX->Size); 332 Prev = &Inst; 333 continue; 334 } 335 // If this stack position is accessed in another function, we are 336 // probably dealing with a parameter passed in a stack -- do not mess 337 // with it 338 if (SRU.isStoreUsed(*FIEX, 339 Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB), 340 /*IncludeLocalAccesses=*/false)) { 341 blacklistRegion(FIEX->StackOffset, FIEX->Size); 342 Prev = &Inst; 343 continue; 344 } 345 // Now we have a clear stack slot access. Check if its blacklisted or if 346 // it conflicts with another chunk. 347 if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) || 348 blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) { 349 Prev = &Inst; 350 continue; 351 } 352 // We are free to go. Add it as available stack slot which we know how 353 // to move it. 354 AvailableRegions[FIEX->StackOffset] = FIEX->Size; 355 BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId); 356 RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm); 357 RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset); 358 LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size " 359 << (int)FIEX->Size << "\n"); 360 } 361 } 362 } 363 364 void StackLayoutModifier::classifyCFIs() { 365 std::stack<std::pair<int64_t, uint16_t>> CFIStack; 366 int64_t CfaOffset = -8; 367 uint16_t CfaReg = 7; 368 369 auto recordAccess = [&](MCInst *Inst, int64_t Offset) { 370 const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false); 371 if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) { 372 BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId); 373 LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n"); 374 } else { 375 IsSimple = false; 376 return; 377 } 378 }; 379 380 for (BinaryBasicBlock *&BB : BF.layout()) { 381 for (MCInst &Inst : *BB) { 382 if (!BC.MIB->isCFI(Inst)) 383 continue; 384 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 385 switch (CFI->getOperation()) { 386 case MCCFIInstruction::OpDefCfa: 387 CfaOffset = -CFI->getOffset(); 388 recordAccess(&Inst, CfaOffset); 389 LLVM_FALLTHROUGH; 390 case MCCFIInstruction::OpDefCfaRegister: 391 CfaReg = CFI->getRegister(); 392 break; 393 case MCCFIInstruction::OpDefCfaOffset: 394 CfaOffset = -CFI->getOffset(); 395 recordAccess(&Inst, CfaOffset); 396 break; 397 case MCCFIInstruction::OpOffset: 398 recordAccess(&Inst, CFI->getOffset()); 399 BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(), 400 BC.MRI->getLLVMRegNum(CFI->getRegister(), 401 /*isEH=*/false), 402 AllocatorId); 403 break; 404 case MCCFIInstruction::OpSameValue: 405 BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(), 406 BC.MRI->getLLVMRegNum(CFI->getRegister(), 407 /*isEH=*/false), 408 AllocatorId); 409 break; 410 case MCCFIInstruction::OpRememberState: 411 CFIStack.push(std::make_pair(CfaOffset, CfaReg)); 412 break; 413 case MCCFIInstruction::OpRestoreState: { 414 assert(!CFIStack.empty() && "Corrupt CFI stack"); 415 std::pair<int64_t, uint16_t> &Elem = CFIStack.top(); 416 CFIStack.pop(); 417 CfaOffset = Elem.first; 418 CfaReg = Elem.second; 419 break; 420 } 421 case MCCFIInstruction::OpRelOffset: 422 case MCCFIInstruction::OpAdjustCfaOffset: 423 llvm_unreachable("Unhandled AdjustCfaOffset"); 424 break; 425 default: 426 break; 427 } 428 } 429 } 430 } 431 432 void StackLayoutModifier::scheduleChange( 433 MCInst &Inst, StackLayoutModifier::WorklistItem Item) { 434 auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>( 435 Inst, getTodoTag(), AllocatorId); 436 WList.push_back(Item); 437 } 438 439 bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) { 440 if (!IsSimple || !BC.MIB->isPush(*DeletedPush)) 441 return false; 442 443 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush); 444 if (!FIE) 445 return false; 446 447 return canCollapseRegion(FIE->StackOffset); 448 } 449 450 bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) { 451 if (!IsInitialized) 452 initialize(); 453 if (!IsSimple) 454 return false; 455 456 if (CollapsedRegions.count(RegionAddr)) 457 return true; 458 459 // Check if it is possible to readjust all accesses below RegionAddr 460 if (!BlacklistedRegions.empty()) 461 return false; 462 463 return true; 464 } 465 466 bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) { 467 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush); 468 if (!FIE) 469 return false; 470 int64_t RegionAddr = FIE->StackOffset; 471 int64_t RegionSz = FIE->Size; 472 return collapseRegion(DeletedPush, RegionAddr, RegionSz); 473 } 474 475 bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr, 476 int64_t RegionSz) { 477 if (!canCollapseRegion(RegionAddr)) 478 return false; 479 480 assert(IsInitialized); 481 StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis(); 482 483 for (BinaryBasicBlock &BB : BF) { 484 for (MCInst &Inst : BB) { 485 if (!BC.MIB->hasAnnotation(Inst, getSlotTag())) 486 continue; 487 auto Slot = 488 BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>( 489 Inst, getSlotTag()); 490 if (!AvailableRegions.count(Slot)) 491 continue; 492 // We need to ensure this access is affected by the deleted push 493 if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]]) 494 continue; 495 496 if (BC.MIB->isCFI(Inst)) { 497 if (Slot > RegionAddr) 498 continue; 499 scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz)); 500 continue; 501 } 502 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst); 503 if (!FIE) { 504 if (Slot > RegionAddr) 505 continue; 506 // SP update based on frame pointer 507 scheduleChange( 508 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz)); 509 continue; 510 } 511 512 if (Slot == RegionAddr) { 513 BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId); 514 continue; 515 } 516 if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) 517 continue; 518 519 if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr) 520 continue; 521 522 if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr) 523 continue; 524 525 scheduleChange( 526 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz)); 527 } 528 } 529 530 CollapsedRegions.insert(RegionAddr); 531 return true; 532 } 533 534 void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) { 535 for (BinaryBasicBlock &BB : BF) { 536 for (MCInst &Inst : BB) { 537 if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) 538 continue; 539 BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos"); 540 scheduleChange( 541 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset)); 542 } 543 } 544 } 545 546 bool StackLayoutModifier::canInsertRegion(ProgramPoint P) { 547 if (!IsInitialized) 548 initialize(); 549 if (!IsSimple) 550 return false; 551 552 StackPointerTracking &SPT = Info.getStackPointerTracking(); 553 int64_t RegionAddr = SPT.getStateBefore(P)->first; 554 if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY) 555 return false; 556 557 if (InsertedRegions.count(RegionAddr)) 558 return true; 559 560 // Check if we are going to screw up stack accesses at call sites that 561 // pass parameters via stack 562 if (!BlacklistedRegions.empty()) 563 return false; 564 565 return true; 566 } 567 568 bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) { 569 if (!canInsertRegion(P)) 570 return false; 571 572 assert(IsInitialized); 573 StackPointerTracking &SPT = Info.getStackPointerTracking(); 574 // This RegionAddr is slightly different from the one seen in collapseRegion 575 // This is the value of SP before the allocation the user wants to make. 576 int64_t RegionAddr = SPT.getStateBefore(P)->first; 577 if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY) 578 return false; 579 580 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 581 582 for (BinaryBasicBlock &BB : BF) { 583 for (MCInst &Inst : BB) { 584 if (!BC.MIB->hasAnnotation(Inst, getSlotTag())) 585 continue; 586 auto Slot = 587 BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>( 588 Inst, getSlotTag()); 589 if (!AvailableRegions.count(Slot)) 590 continue; 591 592 if (!(DA.doesADominateB(P, Inst))) 593 continue; 594 595 if (BC.MIB->isCFI(Inst)) { 596 if (Slot >= RegionAddr) 597 continue; 598 scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz)); 599 continue; 600 } 601 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst); 602 if (!FIE) { 603 if (Slot >= RegionAddr) 604 continue; 605 scheduleChange( 606 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz)); 607 continue; 608 } 609 610 if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr) 611 continue; 612 if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr) 613 continue; 614 if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) 615 continue; 616 scheduleChange( 617 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz)); 618 } 619 } 620 621 InsertedRegions.insert(RegionAddr); 622 return true; 623 } 624 625 void StackLayoutModifier::performChanges() { 626 std::set<uint32_t> ModifiedCFIIndices; 627 for (BinaryBasicBlock &BB : BF) { 628 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 629 MCInst &Inst = *I; 630 if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) { 631 assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst)); 632 BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos"); 633 } 634 if (!BC.MIB->hasAnnotation(Inst, getTodoTag())) 635 continue; 636 auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>( 637 Inst, getTodoTag()); 638 int64_t Adjustment = 0; 639 WorklistItem::ActionType AdjustmentType = WorklistItem::None; 640 for (WorklistItem &WI : WList) { 641 if (WI.Action == WorklistItem::None) 642 continue; 643 assert(WI.Action == WorklistItem::AdjustLoadStoreOffset || 644 WI.Action == WorklistItem::AdjustCFI); 645 assert((AdjustmentType == WorklistItem::None || 646 AdjustmentType == WI.Action) && 647 "Conflicting actions requested at the same program point"); 648 AdjustmentType = WI.Action; 649 Adjustment += WI.OffsetUpdate; 650 } 651 if (!Adjustment) 652 continue; 653 if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) { 654 assert(BC.MIB->isCFI(Inst)); 655 uint32_t CFINum = Inst.getOperand(0).getImm(); 656 if (ModifiedCFIIndices.count(CFINum)) 657 continue; 658 ModifiedCFIIndices.insert(CFINum); 659 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 660 const MCCFIInstruction::OpType Operation = CFI->getOperation(); 661 if (Operation == MCCFIInstruction::OpDefCfa || 662 Operation == MCCFIInstruction::OpDefCfaOffset) 663 Adjustment = 0 - Adjustment; 664 LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset() 665 << " to " << (CFI->getOffset() + Adjustment) << "\n"); 666 BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment); 667 continue; 668 } 669 int32_t SrcImm = 0; 670 MCPhysReg Reg = 0; 671 MCPhysReg StackPtrReg = 0; 672 int64_t StackOffset = 0; 673 bool IsIndexed = false; 674 bool IsLoad = false; 675 bool IsStore = false; 676 bool IsSimple = false; 677 bool IsStoreFromReg = false; 678 uint8_t Size = 0; 679 bool Success = false; 680 Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, 681 Reg, SrcImm, StackPtrReg, StackOffset, 682 Size, IsSimple, IsIndexed); 683 if (!Success) { 684 // SP update based on FP value 685 Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx); 686 assert(Success); 687 continue; 688 } 689 assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg)); 690 if (StackPtrReg != BC.MIB->getFramePointer()) 691 Adjustment = -Adjustment; 692 if (IsLoad) 693 Success = BC.MIB->createRestoreFromStack( 694 Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size); 695 else if (IsStore) 696 Success = BC.MIB->createSaveToStack( 697 Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size); 698 LLVM_DEBUG({ 699 dbgs() << "Adjusted instruction: "; 700 Inst.dump(); 701 }); 702 assert(Success); 703 } 704 } 705 } 706 707 void StackLayoutModifier::initialize() { 708 classifyStackAccesses(); 709 classifyCFIs(); 710 IsInitialized = true; 711 } 712 713 uint64_t ShrinkWrapping::SpillsMovedRegularMode = 0; 714 uint64_t ShrinkWrapping::SpillsMovedPushPopMode = 0; 715 716 using BBIterTy = BinaryBasicBlock::iterator; 717 718 void ShrinkWrapping::classifyCSRUses() { 719 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 720 StackPointerTracking &SPT = Info.getStackPointerTracking(); 721 UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(), 722 BitVector(DA.NumInstrs, false)); 723 724 const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer()); 725 for (BinaryBasicBlock &BB : BF) { 726 for (MCInst &Inst : BB) { 727 if (BC.MIB->isCFI(Inst)) 728 continue; 729 BitVector BV = BitVector(BC.MRI->getNumRegs(), false); 730 BC.MIB->getTouchedRegs(Inst, BV); 731 BV &= CSA.CalleeSaved; 732 for (int I = BV.find_first(); I != -1; I = BV.find_next(I)) { 733 if (I == 0) 734 continue; 735 if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I) 736 UsesByReg[I].set(DA.ExprToIdx[&Inst]); 737 } 738 if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst)) 739 continue; 740 BV = CSA.CalleeSaved; 741 BV &= FPAliases; 742 for (int I = BV.find_first(); I > 0; I = BV.find_next(I)) 743 UsesByReg[I].set(DA.ExprToIdx[&Inst]); 744 } 745 } 746 } 747 748 void ShrinkWrapping::pruneUnwantedCSRs() { 749 BitVector ParamRegs = BC.MIB->getRegsUsedAsParams(); 750 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 751 if (!CSA.CalleeSaved[I]) 752 continue; 753 if (ParamRegs[I]) { 754 CSA.CalleeSaved.reset(I); 755 continue; 756 } 757 if (UsesByReg[I].empty()) { 758 LLVM_DEBUG( 759 dbgs() 760 << "Dismissing Callee-Saved Reg because we found no uses of it:" << I 761 << "\n"); 762 CSA.CalleeSaved.reset(I); 763 continue; 764 } 765 if (!CSA.HasRestores[I]) { 766 LLVM_DEBUG( 767 dbgs() << "Dismissing Callee-Saved Reg because it does not have " 768 "restores:" 769 << I << "\n"); 770 CSA.CalleeSaved.reset(I); 771 } 772 } 773 } 774 775 void ShrinkWrapping::computeSaveLocations() { 776 SavePos = std::vector<SmallSetVector<MCInst *, 4>>(BC.MRI->getNumRegs()); 777 ReachingInsns<true> &RI = Info.getReachingInsnsBackwards(); 778 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 779 StackPointerTracking &SPT = Info.getStackPointerTracking(); 780 781 LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n"); 782 for (BinaryBasicBlock &BB : BF) { 783 LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n"); 784 785 MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr; 786 if (!First) 787 continue; 788 789 // Use reaching instructions to detect if we are inside a loop - if we 790 // are, do not consider this BB as valid placement for saves. 791 if (RI.isInLoop(BB)) 792 continue; 793 794 const std::pair<int, int> SPFP = *SPT.getStateBefore(*First); 795 // If we don't know stack state at this point, bail 796 if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) && 797 (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) 798 continue; 799 800 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 801 if (!CSA.CalleeSaved[I]) 802 continue; 803 804 BitVector BBDominatedUses = BitVector(DA.NumInstrs, false); 805 for (int J = UsesByReg[I].find_first(); J > 0; 806 J = UsesByReg[I].find_next(J)) 807 if (DA.doesADominateB(*First, J)) 808 BBDominatedUses.set(J); 809 LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates " 810 << BBDominatedUses.count() << " uses for reg " << I 811 << ". Total uses for reg is " << UsesByReg[I].count() 812 << "\n"); 813 BBDominatedUses &= UsesByReg[I]; 814 if (BBDominatedUses == UsesByReg[I]) { 815 LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName() 816 << " as a save pos for " << I << "\n"); 817 SavePos[I].insert(First); 818 LLVM_DEBUG({ 819 dbgs() << "Dominated uses are:\n"; 820 for (int J = UsesByReg[I].find_first(); J > 0; 821 J = UsesByReg[I].find_next(J)) { 822 dbgs() << "Idx " << J << ": "; 823 DA.Expressions[J]->dump(); 824 } 825 }); 826 } 827 } 828 } 829 830 BestSaveCount = std::vector<uint64_t>(BC.MRI->getNumRegs(), 831 std::numeric_limits<uint64_t>::max()); 832 BestSavePos = std::vector<MCInst *>(BC.MRI->getNumRegs(), nullptr); 833 auto &InsnToBB = Info.getInsnToBBMap(); 834 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 835 if (!CSA.CalleeSaved[I]) 836 continue; 837 838 for (MCInst *Pos : SavePos[I]) { 839 BinaryBasicBlock *BB = InsnToBB[Pos]; 840 uint64_t Count = BB->getExecutionCount(); 841 if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && 842 Count < BestSaveCount[I]) { 843 BestSavePos[I] = Pos; 844 BestSaveCount[I] = Count; 845 } 846 } 847 } 848 } 849 850 void ShrinkWrapping::computeDomOrder() { 851 std::vector<MCPhysReg> Order; 852 for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 853 Order.push_back(I); 854 } 855 856 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 857 auto &InsnToBB = Info.getInsnToBBMap(); 858 std::sort(Order.begin(), Order.end(), 859 [&](const MCPhysReg &A, const MCPhysReg &B) { 860 BinaryBasicBlock *BBA = 861 BestSavePos[A] ? InsnToBB[BestSavePos[A]] : nullptr; 862 BinaryBasicBlock *BBB = 863 BestSavePos[B] ? InsnToBB[BestSavePos[B]] : nullptr; 864 if (BBA == BBB) 865 return A < B; 866 if (!BBA && BBB) 867 return false; 868 if (BBA && !BBB) 869 return true; 870 if (DA.doesADominateB(*BestSavePos[A], *BestSavePos[B])) 871 return true; 872 if (DA.doesADominateB(*BestSavePos[B], *BestSavePos[A])) 873 return false; 874 return A < B; 875 }); 876 877 for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) 878 DomOrder[Order[I]] = I; 879 } 880 881 bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave, 882 uint64_t &TotalEstimatedWin) { 883 const uint64_t CurSavingCost = CSA.SavingCost[CSR]; 884 if (!CSA.CalleeSaved[CSR]) 885 return false; 886 887 uint64_t BestCount = BestSaveCount[CSR]; 888 BestPosSave = BestSavePos[CSR]; 889 bool ShouldMove = false; 890 if (BestCount != std::numeric_limits<uint64_t>::max() && 891 BestCount < (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost) { 892 LLVM_DEBUG({ 893 auto &InsnToBB = Info.getInsnToBBMap(); 894 dbgs() << "Better position for saves found in func " << BF.getPrintName() 895 << " count << " << BF.getKnownExecutionCount() << "\n"; 896 dbgs() << "Reg: " << CSR 897 << "; New BB: " << InsnToBB[BestPosSave]->getName() 898 << " Freq reduction: " << (CurSavingCost - BestCount) << "\n"; 899 }); 900 TotalEstimatedWin += CurSavingCost - BestCount; 901 ShouldMove = true; 902 } 903 904 if (!ShouldMove) 905 return false; 906 if (!BestPosSave) { 907 LLVM_DEBUG({ 908 dbgs() << "Dropping opportunity because we don't know where to put " 909 "stores -- total est. freq reduc: " 910 << TotalEstimatedWin << "\n"; 911 }); 912 return false; 913 } 914 return true; 915 } 916 917 /// Auxiliar function used to create basic blocks for critical edges and update 918 /// the dominance frontier with these new locations 919 void ShrinkWrapping::splitFrontierCritEdges( 920 BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier, 921 const SmallVector<bool, 4> &IsCritEdge, 922 const SmallVector<BinaryBasicBlock *, 4> &From, 923 const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) { 924 LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func " 925 << BF.getPrintName() << "\n"); 926 // For every FromBB, there might be one or more critical edges, with 927 // To[I] containing destination BBs. It's important to memorize 928 // the original size of the Frontier as we may append to it while splitting 929 // critical edges originating with blocks with multiple destinations. 930 for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) { 931 if (!IsCritEdge[I]) 932 continue; 933 if (To[I].empty()) 934 continue; 935 BinaryBasicBlock *FromBB = From[I]; 936 LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName() 937 << "\n"); 938 // Split edge for every DestinationBBs 939 for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) { 940 BinaryBasicBlock *DestinationBB = To[I][DI]; 941 LLVM_DEBUG(dbgs() << " - Dest : " << DestinationBB->getName() << "\n"); 942 BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB); 943 // Insert dummy instruction so this BB is never empty (we need this for 944 // PredictiveStackPointerTracking to work, since it annotates instructions 945 // and not BBs). 946 if (NewBB->empty()) { 947 MCInst NewInst; 948 BC.MIB->createNoop(NewInst); 949 NewBB->addInstruction(std::move(NewInst)); 950 scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0)); 951 } 952 953 // Update frontier 954 ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB); 955 if (DI == 0) { 956 // Update frontier inplace 957 Frontier[I] = NewFrontierPP; 958 LLVM_DEBUG(dbgs() << " - Update frontier with " << NewBB->getName() 959 << '\n'); 960 } else { 961 // Append new frontier to the end of the list 962 Frontier.push_back(NewFrontierPP); 963 LLVM_DEBUG(dbgs() << " - Append frontier " << NewBB->getName() 964 << '\n'); 965 } 966 } 967 } 968 } 969 970 SmallVector<ProgramPoint, 4> 971 ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR, 972 uint64_t TotalEstimatedWin) { 973 SmallVector<ProgramPoint, 4> Frontier; 974 SmallVector<bool, 4> IsCritEdge; 975 bool CannotPlace = false; 976 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 977 978 SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom; 979 SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo; 980 // In case of a critical edge, we need to create extra BBs to host restores 981 // into edges transitioning to the dominance frontier, otherwise we pull these 982 // restores to inside the dominated area. 983 Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector(); 984 LLVM_DEBUG({ 985 dbgs() << "Dumping dominance frontier for "; 986 BC.printInstruction(dbgs(), *BestPosSave); 987 for (ProgramPoint &PP : Frontier) 988 if (PP.isInst()) 989 BC.printInstruction(dbgs(), *PP.getInst()); 990 else 991 dbgs() << PP.getBB()->getName() << "\n"; 992 }); 993 for (ProgramPoint &PP : Frontier) { 994 bool HasCritEdges = false; 995 if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) && 996 doesInstUsesCSR(*PP.getInst(), CSR)) 997 CannotPlace = true; 998 BinaryBasicBlock *FrontierBB = Info.getParentBB(PP); 999 CritEdgesFrom.emplace_back(FrontierBB); 1000 CritEdgesTo.emplace_back(0); 1001 SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back(); 1002 // Check for invoke instructions at the dominance frontier, which indicates 1003 // the landing pad is not dominated. 1004 if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) { 1005 LLVM_DEBUG( 1006 dbgs() << "Bailing on restore placement to avoid LP splitting\n"); 1007 Frontier.clear(); 1008 return Frontier; 1009 } 1010 doForAllSuccs(*FrontierBB, [&](ProgramPoint P) { 1011 if (!DA.doesADominateB(*BestPosSave, P)) { 1012 Dests.emplace_back(Info.getParentBB(P)); 1013 return; 1014 } 1015 HasCritEdges = true; 1016 }); 1017 IsCritEdge.push_back(HasCritEdges); 1018 } 1019 // Restores cannot be placed in empty BBs because we have a dataflow 1020 // analysis that depends on insertions happening before real instructions 1021 // (PredictiveStackPointerTracking). Detect now for empty BBs and add a 1022 // dummy nop that is scheduled to be removed later. 1023 bool InvalidateRequired = false; 1024 for (BinaryBasicBlock *&BB : BF.layout()) { 1025 if (BB->size() != 0) 1026 continue; 1027 MCInst NewInst; 1028 BC.MIB->createNoop(NewInst); 1029 auto II = BB->addInstruction(std::move(NewInst)); 1030 scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0)); 1031 InvalidateRequired = true; 1032 } 1033 if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) { 1034 LLVM_DEBUG({ 1035 dbgs() << "Now detected critical edges in the following frontier:\n"; 1036 for (ProgramPoint &PP : Frontier) { 1037 if (PP.isBB()) { 1038 dbgs() << " BB: " << PP.getBB()->getName() << "\n"; 1039 } else { 1040 dbgs() << " Inst: "; 1041 PP.getInst()->dump(); 1042 } 1043 } 1044 }); 1045 splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom, 1046 CritEdgesTo); 1047 InvalidateRequired = true; 1048 } 1049 if (InvalidateRequired) { 1050 // BitVectors that represent all insns of the function are invalid now 1051 // since we changed BBs/Insts. Re-run steps that depend on pointers being 1052 // valid 1053 Info.invalidateAll(); 1054 classifyCSRUses(); 1055 } 1056 if (CannotPlace) { 1057 LLVM_DEBUG({ 1058 dbgs() << "Dropping opportunity because restore placement failed" 1059 " -- total est. freq reduc: " 1060 << TotalEstimatedWin << "\n"; 1061 }); 1062 Frontier.clear(); 1063 return Frontier; 1064 } 1065 return Frontier; 1066 } 1067 1068 bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave, 1069 int64_t SaveOffset) { 1070 if (FA.requiresAlignment(BF)) { 1071 LLVM_DEBUG({ 1072 dbgs() << "Reg " << CSR 1073 << " is not using push/pops due to function " 1074 "alignment requirements.\n"; 1075 }); 1076 return false; 1077 } 1078 for (MCInst *Save : CSA.getSavesByReg(CSR)) { 1079 if (!SLM.canCollapseRegion(Save)) { 1080 LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n"); 1081 return false; 1082 } 1083 } 1084 // Abort if one of the restores for this CSR is not a POP. 1085 for (MCInst *Load : CSA.getRestoresByReg(CSR)) { 1086 if (!BC.MIB->isPop(*Load)) { 1087 LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n"); 1088 return false; 1089 } 1090 } 1091 1092 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1093 // Abort if we are inserting a push into an entry BB (offset -8) and this 1094 // func sets up a frame pointer. 1095 if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION || 1096 SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) { 1097 LLVM_DEBUG({ 1098 dbgs() << "Reg " << CSR 1099 << " cannot insert region or we are " 1100 "trying to insert a push into entry bb.\n"; 1101 }); 1102 return false; 1103 } 1104 return true; 1105 } 1106 1107 SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements( 1108 const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset, 1109 unsigned CSR) { 1110 SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints; 1111 // Moving pop locations to the correct sp offset 1112 ReachingInsns<true> &RI = Info.getReachingInsnsBackwards(); 1113 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1114 for (ProgramPoint &PP : FixedRestorePoints) { 1115 BinaryBasicBlock *BB = Info.getParentBB(PP); 1116 bool Found = false; 1117 if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first == 1118 SaveOffset) { 1119 BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB)); 1120 BV &= UsesByReg[CSR]; 1121 if (!BV.any()) { 1122 Found = true; 1123 PP = BB; 1124 continue; 1125 } 1126 } 1127 for (auto RIt = BB->rbegin(), End = BB->rend(); RIt != End; ++RIt) { 1128 if (SPT.getStateBefore(*RIt)->first == SaveOffset) { 1129 BitVector BV = *RI.getStateAt(*RIt); 1130 BV &= UsesByReg[CSR]; 1131 if (!BV.any()) { 1132 Found = true; 1133 PP = &*RIt; 1134 break; 1135 } 1136 } 1137 } 1138 if (!Found) { 1139 LLVM_DEBUG({ 1140 dbgs() << "Could not find restore insertion point for " << CSR 1141 << ", falling back to load/store mode\n"; 1142 }); 1143 FixedRestorePoints.clear(); 1144 return FixedRestorePoints; 1145 } 1146 } 1147 return FixedRestorePoints; 1148 } 1149 1150 void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR, 1151 bool UsePushPops) { 1152 1153 for (BinaryBasicBlock *&BB : BF.layout()) { 1154 std::vector<MCInst *> CFIs; 1155 for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) { 1156 MCInst &Inst = *I; 1157 if (BC.MIB->isCFI(Inst)) { 1158 // Delete all offset CFIs related to this CSR 1159 if (SLM.getOffsetCFIReg(Inst) == CSR) { 1160 HasDeletedOffsetCFIs[CSR] = true; 1161 scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR)); 1162 continue; 1163 } 1164 CFIs.push_back(&Inst); 1165 continue; 1166 } 1167 1168 uint16_t SavedReg = CSA.getSavedReg(Inst); 1169 uint16_t RestoredReg = CSA.getRestoredReg(Inst); 1170 if (SavedReg != CSR && RestoredReg != CSR) { 1171 CFIs.clear(); 1172 continue; 1173 } 1174 1175 scheduleChange(&Inst, WorklistItem(UsePushPops 1176 ? WorklistItem::Erase 1177 : WorklistItem::ChangeToAdjustment, 1178 CSR)); 1179 1180 // Delete associated CFIs 1181 const bool RecordDeletedPushCFIs = 1182 SavedReg == CSR && DeletedPushCFIs[CSR].empty(); 1183 const bool RecordDeletedPopCFIs = 1184 RestoredReg == CSR && DeletedPopCFIs[CSR].empty(); 1185 for (MCInst *CFI : CFIs) { 1186 const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI); 1187 // Do not touch these... 1188 if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState || 1189 MCCFI->getOperation() == MCCFIInstruction::OpRememberState) 1190 continue; 1191 scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR)); 1192 if (RecordDeletedPushCFIs) { 1193 // Do not record this to be replayed later because we are going to 1194 // rebuild it. 1195 if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) 1196 continue; 1197 DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm()); 1198 } 1199 if (RecordDeletedPopCFIs) { 1200 if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) 1201 continue; 1202 DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm()); 1203 } 1204 } 1205 CFIs.clear(); 1206 } 1207 } 1208 } 1209 1210 bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) { 1211 if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR || 1212 CSA.getRestoredReg(Inst) == CSR) 1213 return false; 1214 BitVector BV = BitVector(BC.MRI->getNumRegs(), false); 1215 BC.MIB->getTouchedRegs(Inst, BV); 1216 return BV[CSR]; 1217 } 1218 1219 void ShrinkWrapping::scheduleSaveRestoreInsertions( 1220 unsigned CSR, MCInst *BestPosSave, 1221 SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) { 1222 auto &InsnToBB = Info.getInsnToBBMap(); 1223 const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR]; 1224 const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR]; 1225 assert(FIESave && FIELoad && "Invalid CSR"); 1226 1227 LLVM_DEBUG({ 1228 dbgs() << "Scheduling save insertion at: "; 1229 BestPosSave->dump(); 1230 }); 1231 1232 scheduleChange(BestPosSave, 1233 UsePushPops ? WorklistItem::InsertPushOrPop 1234 : WorklistItem::InsertLoadOrStore, 1235 *FIESave, CSR); 1236 1237 for (ProgramPoint &PP : RestorePoints) { 1238 BinaryBasicBlock *FrontierBB = Info.getParentBB(PP); 1239 LLVM_DEBUG({ 1240 dbgs() << "Scheduling restore insertion at: "; 1241 if (PP.isInst()) 1242 PP.getInst()->dump(); 1243 else 1244 dbgs() << PP.getBB()->getName() << "\n"; 1245 }); 1246 MCInst *Term = 1247 FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr); 1248 if (Term) 1249 PP = Term; 1250 bool PrecededByPrefix = false; 1251 if (PP.isInst()) { 1252 auto Iter = FrontierBB->findInstruction(PP.getInst()); 1253 if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) { 1254 --Iter; 1255 PrecededByPrefix = BC.MIB->isPrefix(*Iter); 1256 } 1257 } 1258 if (PP.isInst() && 1259 (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) { 1260 assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) && 1261 "cannot move to end of bb"); 1262 scheduleChange(InsnToBB[PP.getInst()], 1263 UsePushPops ? WorklistItem::InsertPushOrPop 1264 : WorklistItem::InsertLoadOrStore, 1265 *FIELoad, CSR); 1266 continue; 1267 } 1268 scheduleChange(PP, 1269 UsePushPops ? WorklistItem::InsertPushOrPop 1270 : WorklistItem::InsertLoadOrStore, 1271 *FIELoad, CSR); 1272 } 1273 } 1274 1275 void ShrinkWrapping::moveSaveRestores() { 1276 bool DisablePushPopMode = false; 1277 bool UsedPushPopMode = false; 1278 // Keeps info about successfully moved regs: reg index, save position and 1279 // save size 1280 std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs; 1281 1282 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 1283 MCInst *BestPosSave = nullptr; 1284 uint64_t TotalEstimatedWin = 0; 1285 if (!isBestSavePosCold(I, BestPosSave, TotalEstimatedWin)) 1286 continue; 1287 SmallVector<ProgramPoint, 4> RestorePoints = 1288 doRestorePlacement(BestPosSave, I, TotalEstimatedWin); 1289 if (RestorePoints.empty()) 1290 continue; 1291 1292 const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I]; 1293 const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I]; 1294 (void)FIELoad; 1295 assert(FIESave && FIELoad); 1296 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1297 const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave); 1298 int SaveOffset = SPFP.first; 1299 uint8_t SaveSize = FIESave->Size; 1300 1301 // If we don't know stack state at this point, bail 1302 if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) && 1303 (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) 1304 continue; 1305 1306 // Operation mode: if true, will insert push/pops instead of loads/restores 1307 bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset); 1308 1309 if (UsePushPops) { 1310 SmallVector<ProgramPoint, 4> FixedRestorePoints = 1311 fixPopsPlacements(RestorePoints, SaveOffset, I); 1312 if (FixedRestorePoints.empty()) 1313 UsePushPops = false; 1314 else 1315 RestorePoints = FixedRestorePoints; 1316 } 1317 1318 // Disable push-pop mode for all CSRs in this function 1319 if (!UsePushPops) 1320 DisablePushPopMode = true; 1321 else 1322 UsedPushPopMode = true; 1323 1324 scheduleOldSaveRestoresRemoval(I, UsePushPops); 1325 scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops); 1326 MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize)); 1327 } 1328 1329 // Revert push-pop mode if it failed for a single CSR 1330 if (DisablePushPopMode && UsedPushPopMode) { 1331 UsedPushPopMode = false; 1332 for (BinaryBasicBlock &BB : BF) { 1333 auto WRI = Todo.find(&BB); 1334 if (WRI != Todo.end()) { 1335 std::vector<WorklistItem> &TodoList = WRI->second; 1336 for (WorklistItem &Item : TodoList) 1337 if (Item.Action == WorklistItem::InsertPushOrPop) 1338 Item.Action = WorklistItem::InsertLoadOrStore; 1339 } 1340 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 1341 MCInst &Inst = *I; 1342 auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( 1343 Inst, getAnnotationIndex()); 1344 if (!TodoList) 1345 continue; 1346 bool isCFI = BC.MIB->isCFI(Inst); 1347 for (WorklistItem &Item : *TodoList) { 1348 if (Item.Action == WorklistItem::InsertPushOrPop) 1349 Item.Action = WorklistItem::InsertLoadOrStore; 1350 if (!isCFI && Item.Action == WorklistItem::Erase) 1351 Item.Action = WorklistItem::ChangeToAdjustment; 1352 } 1353 } 1354 } 1355 } 1356 1357 // Update statistics 1358 if (!UsedPushPopMode) { 1359 SpillsMovedRegularMode += MovedRegs.size(); 1360 return; 1361 } 1362 1363 // Schedule modifications to stack-accessing instructions via 1364 // StackLayoutModifier. 1365 SpillsMovedPushPopMode += MovedRegs.size(); 1366 for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) { 1367 unsigned RegNdx; 1368 MCInst *SavePos; 1369 size_t SaveSize; 1370 std::tie(RegNdx, SavePos, SaveSize) = I; 1371 for (MCInst *Save : CSA.getSavesByReg(RegNdx)) 1372 SLM.collapseRegion(Save); 1373 SLM.insertRegion(SavePos, SaveSize); 1374 } 1375 } 1376 1377 namespace { 1378 /// Helper function to identify whether two basic blocks created by splitting 1379 /// a critical edge have the same contents. 1380 bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A, 1381 const BinaryBasicBlock &B) { 1382 if (A.succ_size() != B.succ_size()) 1383 return false; 1384 if (A.succ_size() != 1) 1385 return false; 1386 1387 if (*A.succ_begin() != *B.succ_begin()) 1388 return false; 1389 1390 if (A.size() != B.size()) 1391 return false; 1392 1393 // Compare instructions 1394 auto I = A.begin(), E = A.end(); 1395 auto OtherI = B.begin(), OtherE = B.end(); 1396 while (I != E && OtherI != OtherE) { 1397 if (I->getOpcode() != OtherI->getOpcode()) 1398 return false; 1399 if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) { 1400 return true; 1401 })) 1402 return false; 1403 ++I; 1404 ++OtherI; 1405 } 1406 return true; 1407 } 1408 } // namespace 1409 1410 bool ShrinkWrapping::foldIdenticalSplitEdges() { 1411 bool Changed = false; 1412 for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) { 1413 BinaryBasicBlock &BB = *Iter; 1414 if (!BB.getName().startswith(".LSplitEdge")) 1415 continue; 1416 for (auto RIter = BF.rbegin(); RIter != BF.rend(); ++RIter) { 1417 BinaryBasicBlock &RBB = *RIter; 1418 if (&RBB == &BB) 1419 break; 1420 if (!RBB.getName().startswith(".LSplitEdge") || !RBB.isValid() || 1421 !isIdenticalSplitEdgeBB(BC, *Iter, RBB)) 1422 continue; 1423 assert(RBB.pred_size() == 1 && "Invalid split edge BB"); 1424 BinaryBasicBlock *Pred = *RBB.pred_begin(); 1425 uint64_t OrigCount = Pred->branch_info_begin()->Count; 1426 uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount; 1427 BF.replaceJumpTableEntryIn(Pred, &RBB, &BB); 1428 Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds); 1429 Changed = true; 1430 // Remove the block from CFG 1431 RBB.markValid(false); 1432 } 1433 } 1434 1435 return Changed; 1436 } 1437 1438 namespace { 1439 1440 // A special StackPointerTracking that compensates for our future plans 1441 // in removing/adding insn. 1442 class PredictiveStackPointerTracking 1443 : public StackPointerTrackingBase<PredictiveStackPointerTracking> { 1444 friend class DataflowAnalysis<PredictiveStackPointerTracking, 1445 std::pair<int, int>>; 1446 decltype(ShrinkWrapping::Todo) &TodoMap; 1447 DataflowInfoManager &Info; 1448 1449 Optional<unsigned> AnnotationIndex; 1450 1451 protected: 1452 void compNextAux(const MCInst &Point, 1453 const std::vector<ShrinkWrapping::WorklistItem> &TodoItems, 1454 std::pair<int, int> &Res) { 1455 for (const ShrinkWrapping::WorklistItem &Item : TodoItems) { 1456 if (Item.Action == ShrinkWrapping::WorklistItem::Erase && 1457 BC.MIB->isPush(Point)) { 1458 Res.first += BC.MIB->getPushSize(Point); 1459 continue; 1460 } 1461 if (Item.Action == ShrinkWrapping::WorklistItem::Erase && 1462 BC.MIB->isPop(Point)) { 1463 Res.first -= BC.MIB->getPopSize(Point); 1464 continue; 1465 } 1466 if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop && 1467 Item.FIEToInsert.IsStore) { 1468 Res.first -= Item.FIEToInsert.Size; 1469 continue; 1470 } 1471 if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop && 1472 Item.FIEToInsert.IsLoad) { 1473 Res.first += Item.FIEToInsert.Size; 1474 continue; 1475 } 1476 } 1477 } 1478 1479 std::pair<int, int> computeNext(const MCInst &Point, 1480 const std::pair<int, int> &Cur) { 1481 std::pair<int, int> Res = 1482 StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext( 1483 Point, Cur); 1484 if (Res.first == StackPointerTracking::SUPERPOSITION || 1485 Res.first == StackPointerTracking::EMPTY) 1486 return Res; 1487 auto TodoItems = 1488 BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>( 1489 Point, ShrinkWrapping::getAnnotationName()); 1490 if (TodoItems) 1491 compNextAux(Point, *TodoItems, Res); 1492 auto &InsnToBBMap = Info.getInsnToBBMap(); 1493 if (&*InsnToBBMap[&Point]->rbegin() != &Point) 1494 return Res; 1495 auto WRI = TodoMap.find(InsnToBBMap[&Point]); 1496 if (WRI == TodoMap.end()) 1497 return Res; 1498 compNextAux(Point, WRI->second, Res); 1499 return Res; 1500 } 1501 1502 StringRef getAnnotationName() const { 1503 return StringRef("PredictiveStackPointerTracking"); 1504 } 1505 1506 public: 1507 PredictiveStackPointerTracking(BinaryFunction &BF, 1508 decltype(ShrinkWrapping::Todo) &TodoMap, 1509 DataflowInfoManager &Info, 1510 MCPlusBuilder::AllocatorIdTy AllocatorId = 0) 1511 : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF, 1512 AllocatorId), 1513 TodoMap(TodoMap), Info(Info) {} 1514 1515 void run() { 1516 StackPointerTrackingBase<PredictiveStackPointerTracking>::run(); 1517 } 1518 }; 1519 1520 } // end anonymous namespace 1521 1522 void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush, 1523 int SPValPop) { 1524 MCInst *SavePoint = nullptr; 1525 for (BinaryBasicBlock &BB : BF) { 1526 for (auto InstIter = BB.rbegin(), EndIter = BB.rend(); InstIter != EndIter; 1527 ++InstIter) { 1528 int32_t SrcImm = 0; 1529 MCPhysReg Reg = 0; 1530 MCPhysReg StackPtrReg = 0; 1531 int64_t StackOffset = 0; 1532 bool IsIndexed = false; 1533 bool IsLoad = false; 1534 bool IsStore = false; 1535 bool IsSimple = false; 1536 bool IsStoreFromReg = false; 1537 uint8_t Size = 0; 1538 if (!BC.MIB->isStackAccess(*InstIter, IsLoad, IsStore, IsStoreFromReg, 1539 Reg, SrcImm, StackPtrReg, StackOffset, Size, 1540 IsSimple, IsIndexed)) 1541 continue; 1542 if (Reg != CSR || !IsStore || !IsSimple) 1543 continue; 1544 SavePoint = &*InstIter; 1545 break; 1546 } 1547 if (SavePoint) 1548 break; 1549 } 1550 assert(SavePoint); 1551 LLVM_DEBUG({ 1552 dbgs() << "Now using as save point for reg " << CSR << " :"; 1553 SavePoint->dump(); 1554 }); 1555 bool PrevAffectedZone = false; 1556 BinaryBasicBlock *PrevBB = nullptr; 1557 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 1558 for (BinaryBasicBlock *BB : BF.layout()) { 1559 if (BB->size() == 0) 1560 continue; 1561 const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint); 1562 const bool InAffectedZoneAtBegin = 1563 (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]]; 1564 bool InAffectedZone = InAffectedZoneAtBegin; 1565 for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) { 1566 const bool CurZone = DA.count(*InstIter, *SavePoint); 1567 if (InAffectedZone != CurZone) { 1568 auto InsertionIter = InstIter; 1569 ++InsertionIter; 1570 InAffectedZone = CurZone; 1571 if (InAffectedZone) 1572 InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0, 1573 SPValPop); 1574 else 1575 InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0, 1576 SPValPush); 1577 --InstIter; 1578 } 1579 } 1580 // Are we at the first basic block or hot-cold split point? 1581 if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) { 1582 if (InAffectedZoneAtBegin) 1583 insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush); 1584 } else if (InAffectedZoneAtBegin != PrevAffectedZone) { 1585 if (InAffectedZoneAtBegin) 1586 insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush); 1587 else 1588 insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop); 1589 } 1590 PrevAffectedZone = InAffectedZoneAtEnd; 1591 PrevBB = BB; 1592 } 1593 } 1594 1595 void ShrinkWrapping::rebuildCFIForSP() { 1596 for (BinaryBasicBlock &BB : BF) { 1597 for (MCInst &Inst : BB) { 1598 if (!BC.MIB->isCFI(Inst)) 1599 continue; 1600 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 1601 if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) 1602 BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId); 1603 } 1604 } 1605 1606 int PrevSPVal = -8; 1607 BinaryBasicBlock *PrevBB = nullptr; 1608 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1609 for (BinaryBasicBlock *BB : BF.layout()) { 1610 if (BB->size() == 0) 1611 continue; 1612 const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first; 1613 const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first; 1614 int SPVal = SPValAtBegin; 1615 for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) { 1616 const int CurVal = SPT.getStateAt(*Iter)->first; 1617 if (SPVal != CurVal) { 1618 auto InsertionIter = Iter; 1619 ++InsertionIter; 1620 Iter = BF.addCFIInstruction( 1621 BB, InsertionIter, 1622 MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal)); 1623 SPVal = CurVal; 1624 } 1625 } 1626 if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold()) 1627 BF.addCFIInstruction( 1628 BB, BB->begin(), 1629 MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin)); 1630 else if (SPValAtBegin != PrevSPVal) 1631 BF.addCFIInstruction( 1632 PrevBB, PrevBB->end(), 1633 MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin)); 1634 PrevSPVal = SPValAtEnd; 1635 PrevBB = BB; 1636 } 1637 1638 for (BinaryBasicBlock &BB : BF) 1639 for (auto I = BB.begin(); I != BB.end();) 1640 if (BC.MIB->hasAnnotation(*I, "DeleteMe")) 1641 I = BB.eraseInstruction(I); 1642 else 1643 ++I; 1644 } 1645 1646 MCInst ShrinkWrapping::createStackAccess(int SPVal, int FPVal, 1647 const FrameIndexEntry &FIE, 1648 bool CreatePushOrPop) { 1649 MCInst NewInst; 1650 if (SPVal != StackPointerTracking::SUPERPOSITION && 1651 SPVal != StackPointerTracking::EMPTY) { 1652 if (FIE.IsLoad) { 1653 if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(), 1654 FIE.StackOffset - SPVal, FIE.RegOrImm, 1655 FIE.Size)) { 1656 errs() << "createRestoreFromStack: not supported on this platform\n"; 1657 abort(); 1658 } 1659 } else { 1660 if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(), 1661 FIE.StackOffset - SPVal, FIE.RegOrImm, 1662 FIE.Size)) { 1663 errs() << "createSaveToStack: not supported on this platform\n"; 1664 abort(); 1665 } 1666 } 1667 if (CreatePushOrPop) 1668 BC.MIB->changeToPushOrPop(NewInst); 1669 return NewInst; 1670 } 1671 assert(FPVal != StackPointerTracking::SUPERPOSITION && 1672 FPVal != StackPointerTracking::EMPTY); 1673 1674 if (FIE.IsLoad) { 1675 if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(), 1676 FIE.StackOffset - FPVal, FIE.RegOrImm, 1677 FIE.Size)) { 1678 errs() << "createRestoreFromStack: not supported on this platform\n"; 1679 abort(); 1680 } 1681 } else { 1682 if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(), 1683 FIE.StackOffset - FPVal, FIE.RegOrImm, 1684 FIE.Size)) { 1685 errs() << "createSaveToStack: not supported on this platform\n"; 1686 abort(); 1687 } 1688 } 1689 return NewInst; 1690 } 1691 1692 void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) { 1693 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 1694 if (UpdatedCFIs.count(CFI)) 1695 return; 1696 1697 switch (CFI->getOperation()) { 1698 case MCCFIInstruction::OpDefCfa: 1699 case MCCFIInstruction::OpDefCfaRegister: 1700 case MCCFIInstruction::OpDefCfaOffset: 1701 CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset); 1702 break; 1703 case MCCFIInstruction::OpOffset: 1704 default: 1705 break; 1706 } 1707 1708 UpdatedCFIs.insert(CFI); 1709 } 1710 1711 BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB, 1712 BBIterTy Pos, unsigned Reg, 1713 bool isPush, int Sz, 1714 int64_t NewOffset) { 1715 if (isPush) { 1716 for (uint32_t Idx : DeletedPushCFIs[Reg]) { 1717 Pos = BF.addCFIPseudo(&BB, Pos, Idx); 1718 updateCFIInstOffset(*Pos++, NewOffset); 1719 } 1720 if (HasDeletedOffsetCFIs[Reg]) { 1721 Pos = BF.addCFIInstruction( 1722 &BB, Pos, 1723 MCCFIInstruction::createOffset( 1724 nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset)); 1725 ++Pos; 1726 } 1727 } else { 1728 for (uint32_t Idx : DeletedPopCFIs[Reg]) { 1729 Pos = BF.addCFIPseudo(&BB, Pos, Idx); 1730 updateCFIInstOffset(*Pos++, NewOffset); 1731 } 1732 if (HasDeletedOffsetCFIs[Reg]) { 1733 Pos = BF.addCFIInstruction( 1734 &BB, Pos, 1735 MCCFIInstruction::createSameValue( 1736 nullptr, BC.MRI->getDwarfRegNum(Reg, false))); 1737 ++Pos; 1738 } 1739 } 1740 return Pos; 1741 } 1742 1743 BBIterTy ShrinkWrapping::processInsertion(BBIterTy InsertionPoint, 1744 BinaryBasicBlock *CurBB, 1745 const WorklistItem &Item, 1746 int64_t SPVal, int64_t FPVal) { 1747 // Trigger CFI reconstruction for this CSR if necessary - writing to 1748 // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update 1749 if ((Item.FIEToInsert.IsStore && 1750 !DeletedPushCFIs[Item.AffectedReg].empty()) || 1751 (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) || 1752 HasDeletedOffsetCFIs[Item.AffectedReg]) { 1753 if (Item.Action == WorklistItem::InsertPushOrPop) { 1754 if (Item.FIEToInsert.IsStore) 1755 PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size; 1756 else 1757 PopOffsetByReg[Item.AffectedReg] = SPVal; 1758 } else { 1759 if (Item.FIEToInsert.IsStore) 1760 PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset; 1761 else 1762 PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset; 1763 } 1764 } 1765 1766 LLVM_DEBUG({ 1767 dbgs() << "Creating stack access with SPVal = " << SPVal 1768 << "; stack offset = " << Item.FIEToInsert.StackOffset 1769 << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop) 1770 << "\n"; 1771 }); 1772 MCInst NewInst = 1773 createStackAccess(SPVal, FPVal, Item.FIEToInsert, 1774 Item.Action == WorklistItem::InsertPushOrPop); 1775 if (InsertionPoint != CurBB->end()) { 1776 LLVM_DEBUG({ 1777 dbgs() << "Adding before Inst: "; 1778 InsertionPoint->dump(); 1779 dbgs() << "the following inst: "; 1780 NewInst.dump(); 1781 }); 1782 BBIterTy Iter = 1783 CurBB->insertInstruction(InsertionPoint, std::move(NewInst)); 1784 return ++Iter; 1785 } 1786 CurBB->addInstruction(std::move(NewInst)); 1787 LLVM_DEBUG(dbgs() << "Adding to BB!\n"); 1788 return CurBB->end(); 1789 } 1790 1791 BBIterTy ShrinkWrapping::processInsertionsList( 1792 BBIterTy InsertionPoint, BinaryBasicBlock *CurBB, 1793 std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) { 1794 bool HasInsertions = false; 1795 for (WorklistItem &Item : TodoList) { 1796 if (Item.Action == WorklistItem::Erase || 1797 Item.Action == WorklistItem::ChangeToAdjustment) 1798 continue; 1799 HasInsertions = true; 1800 break; 1801 } 1802 1803 if (!HasInsertions) 1804 return InsertionPoint; 1805 1806 assert(((SPVal != StackPointerTracking::SUPERPOSITION && 1807 SPVal != StackPointerTracking::EMPTY) || 1808 (FPVal != StackPointerTracking::SUPERPOSITION && 1809 FPVal != StackPointerTracking::EMPTY)) && 1810 "Cannot insert if we have no idea of the stack state here"); 1811 1812 // Revert the effect of PSPT for this location, we want SP Value before 1813 // insertions 1814 if (InsertionPoint == CurBB->end()) { 1815 for (WorklistItem &Item : TodoList) { 1816 if (Item.Action != WorklistItem::InsertPushOrPop) 1817 continue; 1818 if (Item.FIEToInsert.IsStore) 1819 SPVal += Item.FIEToInsert.Size; 1820 if (Item.FIEToInsert.IsLoad) 1821 SPVal -= Item.FIEToInsert.Size; 1822 } 1823 } 1824 1825 // Reorder POPs to obey the correct dominance relation between them 1826 std::stable_sort(TodoList.begin(), TodoList.end(), 1827 [&](const WorklistItem &A, const WorklistItem &B) { 1828 if ((A.Action != WorklistItem::InsertPushOrPop || 1829 !A.FIEToInsert.IsLoad) && 1830 (B.Action != WorklistItem::InsertPushOrPop || 1831 !B.FIEToInsert.IsLoad)) 1832 return false; 1833 if ((A.Action != WorklistItem::InsertPushOrPop || 1834 !A.FIEToInsert.IsLoad)) 1835 return true; 1836 if ((B.Action != WorklistItem::InsertPushOrPop || 1837 !B.FIEToInsert.IsLoad)) 1838 return false; 1839 return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg]; 1840 }); 1841 1842 // Process insertions 1843 for (WorklistItem &Item : TodoList) { 1844 if (Item.Action == WorklistItem::Erase || 1845 Item.Action == WorklistItem::ChangeToAdjustment) 1846 continue; 1847 1848 InsertionPoint = 1849 processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal); 1850 if (Item.Action == WorklistItem::InsertPushOrPop && 1851 Item.FIEToInsert.IsStore) 1852 SPVal -= Item.FIEToInsert.Size; 1853 if (Item.Action == WorklistItem::InsertPushOrPop && 1854 Item.FIEToInsert.IsLoad) 1855 SPVal += Item.FIEToInsert.Size; 1856 } 1857 return InsertionPoint; 1858 } 1859 1860 bool ShrinkWrapping::processInsertions() { 1861 PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId); 1862 PSPT.run(); 1863 1864 bool Changes = false; 1865 for (BinaryBasicBlock &BB : BF) { 1866 // Process insertions before some inst. 1867 for (auto I = BB.begin(); I != BB.end(); ++I) { 1868 MCInst &Inst = *I; 1869 auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( 1870 Inst, getAnnotationIndex()); 1871 if (!TodoList) 1872 continue; 1873 Changes = true; 1874 std::vector<WorklistItem> List = *TodoList; 1875 LLVM_DEBUG({ 1876 dbgs() << "Now processing insertions in " << BB.getName() 1877 << " before inst: "; 1878 Inst.dump(); 1879 }); 1880 auto Iter = I; 1881 std::pair<int, int> SPTState = 1882 *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter)); 1883 I = processInsertionsList(I, &BB, List, SPTState.first, SPTState.second); 1884 } 1885 // Process insertions at the end of bb 1886 auto WRI = Todo.find(&BB); 1887 if (WRI != Todo.end()) { 1888 std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin()); 1889 processInsertionsList(BB.end(), &BB, WRI->second, SPTState.first, 1890 SPTState.second); 1891 Changes = true; 1892 } 1893 } 1894 return Changes; 1895 } 1896 1897 void ShrinkWrapping::processDeletions() { 1898 LivenessAnalysis &LA = Info.getLivenessAnalysis(); 1899 for (BinaryBasicBlock &BB : BF) { 1900 for (auto II = BB.begin(); II != BB.end(); ++II) { 1901 MCInst &Inst = *II; 1902 auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( 1903 Inst, getAnnotationIndex()); 1904 if (!TodoList) 1905 continue; 1906 // Process all deletions 1907 for (WorklistItem &Item : *TodoList) { 1908 if (Item.Action != WorklistItem::Erase && 1909 Item.Action != WorklistItem::ChangeToAdjustment) 1910 continue; 1911 1912 if (Item.Action == WorklistItem::ChangeToAdjustment) { 1913 // Is flag reg alive across this func? 1914 bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg()); 1915 if (int Sz = BC.MIB->getPushSize(Inst)) { 1916 BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags); 1917 continue; 1918 } 1919 if (int Sz = BC.MIB->getPopSize(Inst)) { 1920 BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags); 1921 continue; 1922 } 1923 } 1924 1925 LLVM_DEBUG({ 1926 dbgs() << "Erasing: "; 1927 BC.printInstruction(dbgs(), Inst); 1928 }); 1929 II = std::prev(BB.eraseInstruction(II)); 1930 break; 1931 } 1932 } 1933 } 1934 } 1935 1936 void ShrinkWrapping::rebuildCFI() { 1937 const bool FP = Info.getStackPointerTracking().HasFramePointer; 1938 Info.invalidateAll(); 1939 if (!FP) { 1940 rebuildCFIForSP(); 1941 Info.invalidateAll(); 1942 } 1943 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 1944 if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0) 1945 continue; 1946 const int64_t SPValPush = PushOffsetByReg[I]; 1947 const int64_t SPValPop = PopOffsetByReg[I]; 1948 insertUpdatedCFI(I, SPValPush, SPValPop); 1949 Info.invalidateAll(); 1950 } 1951 } 1952 1953 bool ShrinkWrapping::perform() { 1954 HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false); 1955 PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL); 1956 PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL); 1957 DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0); 1958 1959 if (BF.checkForAmbiguousJumpTables()) { 1960 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName() 1961 << ".\n"); 1962 // We could call disambiguateJumpTables here, but it is probably not worth 1963 // the cost (of duplicating potentially large jump tables that could regress 1964 // dcache misses). Moreover, ambiguous JTs are rare and coming from code 1965 // written in assembly language. Just bail. 1966 return false; 1967 } 1968 SLM.initialize(); 1969 CSA.compute(); 1970 classifyCSRUses(); 1971 pruneUnwantedCSRs(); 1972 computeSaveLocations(); 1973 computeDomOrder(); 1974 moveSaveRestores(); 1975 LLVM_DEBUG({ 1976 dbgs() << "Func before shrink-wrapping: \n"; 1977 BF.dump(); 1978 }); 1979 SLM.performChanges(); 1980 // Early exit if processInsertions doesn't detect any todo items 1981 if (!processInsertions()) 1982 return false; 1983 processDeletions(); 1984 if (foldIdenticalSplitEdges()) { 1985 const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs(); 1986 (void)Stats; 1987 LLVM_DEBUG(dbgs() << "Deleted " << Stats.first 1988 << " redundant split edge BBs (" << Stats.second 1989 << " bytes) for " << BF.getPrintName() << "\n"); 1990 } 1991 rebuildCFI(); 1992 // We may have split edges, creating BBs that need correct branching 1993 BF.fixBranches(); 1994 LLVM_DEBUG({ 1995 dbgs() << "Func after shrink-wrapping: \n"; 1996 BF.dump(); 1997 }); 1998 return true; 1999 } 2000 2001 void ShrinkWrapping::printStats() { 2002 outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode 2003 << " spills inserting load/stores and " << SpillsMovedPushPopMode 2004 << " spills inserting push/pops\n"; 2005 } 2006 2007 // Operators necessary as a result of using MCAnnotation 2008 raw_ostream &operator<<(raw_ostream &OS, 2009 const std::vector<ShrinkWrapping::WorklistItem> &Vec) { 2010 OS << "SWTodo["; 2011 const char *Sep = ""; 2012 for (const ShrinkWrapping::WorklistItem &Item : Vec) { 2013 OS << Sep; 2014 switch (Item.Action) { 2015 case ShrinkWrapping::WorklistItem::Erase: 2016 OS << "Erase"; 2017 break; 2018 case ShrinkWrapping::WorklistItem::ChangeToAdjustment: 2019 OS << "ChangeToAdjustment"; 2020 break; 2021 case ShrinkWrapping::WorklistItem::InsertLoadOrStore: 2022 OS << "InsertLoadOrStore"; 2023 break; 2024 case ShrinkWrapping::WorklistItem::InsertPushOrPop: 2025 OS << "InsertPushOrPop"; 2026 break; 2027 } 2028 Sep = ", "; 2029 } 2030 OS << "]"; 2031 return OS; 2032 } 2033 2034 raw_ostream & 2035 operator<<(raw_ostream &OS, 2036 const std::vector<StackLayoutModifier::WorklistItem> &Vec) { 2037 OS << "SLMTodo["; 2038 const char *Sep = ""; 2039 for (const StackLayoutModifier::WorklistItem &Item : Vec) { 2040 OS << Sep; 2041 switch (Item.Action) { 2042 case StackLayoutModifier::WorklistItem::None: 2043 OS << "None"; 2044 break; 2045 case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset: 2046 OS << "AdjustLoadStoreOffset"; 2047 break; 2048 case StackLayoutModifier::WorklistItem::AdjustCFI: 2049 OS << "AdjustCFI"; 2050 break; 2051 } 2052 Sep = ", "; 2053 } 2054 OS << "]"; 2055 return OS; 2056 } 2057 2058 bool operator==(const ShrinkWrapping::WorklistItem &A, 2059 const ShrinkWrapping::WorklistItem &B) { 2060 return (A.Action == B.Action && A.AffectedReg == B.AffectedReg && 2061 A.Adjustment == B.Adjustment && 2062 A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad && 2063 A.FIEToInsert.IsStore == B.FIEToInsert.IsStore && 2064 A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm && 2065 A.FIEToInsert.Size == B.FIEToInsert.Size && 2066 A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple && 2067 A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset); 2068 } 2069 2070 bool operator==(const StackLayoutModifier::WorklistItem &A, 2071 const StackLayoutModifier::WorklistItem &B) { 2072 return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate); 2073 } 2074 2075 } // end namespace bolt 2076 } // end namespace llvm 2077