1 //===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the ShrinkWrapping class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/ShrinkWrapping.h" 14 #include "bolt/Core/MCPlus.h" 15 #include "bolt/Passes/DataflowInfoManager.h" 16 #include <numeric> 17 #include <stack> 18 19 #define DEBUG_TYPE "shrinkwrapping" 20 21 using namespace llvm; 22 23 namespace opts { 24 25 extern cl::opt<bool> TimeOpts; 26 extern cl::OptionCategory BoltOptCategory; 27 28 static cl::opt<unsigned> ShrinkWrappingThreshold( 29 "shrink-wrapping-threshold", 30 cl::desc("Percentage of prologue execution count to use as threshold when" 31 " evaluating whether a block is cold enough to be profitable to" 32 " move eligible spills there"), 33 cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 34 } // namespace opts 35 36 namespace llvm { 37 namespace bolt { 38 39 void CalleeSavedAnalysis::analyzeSaves() { 40 ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs(); 41 StackReachingUses &SRU = Info.getStackReachingUses(); 42 auto &InsnToBB = Info.getInsnToBBMap(); 43 BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false); 44 45 LLVM_DEBUG(dbgs() << "Checking spill locations\n"); 46 for (BinaryBasicBlock &BB : BF) { 47 LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n"); 48 const MCInst *Prev = nullptr; 49 for (MCInst &Inst : BB) { 50 if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) { 51 // Blacklist weird stores we don't understand 52 if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore && 53 FIE->IsStoreFromReg) { 54 BlacklistedRegs.set(FIE->RegOrImm); 55 CalleeSaved.reset(FIE->RegOrImm); 56 Prev = &Inst; 57 continue; 58 } 59 60 if (!FIE->IsStore || !FIE->IsStoreFromReg || 61 BlacklistedRegs[FIE->RegOrImm]) { 62 Prev = &Inst; 63 continue; 64 } 65 66 // If this reg is defined locally, it is not a callee-saved reg 67 if (RD.isReachedBy(FIE->RegOrImm, 68 Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) { 69 BlacklistedRegs.set(FIE->RegOrImm); 70 CalleeSaved.reset(FIE->RegOrImm); 71 Prev = &Inst; 72 continue; 73 } 74 75 // If this stack position is accessed in another function, we are 76 // probably dealing with a parameter passed in a stack -- do not mess 77 // with it 78 if (SRU.isStoreUsed(*FIE, 79 Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB)), 80 /*IncludeLocalAccesses=*/false) { 81 BlacklistedRegs.set(FIE->RegOrImm); 82 CalleeSaved.reset(FIE->RegOrImm); 83 Prev = &Inst; 84 continue; 85 } 86 87 // If this stack position is loaded elsewhere in another reg, we can't 88 // update it, so blacklist it. 89 if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev) 90 : SRU.expr_begin(BB))) { 91 BlacklistedRegs.set(FIE->RegOrImm); 92 CalleeSaved.reset(FIE->RegOrImm); 93 Prev = &Inst; 94 continue; 95 } 96 97 // Ignore regs with multiple saves 98 if (CalleeSaved[FIE->RegOrImm]) { 99 BlacklistedRegs.set(FIE->RegOrImm); 100 CalleeSaved.reset(FIE->RegOrImm); 101 Prev = &Inst; 102 continue; 103 } 104 105 CalleeSaved.set(FIE->RegOrImm); 106 SaveFIEByReg[FIE->RegOrImm] = &*FIE; 107 SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount(); 108 BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId); 109 OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset; 110 LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: " 111 << FIE->RegOrImm << "\n"); 112 } 113 Prev = &Inst; 114 } 115 } 116 } 117 118 void CalleeSavedAnalysis::analyzeRestores() { 119 ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses(); 120 121 // Now compute all restores of these callee-saved regs 122 for (BinaryBasicBlock &BB : BF) { 123 const MCInst *Prev = nullptr; 124 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 125 MCInst &Inst = *I; 126 if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) { 127 if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) { 128 Prev = &Inst; 129 continue; 130 } 131 132 // If this reg is used locally after a restore, then we are probably 133 // not dealing with a callee-saved reg. Except if this use is by 134 // another store, but we don't cover this case yet. 135 // Also not callee-saved if this load accesses caller stack or isn't 136 // simple. 137 if (!FIE->IsSimple || FIE->StackOffset >= 0 || 138 RU.isReachedBy(FIE->RegOrImm, 139 Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) { 140 CalleeSaved.reset(FIE->RegOrImm); 141 Prev = &Inst; 142 continue; 143 } 144 // If stack offsets between saves/store don't agree with each other, 145 // we don't completely understand what's happening here 146 if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) { 147 CalleeSaved.reset(FIE->RegOrImm); 148 LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a " 149 "mismatching restore: " 150 << FIE->RegOrImm << "\n"); 151 Prev = &Inst; 152 continue; 153 } 154 155 LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm 156 << "\n"); 157 if (LoadFIEByReg[FIE->RegOrImm] == nullptr) 158 LoadFIEByReg[FIE->RegOrImm] = &*FIE; 159 BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm, 160 AllocatorId); 161 HasRestores.set(FIE->RegOrImm); 162 } 163 Prev = &Inst; 164 } 165 } 166 } 167 168 std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) { 169 std::vector<MCInst *> Results; 170 for (BinaryBasicBlock &BB : BF) 171 for (MCInst &Inst : BB) 172 if (getSavedReg(Inst) == Reg) 173 Results.push_back(&Inst); 174 return Results; 175 } 176 177 std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) { 178 std::vector<MCInst *> Results; 179 for (BinaryBasicBlock &BB : BF) 180 for (MCInst &Inst : BB) 181 if (getRestoredReg(Inst) == Reg) 182 Results.push_back(&Inst); 183 return Results; 184 } 185 186 CalleeSavedAnalysis::~CalleeSavedAnalysis() { 187 for (BinaryBasicBlock &BB : BF) { 188 for (MCInst &Inst : BB) { 189 BC.MIB->removeAnnotation(Inst, getSaveTag()); 190 BC.MIB->removeAnnotation(Inst, getRestoreTag()); 191 } 192 } 193 } 194 195 void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) { 196 if (BlacklistedRegions[Offset] < Size) 197 BlacklistedRegions[Offset] = Size; 198 } 199 200 bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) { 201 for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions) 202 if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second) 203 return true; 204 return false; 205 } 206 207 bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset, 208 int64_t Size) { 209 bool HasConflict = false; 210 for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) { 211 std::pair<const int64_t, int64_t> &Elem = *Iter; 212 if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second && 213 (Offset != Elem.first || Size != Elem.second)) { 214 Iter = AvailableRegions.erase(Iter); 215 HasConflict = true; 216 continue; 217 } 218 ++Iter; 219 } 220 if (HasConflict) { 221 blacklistRegion(Offset, Size); 222 return true; 223 } 224 return false; 225 } 226 227 void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) { 228 StackPointerTracking &SPT = Info.getStackPointerTracking(); 229 if (!BC.MII->get(Point.getOpcode()) 230 .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI)) 231 return; 232 233 int SPVal, FPVal; 234 std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point); 235 std::pair<MCPhysReg, int64_t> FP; 236 237 if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION) 238 FP = std::make_pair(BC.MIB->getFramePointer(), FPVal); 239 else 240 FP = std::make_pair(0, 0); 241 std::pair<MCPhysReg, int64_t> SP; 242 243 if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION) 244 SP = std::make_pair(BC.MIB->getStackPointer(), SPVal); 245 else 246 SP = std::make_pair(0, 0); 247 248 int64_t Output; 249 if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) 250 return; 251 252 // Not your regular frame pointer initialization... bail 253 if (Output != SPVal) 254 blacklistRegion(0, 0); 255 } 256 257 void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) { 258 StackPointerTracking &SPT = Info.getStackPointerTracking(); 259 if (!BC.MII->get(Point.getOpcode()) 260 .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI)) 261 return; 262 // Check if the definition of SP comes from FP -- in this case, this 263 // value may need to be updated depending on our stack layout changes 264 const MCInstrDesc &InstInfo = BC.MII->get(Point.getOpcode()); 265 unsigned NumDefs = InstInfo.getNumDefs(); 266 bool UsesFP = false; 267 for (unsigned I = NumDefs, E = MCPlus::getNumPrimeOperands(Point); I < E; 268 ++I) { 269 MCOperand &Operand = Point.getOperand(I); 270 if (!Operand.isReg()) 271 continue; 272 if (Operand.getReg() == BC.MIB->getFramePointer()) { 273 UsesFP = true; 274 break; 275 } 276 } 277 if (!UsesFP) 278 return; 279 280 // Setting up evaluation 281 int SPVal, FPVal; 282 std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point); 283 std::pair<MCPhysReg, int64_t> FP; 284 285 if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION) 286 FP = std::make_pair(BC.MIB->getFramePointer(), FPVal); 287 else 288 FP = std::make_pair(0, 0); 289 std::pair<MCPhysReg, int64_t> SP; 290 291 if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION) 292 SP = std::make_pair(BC.MIB->getStackPointer(), SPVal); 293 else 294 SP = std::make_pair(0, 0); 295 296 int64_t Output; 297 if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) 298 return; 299 300 // If the value is the same of FP, no need to adjust it 301 if (Output == FPVal) 302 return; 303 304 // If an allocation happened through FP, bail 305 if (Output <= SPVal) { 306 blacklistRegion(0, 0); 307 return; 308 } 309 310 // We are restoring SP to an old value based on FP. Mark it as a stack 311 // access to be fixed later. 312 BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId); 313 } 314 315 void StackLayoutModifier::classifyStackAccesses() { 316 // Understand when stack slots are being used non-locally 317 StackReachingUses &SRU = Info.getStackReachingUses(); 318 319 for (BinaryBasicBlock &BB : BF) { 320 const MCInst *Prev = nullptr; 321 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 322 MCInst &Inst = *I; 323 checkFramePointerInitialization(Inst); 324 checkStackPointerRestore(Inst); 325 ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst); 326 if (!FIEX) { 327 Prev = &Inst; 328 continue; 329 } 330 if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) { 331 blacklistRegion(FIEX->StackOffset, FIEX->Size); 332 Prev = &Inst; 333 continue; 334 } 335 // If this stack position is accessed in another function, we are 336 // probably dealing with a parameter passed in a stack -- do not mess 337 // with it 338 if (SRU.isStoreUsed(*FIEX, 339 Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB), 340 /*IncludeLocalAccesses=*/false)) { 341 blacklistRegion(FIEX->StackOffset, FIEX->Size); 342 Prev = &Inst; 343 continue; 344 } 345 // Now we have a clear stack slot access. Check if its blacklisted or if 346 // it conflicts with another chunk. 347 if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) || 348 blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) { 349 Prev = &Inst; 350 continue; 351 } 352 // We are free to go. Add it as available stack slot which we know how 353 // to move it. 354 AvailableRegions[FIEX->StackOffset] = FIEX->Size; 355 BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId); 356 RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm); 357 RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset); 358 LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size " 359 << (int)FIEX->Size << "\n"); 360 } 361 } 362 } 363 364 void StackLayoutModifier::classifyCFIs() { 365 std::stack<std::pair<int64_t, uint16_t>> CFIStack; 366 int64_t CfaOffset = -8; 367 uint16_t CfaReg = 7; 368 369 auto recordAccess = [&](MCInst *Inst, int64_t Offset) { 370 const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false); 371 if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) { 372 BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId); 373 LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n"); 374 } else { 375 IsSimple = false; 376 return; 377 } 378 }; 379 380 for (BinaryBasicBlock *&BB : BF.layout()) { 381 for (MCInst &Inst : *BB) { 382 if (!BC.MIB->isCFI(Inst)) 383 continue; 384 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 385 switch (CFI->getOperation()) { 386 case MCCFIInstruction::OpDefCfa: 387 CfaOffset = -CFI->getOffset(); 388 recordAccess(&Inst, CfaOffset); 389 LLVM_FALLTHROUGH; 390 case MCCFIInstruction::OpDefCfaRegister: 391 CfaReg = CFI->getRegister(); 392 break; 393 case MCCFIInstruction::OpDefCfaOffset: 394 CfaOffset = -CFI->getOffset(); 395 recordAccess(&Inst, CfaOffset); 396 break; 397 case MCCFIInstruction::OpOffset: 398 recordAccess(&Inst, CFI->getOffset()); 399 BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(), 400 BC.MRI->getLLVMRegNum(CFI->getRegister(), 401 /*isEH=*/false), 402 AllocatorId); 403 break; 404 case MCCFIInstruction::OpSameValue: 405 BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(), 406 BC.MRI->getLLVMRegNum(CFI->getRegister(), 407 /*isEH=*/false), 408 AllocatorId); 409 break; 410 case MCCFIInstruction::OpRememberState: 411 CFIStack.push(std::make_pair(CfaOffset, CfaReg)); 412 break; 413 case MCCFIInstruction::OpRestoreState: { 414 assert(!CFIStack.empty() && "Corrupt CFI stack"); 415 std::pair<int64_t, uint16_t> &Elem = CFIStack.top(); 416 CFIStack.pop(); 417 CfaOffset = Elem.first; 418 CfaReg = Elem.second; 419 break; 420 } 421 case MCCFIInstruction::OpRelOffset: 422 case MCCFIInstruction::OpAdjustCfaOffset: 423 llvm_unreachable("Unhandled AdjustCfaOffset"); 424 break; 425 default: 426 break; 427 } 428 } 429 } 430 } 431 432 void StackLayoutModifier::scheduleChange( 433 MCInst &Inst, StackLayoutModifier::WorklistItem Item) { 434 auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>( 435 Inst, getTodoTag(), AllocatorId); 436 WList.push_back(Item); 437 } 438 439 bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) { 440 if (!IsSimple || !BC.MIB->isPush(*DeletedPush)) 441 return false; 442 443 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush); 444 if (!FIE) 445 return false; 446 447 return canCollapseRegion(FIE->StackOffset); 448 } 449 450 bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) { 451 if (!IsInitialized) 452 initialize(); 453 if (!IsSimple) 454 return false; 455 456 if (CollapsedRegions.count(RegionAddr)) 457 return true; 458 459 // Check if it is possible to readjust all accesses below RegionAddr 460 if (!BlacklistedRegions.empty()) 461 return false; 462 463 return true; 464 } 465 466 bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) { 467 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush); 468 if (!FIE) 469 return false; 470 int64_t RegionAddr = FIE->StackOffset; 471 int64_t RegionSz = FIE->Size; 472 return collapseRegion(DeletedPush, RegionAddr, RegionSz); 473 } 474 475 bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr, 476 int64_t RegionSz) { 477 if (!canCollapseRegion(RegionAddr)) 478 return false; 479 480 assert(IsInitialized); 481 StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis(); 482 483 for (BinaryBasicBlock &BB : BF) { 484 for (MCInst &Inst : BB) { 485 if (!BC.MIB->hasAnnotation(Inst, getSlotTag())) 486 continue; 487 auto Slot = 488 BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>( 489 Inst, getSlotTag()); 490 if (!AvailableRegions.count(Slot)) 491 continue; 492 // We need to ensure this access is affected by the deleted push 493 if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]]) 494 continue; 495 496 if (BC.MIB->isCFI(Inst)) { 497 if (Slot > RegionAddr) 498 continue; 499 scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz)); 500 continue; 501 } 502 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst); 503 if (!FIE) { 504 if (Slot > RegionAddr) 505 continue; 506 // SP update based on frame pointer 507 scheduleChange( 508 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz)); 509 continue; 510 } 511 512 if (Slot == RegionAddr) { 513 BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId); 514 continue; 515 } 516 if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) 517 continue; 518 519 if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr) 520 continue; 521 522 if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr) 523 continue; 524 525 scheduleChange( 526 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz)); 527 } 528 } 529 530 CollapsedRegions.insert(RegionAddr); 531 return true; 532 } 533 534 void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) { 535 for (BinaryBasicBlock &BB : BF) { 536 for (MCInst &Inst : BB) { 537 if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) 538 continue; 539 BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos"); 540 scheduleChange( 541 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset)); 542 } 543 } 544 } 545 546 bool StackLayoutModifier::canInsertRegion(ProgramPoint P) { 547 if (!IsInitialized) 548 initialize(); 549 if (!IsSimple) 550 return false; 551 552 StackPointerTracking &SPT = Info.getStackPointerTracking(); 553 int64_t RegionAddr = SPT.getStateBefore(P)->first; 554 if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY) 555 return false; 556 557 if (InsertedRegions.count(RegionAddr)) 558 return true; 559 560 // Check if we are going to screw up stack accesses at call sites that 561 // pass parameters via stack 562 if (!BlacklistedRegions.empty()) 563 return false; 564 565 return true; 566 } 567 568 bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) { 569 if (!canInsertRegion(P)) 570 return false; 571 572 assert(IsInitialized); 573 StackPointerTracking &SPT = Info.getStackPointerTracking(); 574 // This RegionAddr is slightly different from the one seen in collapseRegion 575 // This is the value of SP before the allocation the user wants to make. 576 int64_t RegionAddr = SPT.getStateBefore(P)->first; 577 if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY) 578 return false; 579 580 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 581 582 for (BinaryBasicBlock &BB : BF) { 583 for (MCInst &Inst : BB) { 584 if (!BC.MIB->hasAnnotation(Inst, getSlotTag())) 585 continue; 586 auto Slot = 587 BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>( 588 Inst, getSlotTag()); 589 if (!AvailableRegions.count(Slot)) 590 continue; 591 592 if (!(DA.doesADominateB(P, Inst))) 593 continue; 594 595 if (BC.MIB->isCFI(Inst)) { 596 if (Slot >= RegionAddr) 597 continue; 598 scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz)); 599 continue; 600 } 601 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst); 602 if (!FIE) { 603 if (Slot >= RegionAddr) 604 continue; 605 scheduleChange( 606 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz)); 607 continue; 608 } 609 610 if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr) 611 continue; 612 if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr) 613 continue; 614 if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) 615 continue; 616 scheduleChange( 617 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz)); 618 } 619 } 620 621 InsertedRegions.insert(RegionAddr); 622 return true; 623 } 624 625 void StackLayoutModifier::performChanges() { 626 std::set<uint32_t> ModifiedCFIIndices; 627 for (BinaryBasicBlock &BB : BF) { 628 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 629 MCInst &Inst = *I; 630 if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) { 631 assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst)); 632 BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos"); 633 } 634 if (!BC.MIB->hasAnnotation(Inst, getTodoTag())) 635 continue; 636 auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>( 637 Inst, getTodoTag()); 638 int64_t Adjustment = 0; 639 WorklistItem::ActionType AdjustmentType = WorklistItem::None; 640 for (WorklistItem &WI : WList) { 641 if (WI.Action == WorklistItem::None) 642 continue; 643 assert(WI.Action == WorklistItem::AdjustLoadStoreOffset || 644 WI.Action == WorklistItem::AdjustCFI); 645 assert((AdjustmentType == WorklistItem::None || 646 AdjustmentType == WI.Action) && 647 "Conflicting actions requested at the same program point"); 648 AdjustmentType = WI.Action; 649 Adjustment += WI.OffsetUpdate; 650 } 651 if (!Adjustment) 652 continue; 653 if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) { 654 assert(BC.MIB->isCFI(Inst)); 655 uint32_t CFINum = Inst.getOperand(0).getImm(); 656 if (ModifiedCFIIndices.count(CFINum)) 657 continue; 658 ModifiedCFIIndices.insert(CFINum); 659 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 660 const MCCFIInstruction::OpType Operation = CFI->getOperation(); 661 if (Operation == MCCFIInstruction::OpDefCfa || 662 Operation == MCCFIInstruction::OpDefCfaOffset) 663 Adjustment = 0 - Adjustment; 664 LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset() 665 << " to " << (CFI->getOffset() + Adjustment) << "\n"); 666 BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment); 667 continue; 668 } 669 int32_t SrcImm = 0; 670 MCPhysReg Reg = 0; 671 MCPhysReg StackPtrReg = 0; 672 int64_t StackOffset = 0; 673 bool IsIndexed = false; 674 bool IsLoad = false; 675 bool IsStore = false; 676 bool IsSimple = false; 677 bool IsStoreFromReg = false; 678 uint8_t Size = 0; 679 bool Success = false; 680 Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, 681 Reg, SrcImm, StackPtrReg, StackOffset, 682 Size, IsSimple, IsIndexed); 683 if (!Success) { 684 // SP update based on FP value 685 Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx); 686 assert(Success); 687 continue; 688 } 689 assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg)); 690 if (StackPtrReg != BC.MIB->getFramePointer()) 691 Adjustment = -Adjustment; 692 if (IsLoad) 693 Success = BC.MIB->createRestoreFromStack( 694 Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size); 695 else if (IsStore) 696 Success = BC.MIB->createSaveToStack( 697 Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size); 698 LLVM_DEBUG({ 699 dbgs() << "Adjusted instruction: "; 700 Inst.dump(); 701 }); 702 assert(Success); 703 } 704 } 705 } 706 707 void StackLayoutModifier::initialize() { 708 classifyStackAccesses(); 709 classifyCFIs(); 710 IsInitialized = true; 711 } 712 713 std::atomic_uint64_t ShrinkWrapping::SpillsMovedRegularMode{0}; 714 std::atomic_uint64_t ShrinkWrapping::SpillsMovedPushPopMode{0}; 715 716 using BBIterTy = BinaryBasicBlock::iterator; 717 718 void ShrinkWrapping::classifyCSRUses() { 719 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 720 StackPointerTracking &SPT = Info.getStackPointerTracking(); 721 UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(), 722 BitVector(DA.NumInstrs, false)); 723 724 const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer()); 725 for (BinaryBasicBlock &BB : BF) { 726 for (MCInst &Inst : BB) { 727 if (BC.MIB->isCFI(Inst)) 728 continue; 729 BitVector BV = BitVector(BC.MRI->getNumRegs(), false); 730 BC.MIB->getTouchedRegs(Inst, BV); 731 BV &= CSA.CalleeSaved; 732 for (int I : BV.set_bits()) { 733 if (I == 0) 734 continue; 735 if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I) 736 UsesByReg[I].set(DA.ExprToIdx[&Inst]); 737 } 738 if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst)) 739 continue; 740 BV = CSA.CalleeSaved; 741 BV &= FPAliases; 742 for (int I : BV.set_bits()) 743 UsesByReg[I].set(DA.ExprToIdx[&Inst]); 744 } 745 } 746 } 747 748 void ShrinkWrapping::pruneUnwantedCSRs() { 749 BitVector ParamRegs = BC.MIB->getRegsUsedAsParams(); 750 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 751 if (!CSA.CalleeSaved[I]) 752 continue; 753 if (ParamRegs[I]) { 754 CSA.CalleeSaved.reset(I); 755 continue; 756 } 757 if (UsesByReg[I].empty()) { 758 LLVM_DEBUG( 759 dbgs() 760 << "Dismissing Callee-Saved Reg because we found no uses of it:" << I 761 << "\n"); 762 CSA.CalleeSaved.reset(I); 763 continue; 764 } 765 if (!CSA.HasRestores[I]) { 766 LLVM_DEBUG( 767 dbgs() << "Dismissing Callee-Saved Reg because it does not have " 768 "restores:" 769 << I << "\n"); 770 CSA.CalleeSaved.reset(I); 771 } 772 } 773 } 774 775 void ShrinkWrapping::computeSaveLocations() { 776 SavePos = std::vector<SmallSetVector<MCInst *, 4>>(BC.MRI->getNumRegs()); 777 ReachingInsns<true> &RI = Info.getReachingInsnsBackwards(); 778 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 779 StackPointerTracking &SPT = Info.getStackPointerTracking(); 780 781 LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n"); 782 for (BinaryBasicBlock &BB : BF) { 783 LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n"); 784 785 MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr; 786 if (!First) 787 continue; 788 789 // Use reaching instructions to detect if we are inside a loop - if we 790 // are, do not consider this BB as valid placement for saves. 791 if (RI.isInLoop(BB)) 792 continue; 793 794 const std::pair<int, int> SPFP = *SPT.getStateBefore(*First); 795 // If we don't know stack state at this point, bail 796 if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) && 797 (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) 798 continue; 799 800 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 801 if (!CSA.CalleeSaved[I]) 802 continue; 803 804 BitVector BBDominatedUses = BitVector(DA.NumInstrs, false); 805 for (int J : UsesByReg[I].set_bits()) 806 if (DA.doesADominateB(*First, J)) 807 BBDominatedUses.set(J); 808 LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates " 809 << BBDominatedUses.count() << " uses for reg " << I 810 << ". Total uses for reg is " << UsesByReg[I].count() 811 << "\n"); 812 BBDominatedUses &= UsesByReg[I]; 813 if (BBDominatedUses == UsesByReg[I]) { 814 LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName() 815 << " as a save pos for " << I << "\n"); 816 SavePos[I].insert(First); 817 LLVM_DEBUG({ 818 dbgs() << "Dominated uses are:\n"; 819 for (int J : UsesByReg[I].set_bits()) { 820 dbgs() << "Idx " << J << ": "; 821 DA.Expressions[J]->dump(); 822 } 823 }); 824 } 825 } 826 } 827 828 BestSaveCount = std::vector<uint64_t>(BC.MRI->getNumRegs(), 829 std::numeric_limits<uint64_t>::max()); 830 BestSavePos = std::vector<MCInst *>(BC.MRI->getNumRegs(), nullptr); 831 auto &InsnToBB = Info.getInsnToBBMap(); 832 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 833 if (!CSA.CalleeSaved[I]) 834 continue; 835 836 for (MCInst *Pos : SavePos[I]) { 837 BinaryBasicBlock *BB = InsnToBB[Pos]; 838 uint64_t Count = BB->getExecutionCount(); 839 if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && 840 Count < BestSaveCount[I]) { 841 BestSavePos[I] = Pos; 842 BestSaveCount[I] = Count; 843 } 844 } 845 } 846 } 847 848 void ShrinkWrapping::computeDomOrder() { 849 std::vector<MCPhysReg> Order; 850 for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 851 Order.push_back(I); 852 } 853 854 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 855 auto &InsnToBB = Info.getInsnToBBMap(); 856 std::sort(Order.begin(), Order.end(), 857 [&](const MCPhysReg &A, const MCPhysReg &B) { 858 BinaryBasicBlock *BBA = 859 BestSavePos[A] ? InsnToBB[BestSavePos[A]] : nullptr; 860 BinaryBasicBlock *BBB = 861 BestSavePos[B] ? InsnToBB[BestSavePos[B]] : nullptr; 862 if (BBA == BBB) 863 return A < B; 864 if (!BBA && BBB) 865 return false; 866 if (BBA && !BBB) 867 return true; 868 if (DA.doesADominateB(*BestSavePos[A], *BestSavePos[B])) 869 return true; 870 if (DA.doesADominateB(*BestSavePos[B], *BestSavePos[A])) 871 return false; 872 return A < B; 873 }); 874 875 for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) 876 DomOrder[Order[I]] = I; 877 } 878 879 bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave, 880 uint64_t &TotalEstimatedWin) { 881 const uint64_t CurSavingCost = CSA.SavingCost[CSR]; 882 if (!CSA.CalleeSaved[CSR]) 883 return false; 884 885 uint64_t BestCount = BestSaveCount[CSR]; 886 BestPosSave = BestSavePos[CSR]; 887 bool ShouldMove = false; 888 if (BestCount != std::numeric_limits<uint64_t>::max() && 889 BestCount < (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost) { 890 LLVM_DEBUG({ 891 auto &InsnToBB = Info.getInsnToBBMap(); 892 dbgs() << "Better position for saves found in func " << BF.getPrintName() 893 << " count << " << BF.getKnownExecutionCount() << "\n"; 894 dbgs() << "Reg: " << CSR 895 << "; New BB: " << InsnToBB[BestPosSave]->getName() 896 << " Freq reduction: " << (CurSavingCost - BestCount) << "\n"; 897 }); 898 TotalEstimatedWin += CurSavingCost - BestCount; 899 ShouldMove = true; 900 } 901 902 if (!ShouldMove) 903 return false; 904 if (!BestPosSave) { 905 LLVM_DEBUG({ 906 dbgs() << "Dropping opportunity because we don't know where to put " 907 "stores -- total est. freq reduc: " 908 << TotalEstimatedWin << "\n"; 909 }); 910 return false; 911 } 912 return true; 913 } 914 915 /// Auxiliar function used to create basic blocks for critical edges and update 916 /// the dominance frontier with these new locations 917 void ShrinkWrapping::splitFrontierCritEdges( 918 BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier, 919 const SmallVector<bool, 4> &IsCritEdge, 920 const SmallVector<BinaryBasicBlock *, 4> &From, 921 const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) { 922 LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func " 923 << BF.getPrintName() << "\n"); 924 // For every FromBB, there might be one or more critical edges, with 925 // To[I] containing destination BBs. It's important to memorize 926 // the original size of the Frontier as we may append to it while splitting 927 // critical edges originating with blocks with multiple destinations. 928 for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) { 929 if (!IsCritEdge[I]) 930 continue; 931 if (To[I].empty()) 932 continue; 933 BinaryBasicBlock *FromBB = From[I]; 934 LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName() 935 << "\n"); 936 // Split edge for every DestinationBBs 937 for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) { 938 BinaryBasicBlock *DestinationBB = To[I][DI]; 939 LLVM_DEBUG(dbgs() << " - Dest : " << DestinationBB->getName() << "\n"); 940 BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB); 941 // Insert dummy instruction so this BB is never empty (we need this for 942 // PredictiveStackPointerTracking to work, since it annotates instructions 943 // and not BBs). 944 if (NewBB->empty()) { 945 MCInst NewInst; 946 BC.MIB->createNoop(NewInst); 947 NewBB->addInstruction(std::move(NewInst)); 948 scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0)); 949 } 950 951 // Update frontier 952 ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB); 953 if (DI == 0) { 954 // Update frontier inplace 955 Frontier[I] = NewFrontierPP; 956 LLVM_DEBUG(dbgs() << " - Update frontier with " << NewBB->getName() 957 << '\n'); 958 } else { 959 // Append new frontier to the end of the list 960 Frontier.push_back(NewFrontierPP); 961 LLVM_DEBUG(dbgs() << " - Append frontier " << NewBB->getName() 962 << '\n'); 963 } 964 } 965 } 966 } 967 968 SmallVector<ProgramPoint, 4> 969 ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR, 970 uint64_t TotalEstimatedWin) { 971 SmallVector<ProgramPoint, 4> Frontier; 972 SmallVector<bool, 4> IsCritEdge; 973 bool CannotPlace = false; 974 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 975 976 SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom; 977 SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo; 978 // In case of a critical edge, we need to create extra BBs to host restores 979 // into edges transitioning to the dominance frontier, otherwise we pull these 980 // restores to inside the dominated area. 981 Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector(); 982 LLVM_DEBUG({ 983 dbgs() << "Dumping dominance frontier for "; 984 BC.printInstruction(dbgs(), *BestPosSave); 985 for (ProgramPoint &PP : Frontier) 986 if (PP.isInst()) 987 BC.printInstruction(dbgs(), *PP.getInst()); 988 else 989 dbgs() << PP.getBB()->getName() << "\n"; 990 }); 991 for (ProgramPoint &PP : Frontier) { 992 bool HasCritEdges = false; 993 if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) && 994 doesInstUsesCSR(*PP.getInst(), CSR)) 995 CannotPlace = true; 996 BinaryBasicBlock *FrontierBB = Info.getParentBB(PP); 997 CritEdgesFrom.emplace_back(FrontierBB); 998 CritEdgesTo.emplace_back(0); 999 SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back(); 1000 // Check for invoke instructions at the dominance frontier, which indicates 1001 // the landing pad is not dominated. 1002 if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) { 1003 LLVM_DEBUG( 1004 dbgs() << "Bailing on restore placement to avoid LP splitting\n"); 1005 Frontier.clear(); 1006 return Frontier; 1007 } 1008 doForAllSuccs(*FrontierBB, [&](ProgramPoint P) { 1009 if (!DA.doesADominateB(*BestPosSave, P)) { 1010 Dests.emplace_back(Info.getParentBB(P)); 1011 return; 1012 } 1013 HasCritEdges = true; 1014 }); 1015 IsCritEdge.push_back(HasCritEdges); 1016 } 1017 // Restores cannot be placed in empty BBs because we have a dataflow 1018 // analysis that depends on insertions happening before real instructions 1019 // (PredictiveStackPointerTracking). Detect now for empty BBs and add a 1020 // dummy nop that is scheduled to be removed later. 1021 bool InvalidateRequired = false; 1022 for (BinaryBasicBlock *&BB : BF.layout()) { 1023 if (BB->size() != 0) 1024 continue; 1025 MCInst NewInst; 1026 BC.MIB->createNoop(NewInst); 1027 auto II = BB->addInstruction(std::move(NewInst)); 1028 scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0)); 1029 InvalidateRequired = true; 1030 } 1031 if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) { 1032 LLVM_DEBUG({ 1033 dbgs() << "Now detected critical edges in the following frontier:\n"; 1034 for (ProgramPoint &PP : Frontier) { 1035 if (PP.isBB()) { 1036 dbgs() << " BB: " << PP.getBB()->getName() << "\n"; 1037 } else { 1038 dbgs() << " Inst: "; 1039 PP.getInst()->dump(); 1040 } 1041 } 1042 }); 1043 splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom, 1044 CritEdgesTo); 1045 InvalidateRequired = true; 1046 } 1047 if (InvalidateRequired) { 1048 // BitVectors that represent all insns of the function are invalid now 1049 // since we changed BBs/Insts. Re-run steps that depend on pointers being 1050 // valid 1051 Info.invalidateAll(); 1052 classifyCSRUses(); 1053 } 1054 if (CannotPlace) { 1055 LLVM_DEBUG({ 1056 dbgs() << "Dropping opportunity because restore placement failed" 1057 " -- total est. freq reduc: " 1058 << TotalEstimatedWin << "\n"; 1059 }); 1060 Frontier.clear(); 1061 return Frontier; 1062 } 1063 return Frontier; 1064 } 1065 1066 bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave, 1067 int64_t SaveOffset) { 1068 if (FA.requiresAlignment(BF)) { 1069 LLVM_DEBUG({ 1070 dbgs() << "Reg " << CSR 1071 << " is not using push/pops due to function " 1072 "alignment requirements.\n"; 1073 }); 1074 return false; 1075 } 1076 for (MCInst *Save : CSA.getSavesByReg(CSR)) { 1077 if (!SLM.canCollapseRegion(Save)) { 1078 LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n"); 1079 return false; 1080 } 1081 } 1082 // Abort if one of the restores for this CSR is not a POP. 1083 for (MCInst *Load : CSA.getRestoresByReg(CSR)) { 1084 if (!BC.MIB->isPop(*Load)) { 1085 LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n"); 1086 return false; 1087 } 1088 } 1089 1090 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1091 // Abort if we are inserting a push into an entry BB (offset -8) and this 1092 // func sets up a frame pointer. 1093 if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION || 1094 SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) { 1095 LLVM_DEBUG({ 1096 dbgs() << "Reg " << CSR 1097 << " cannot insert region or we are " 1098 "trying to insert a push into entry bb.\n"; 1099 }); 1100 return false; 1101 } 1102 return true; 1103 } 1104 1105 SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements( 1106 const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset, 1107 unsigned CSR) { 1108 SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints; 1109 // Moving pop locations to the correct sp offset 1110 ReachingInsns<true> &RI = Info.getReachingInsnsBackwards(); 1111 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1112 for (ProgramPoint &PP : FixedRestorePoints) { 1113 BinaryBasicBlock *BB = Info.getParentBB(PP); 1114 bool Found = false; 1115 if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first == 1116 SaveOffset) { 1117 BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB)); 1118 BV &= UsesByReg[CSR]; 1119 if (!BV.any()) { 1120 Found = true; 1121 PP = BB; 1122 continue; 1123 } 1124 } 1125 for (auto RIt = BB->rbegin(), End = BB->rend(); RIt != End; ++RIt) { 1126 if (SPT.getStateBefore(*RIt)->first == SaveOffset) { 1127 BitVector BV = *RI.getStateAt(*RIt); 1128 BV &= UsesByReg[CSR]; 1129 if (!BV.any()) { 1130 Found = true; 1131 PP = &*RIt; 1132 break; 1133 } 1134 } 1135 } 1136 if (!Found) { 1137 LLVM_DEBUG({ 1138 dbgs() << "Could not find restore insertion point for " << CSR 1139 << ", falling back to load/store mode\n"; 1140 }); 1141 FixedRestorePoints.clear(); 1142 return FixedRestorePoints; 1143 } 1144 } 1145 return FixedRestorePoints; 1146 } 1147 1148 void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR, 1149 bool UsePushPops) { 1150 1151 for (BinaryBasicBlock *&BB : BF.layout()) { 1152 std::vector<MCInst *> CFIs; 1153 for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) { 1154 MCInst &Inst = *I; 1155 if (BC.MIB->isCFI(Inst)) { 1156 // Delete all offset CFIs related to this CSR 1157 if (SLM.getOffsetCFIReg(Inst) == CSR) { 1158 HasDeletedOffsetCFIs[CSR] = true; 1159 scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR)); 1160 continue; 1161 } 1162 CFIs.push_back(&Inst); 1163 continue; 1164 } 1165 1166 uint16_t SavedReg = CSA.getSavedReg(Inst); 1167 uint16_t RestoredReg = CSA.getRestoredReg(Inst); 1168 if (SavedReg != CSR && RestoredReg != CSR) { 1169 CFIs.clear(); 1170 continue; 1171 } 1172 1173 scheduleChange(&Inst, WorklistItem(UsePushPops 1174 ? WorklistItem::Erase 1175 : WorklistItem::ChangeToAdjustment, 1176 CSR)); 1177 1178 // Delete associated CFIs 1179 const bool RecordDeletedPushCFIs = 1180 SavedReg == CSR && DeletedPushCFIs[CSR].empty(); 1181 const bool RecordDeletedPopCFIs = 1182 RestoredReg == CSR && DeletedPopCFIs[CSR].empty(); 1183 for (MCInst *CFI : CFIs) { 1184 const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI); 1185 // Do not touch these... 1186 if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState || 1187 MCCFI->getOperation() == MCCFIInstruction::OpRememberState) 1188 continue; 1189 scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR)); 1190 if (RecordDeletedPushCFIs) { 1191 // Do not record this to be replayed later because we are going to 1192 // rebuild it. 1193 if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) 1194 continue; 1195 DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm()); 1196 } 1197 if (RecordDeletedPopCFIs) { 1198 if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) 1199 continue; 1200 DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm()); 1201 } 1202 } 1203 CFIs.clear(); 1204 } 1205 } 1206 } 1207 1208 bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) { 1209 if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR || 1210 CSA.getRestoredReg(Inst) == CSR) 1211 return false; 1212 BitVector BV = BitVector(BC.MRI->getNumRegs(), false); 1213 BC.MIB->getTouchedRegs(Inst, BV); 1214 return BV[CSR]; 1215 } 1216 1217 void ShrinkWrapping::scheduleSaveRestoreInsertions( 1218 unsigned CSR, MCInst *BestPosSave, 1219 SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) { 1220 auto &InsnToBB = Info.getInsnToBBMap(); 1221 const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR]; 1222 const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR]; 1223 assert(FIESave && FIELoad && "Invalid CSR"); 1224 1225 LLVM_DEBUG({ 1226 dbgs() << "Scheduling save insertion at: "; 1227 BestPosSave->dump(); 1228 }); 1229 1230 scheduleChange(BestPosSave, 1231 UsePushPops ? WorklistItem::InsertPushOrPop 1232 : WorklistItem::InsertLoadOrStore, 1233 *FIESave, CSR); 1234 1235 for (ProgramPoint &PP : RestorePoints) { 1236 BinaryBasicBlock *FrontierBB = Info.getParentBB(PP); 1237 LLVM_DEBUG({ 1238 dbgs() << "Scheduling restore insertion at: "; 1239 if (PP.isInst()) 1240 PP.getInst()->dump(); 1241 else 1242 dbgs() << PP.getBB()->getName() << "\n"; 1243 }); 1244 MCInst *Term = 1245 FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr); 1246 if (Term) 1247 PP = Term; 1248 bool PrecededByPrefix = false; 1249 if (PP.isInst()) { 1250 auto Iter = FrontierBB->findInstruction(PP.getInst()); 1251 if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) { 1252 --Iter; 1253 PrecededByPrefix = BC.MIB->isPrefix(*Iter); 1254 } 1255 } 1256 if (PP.isInst() && 1257 (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) { 1258 assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) && 1259 "cannot move to end of bb"); 1260 scheduleChange(InsnToBB[PP.getInst()], 1261 UsePushPops ? WorklistItem::InsertPushOrPop 1262 : WorklistItem::InsertLoadOrStore, 1263 *FIELoad, CSR); 1264 continue; 1265 } 1266 scheduleChange(PP, 1267 UsePushPops ? WorklistItem::InsertPushOrPop 1268 : WorklistItem::InsertLoadOrStore, 1269 *FIELoad, CSR); 1270 } 1271 } 1272 1273 void ShrinkWrapping::moveSaveRestores() { 1274 bool DisablePushPopMode = false; 1275 bool UsedPushPopMode = false; 1276 // Keeps info about successfully moved regs: reg index, save position and 1277 // save size 1278 std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs; 1279 1280 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 1281 MCInst *BestPosSave = nullptr; 1282 uint64_t TotalEstimatedWin = 0; 1283 if (!isBestSavePosCold(I, BestPosSave, TotalEstimatedWin)) 1284 continue; 1285 SmallVector<ProgramPoint, 4> RestorePoints = 1286 doRestorePlacement(BestPosSave, I, TotalEstimatedWin); 1287 if (RestorePoints.empty()) 1288 continue; 1289 1290 const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I]; 1291 const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I]; 1292 (void)FIELoad; 1293 assert(FIESave && FIELoad); 1294 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1295 const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave); 1296 int SaveOffset = SPFP.first; 1297 uint8_t SaveSize = FIESave->Size; 1298 1299 // If we don't know stack state at this point, bail 1300 if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) && 1301 (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) 1302 continue; 1303 1304 // Operation mode: if true, will insert push/pops instead of loads/restores 1305 bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset); 1306 1307 if (UsePushPops) { 1308 SmallVector<ProgramPoint, 4> FixedRestorePoints = 1309 fixPopsPlacements(RestorePoints, SaveOffset, I); 1310 if (FixedRestorePoints.empty()) 1311 UsePushPops = false; 1312 else 1313 RestorePoints = FixedRestorePoints; 1314 } 1315 1316 // Disable push-pop mode for all CSRs in this function 1317 if (!UsePushPops) 1318 DisablePushPopMode = true; 1319 else 1320 UsedPushPopMode = true; 1321 1322 scheduleOldSaveRestoresRemoval(I, UsePushPops); 1323 scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops); 1324 MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize)); 1325 } 1326 1327 // Revert push-pop mode if it failed for a single CSR 1328 if (DisablePushPopMode && UsedPushPopMode) { 1329 UsedPushPopMode = false; 1330 for (BinaryBasicBlock &BB : BF) { 1331 auto WRI = Todo.find(&BB); 1332 if (WRI != Todo.end()) { 1333 std::vector<WorklistItem> &TodoList = WRI->second; 1334 for (WorklistItem &Item : TodoList) 1335 if (Item.Action == WorklistItem::InsertPushOrPop) 1336 Item.Action = WorklistItem::InsertLoadOrStore; 1337 } 1338 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 1339 MCInst &Inst = *I; 1340 auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( 1341 Inst, getAnnotationIndex()); 1342 if (!TodoList) 1343 continue; 1344 bool isCFI = BC.MIB->isCFI(Inst); 1345 for (WorklistItem &Item : *TodoList) { 1346 if (Item.Action == WorklistItem::InsertPushOrPop) 1347 Item.Action = WorklistItem::InsertLoadOrStore; 1348 if (!isCFI && Item.Action == WorklistItem::Erase) 1349 Item.Action = WorklistItem::ChangeToAdjustment; 1350 } 1351 } 1352 } 1353 } 1354 1355 // Update statistics 1356 if (!UsedPushPopMode) { 1357 SpillsMovedRegularMode += MovedRegs.size(); 1358 return; 1359 } 1360 1361 // Schedule modifications to stack-accessing instructions via 1362 // StackLayoutModifier. 1363 SpillsMovedPushPopMode += MovedRegs.size(); 1364 for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) { 1365 unsigned RegNdx; 1366 MCInst *SavePos; 1367 size_t SaveSize; 1368 std::tie(RegNdx, SavePos, SaveSize) = I; 1369 for (MCInst *Save : CSA.getSavesByReg(RegNdx)) 1370 SLM.collapseRegion(Save); 1371 SLM.insertRegion(SavePos, SaveSize); 1372 } 1373 } 1374 1375 namespace { 1376 /// Helper function to identify whether two basic blocks created by splitting 1377 /// a critical edge have the same contents. 1378 bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A, 1379 const BinaryBasicBlock &B) { 1380 if (A.succ_size() != B.succ_size()) 1381 return false; 1382 if (A.succ_size() != 1) 1383 return false; 1384 1385 if (*A.succ_begin() != *B.succ_begin()) 1386 return false; 1387 1388 if (A.size() != B.size()) 1389 return false; 1390 1391 // Compare instructions 1392 auto I = A.begin(), E = A.end(); 1393 auto OtherI = B.begin(), OtherE = B.end(); 1394 while (I != E && OtherI != OtherE) { 1395 if (I->getOpcode() != OtherI->getOpcode()) 1396 return false; 1397 if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) { 1398 return true; 1399 })) 1400 return false; 1401 ++I; 1402 ++OtherI; 1403 } 1404 return true; 1405 } 1406 } // namespace 1407 1408 bool ShrinkWrapping::foldIdenticalSplitEdges() { 1409 bool Changed = false; 1410 for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) { 1411 BinaryBasicBlock &BB = *Iter; 1412 if (!BB.getName().startswith(".LSplitEdge")) 1413 continue; 1414 for (auto RIter = BF.rbegin(); RIter != BF.rend(); ++RIter) { 1415 BinaryBasicBlock &RBB = *RIter; 1416 if (&RBB == &BB) 1417 break; 1418 if (!RBB.getName().startswith(".LSplitEdge") || !RBB.isValid() || 1419 !isIdenticalSplitEdgeBB(BC, *Iter, RBB)) 1420 continue; 1421 assert(RBB.pred_size() == 1 && "Invalid split edge BB"); 1422 BinaryBasicBlock *Pred = *RBB.pred_begin(); 1423 uint64_t OrigCount = Pred->branch_info_begin()->Count; 1424 uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount; 1425 BF.replaceJumpTableEntryIn(Pred, &RBB, &BB); 1426 Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds); 1427 Changed = true; 1428 // Remove the block from CFG 1429 RBB.markValid(false); 1430 } 1431 } 1432 1433 return Changed; 1434 } 1435 1436 namespace { 1437 1438 // A special StackPointerTracking that compensates for our future plans 1439 // in removing/adding insn. 1440 class PredictiveStackPointerTracking 1441 : public StackPointerTrackingBase<PredictiveStackPointerTracking> { 1442 friend class DataflowAnalysis<PredictiveStackPointerTracking, 1443 std::pair<int, int>>; 1444 decltype(ShrinkWrapping::Todo) &TodoMap; 1445 DataflowInfoManager &Info; 1446 1447 Optional<unsigned> AnnotationIndex; 1448 1449 protected: 1450 void compNextAux(const MCInst &Point, 1451 const std::vector<ShrinkWrapping::WorklistItem> &TodoItems, 1452 std::pair<int, int> &Res) { 1453 for (const ShrinkWrapping::WorklistItem &Item : TodoItems) { 1454 if (Item.Action == ShrinkWrapping::WorklistItem::Erase && 1455 BC.MIB->isPush(Point)) { 1456 Res.first += BC.MIB->getPushSize(Point); 1457 continue; 1458 } 1459 if (Item.Action == ShrinkWrapping::WorklistItem::Erase && 1460 BC.MIB->isPop(Point)) { 1461 Res.first -= BC.MIB->getPopSize(Point); 1462 continue; 1463 } 1464 if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop && 1465 Item.FIEToInsert.IsStore) { 1466 Res.first -= Item.FIEToInsert.Size; 1467 continue; 1468 } 1469 if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop && 1470 Item.FIEToInsert.IsLoad) { 1471 Res.first += Item.FIEToInsert.Size; 1472 continue; 1473 } 1474 } 1475 } 1476 1477 std::pair<int, int> computeNext(const MCInst &Point, 1478 const std::pair<int, int> &Cur) { 1479 std::pair<int, int> Res = 1480 StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext( 1481 Point, Cur); 1482 if (Res.first == StackPointerTracking::SUPERPOSITION || 1483 Res.first == StackPointerTracking::EMPTY) 1484 return Res; 1485 auto TodoItems = 1486 BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>( 1487 Point, ShrinkWrapping::getAnnotationName()); 1488 if (TodoItems) 1489 compNextAux(Point, *TodoItems, Res); 1490 auto &InsnToBBMap = Info.getInsnToBBMap(); 1491 if (&*InsnToBBMap[&Point]->rbegin() != &Point) 1492 return Res; 1493 auto WRI = TodoMap.find(InsnToBBMap[&Point]); 1494 if (WRI == TodoMap.end()) 1495 return Res; 1496 compNextAux(Point, WRI->second, Res); 1497 return Res; 1498 } 1499 1500 StringRef getAnnotationName() const { 1501 return StringRef("PredictiveStackPointerTracking"); 1502 } 1503 1504 public: 1505 PredictiveStackPointerTracking(BinaryFunction &BF, 1506 decltype(ShrinkWrapping::Todo) &TodoMap, 1507 DataflowInfoManager &Info, 1508 MCPlusBuilder::AllocatorIdTy AllocatorId = 0) 1509 : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF, 1510 AllocatorId), 1511 TodoMap(TodoMap), Info(Info) {} 1512 1513 void run() { 1514 StackPointerTrackingBase<PredictiveStackPointerTracking>::run(); 1515 } 1516 }; 1517 1518 } // end anonymous namespace 1519 1520 void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush, 1521 int SPValPop) { 1522 MCInst *SavePoint = nullptr; 1523 for (BinaryBasicBlock &BB : BF) { 1524 for (auto InstIter = BB.rbegin(), EndIter = BB.rend(); InstIter != EndIter; 1525 ++InstIter) { 1526 int32_t SrcImm = 0; 1527 MCPhysReg Reg = 0; 1528 MCPhysReg StackPtrReg = 0; 1529 int64_t StackOffset = 0; 1530 bool IsIndexed = false; 1531 bool IsLoad = false; 1532 bool IsStore = false; 1533 bool IsSimple = false; 1534 bool IsStoreFromReg = false; 1535 uint8_t Size = 0; 1536 if (!BC.MIB->isStackAccess(*InstIter, IsLoad, IsStore, IsStoreFromReg, 1537 Reg, SrcImm, StackPtrReg, StackOffset, Size, 1538 IsSimple, IsIndexed)) 1539 continue; 1540 if (Reg != CSR || !IsStore || !IsSimple) 1541 continue; 1542 SavePoint = &*InstIter; 1543 break; 1544 } 1545 if (SavePoint) 1546 break; 1547 } 1548 assert(SavePoint); 1549 LLVM_DEBUG({ 1550 dbgs() << "Now using as save point for reg " << CSR << " :"; 1551 SavePoint->dump(); 1552 }); 1553 bool PrevAffectedZone = false; 1554 BinaryBasicBlock *PrevBB = nullptr; 1555 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 1556 for (BinaryBasicBlock *BB : BF.layout()) { 1557 if (BB->size() == 0) 1558 continue; 1559 const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint); 1560 const bool InAffectedZoneAtBegin = 1561 (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]]; 1562 bool InAffectedZone = InAffectedZoneAtBegin; 1563 for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) { 1564 const bool CurZone = DA.count(*InstIter, *SavePoint); 1565 if (InAffectedZone != CurZone) { 1566 auto InsertionIter = InstIter; 1567 ++InsertionIter; 1568 InAffectedZone = CurZone; 1569 if (InAffectedZone) 1570 InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0, 1571 SPValPop); 1572 else 1573 InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0, 1574 SPValPush); 1575 --InstIter; 1576 } 1577 } 1578 // Are we at the first basic block or hot-cold split point? 1579 if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) { 1580 if (InAffectedZoneAtBegin) 1581 insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush); 1582 } else if (InAffectedZoneAtBegin != PrevAffectedZone) { 1583 if (InAffectedZoneAtBegin) 1584 insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush); 1585 else 1586 insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop); 1587 } 1588 PrevAffectedZone = InAffectedZoneAtEnd; 1589 PrevBB = BB; 1590 } 1591 } 1592 1593 void ShrinkWrapping::rebuildCFIForSP() { 1594 for (BinaryBasicBlock &BB : BF) { 1595 for (MCInst &Inst : BB) { 1596 if (!BC.MIB->isCFI(Inst)) 1597 continue; 1598 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 1599 if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) 1600 BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId); 1601 } 1602 } 1603 1604 int PrevSPVal = -8; 1605 BinaryBasicBlock *PrevBB = nullptr; 1606 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1607 for (BinaryBasicBlock *BB : BF.layout()) { 1608 if (BB->size() == 0) 1609 continue; 1610 const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first; 1611 const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first; 1612 int SPVal = SPValAtBegin; 1613 for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) { 1614 const int CurVal = SPT.getStateAt(*Iter)->first; 1615 if (SPVal != CurVal) { 1616 auto InsertionIter = Iter; 1617 ++InsertionIter; 1618 Iter = BF.addCFIInstruction( 1619 BB, InsertionIter, 1620 MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal)); 1621 SPVal = CurVal; 1622 } 1623 } 1624 if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold()) 1625 BF.addCFIInstruction( 1626 BB, BB->begin(), 1627 MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin)); 1628 else if (SPValAtBegin != PrevSPVal) 1629 BF.addCFIInstruction( 1630 PrevBB, PrevBB->end(), 1631 MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin)); 1632 PrevSPVal = SPValAtEnd; 1633 PrevBB = BB; 1634 } 1635 1636 for (BinaryBasicBlock &BB : BF) 1637 for (auto I = BB.begin(); I != BB.end();) 1638 if (BC.MIB->hasAnnotation(*I, "DeleteMe")) 1639 I = BB.eraseInstruction(I); 1640 else 1641 ++I; 1642 } 1643 1644 MCInst ShrinkWrapping::createStackAccess(int SPVal, int FPVal, 1645 const FrameIndexEntry &FIE, 1646 bool CreatePushOrPop) { 1647 MCInst NewInst; 1648 if (SPVal != StackPointerTracking::SUPERPOSITION && 1649 SPVal != StackPointerTracking::EMPTY) { 1650 if (FIE.IsLoad) { 1651 if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(), 1652 FIE.StackOffset - SPVal, FIE.RegOrImm, 1653 FIE.Size)) { 1654 errs() << "createRestoreFromStack: not supported on this platform\n"; 1655 abort(); 1656 } 1657 } else { 1658 if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(), 1659 FIE.StackOffset - SPVal, FIE.RegOrImm, 1660 FIE.Size)) { 1661 errs() << "createSaveToStack: not supported on this platform\n"; 1662 abort(); 1663 } 1664 } 1665 if (CreatePushOrPop) 1666 BC.MIB->changeToPushOrPop(NewInst); 1667 return NewInst; 1668 } 1669 assert(FPVal != StackPointerTracking::SUPERPOSITION && 1670 FPVal != StackPointerTracking::EMPTY); 1671 1672 if (FIE.IsLoad) { 1673 if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(), 1674 FIE.StackOffset - FPVal, FIE.RegOrImm, 1675 FIE.Size)) { 1676 errs() << "createRestoreFromStack: not supported on this platform\n"; 1677 abort(); 1678 } 1679 } else { 1680 if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(), 1681 FIE.StackOffset - FPVal, FIE.RegOrImm, 1682 FIE.Size)) { 1683 errs() << "createSaveToStack: not supported on this platform\n"; 1684 abort(); 1685 } 1686 } 1687 return NewInst; 1688 } 1689 1690 void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) { 1691 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 1692 if (UpdatedCFIs.count(CFI)) 1693 return; 1694 1695 switch (CFI->getOperation()) { 1696 case MCCFIInstruction::OpDefCfa: 1697 case MCCFIInstruction::OpDefCfaRegister: 1698 case MCCFIInstruction::OpDefCfaOffset: 1699 CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset); 1700 break; 1701 case MCCFIInstruction::OpOffset: 1702 default: 1703 break; 1704 } 1705 1706 UpdatedCFIs.insert(CFI); 1707 } 1708 1709 BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB, 1710 BBIterTy Pos, unsigned Reg, 1711 bool isPush, int Sz, 1712 int64_t NewOffset) { 1713 if (isPush) { 1714 for (uint32_t Idx : DeletedPushCFIs[Reg]) { 1715 Pos = BF.addCFIPseudo(&BB, Pos, Idx); 1716 updateCFIInstOffset(*Pos++, NewOffset); 1717 } 1718 if (HasDeletedOffsetCFIs[Reg]) { 1719 Pos = BF.addCFIInstruction( 1720 &BB, Pos, 1721 MCCFIInstruction::createOffset( 1722 nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset)); 1723 ++Pos; 1724 } 1725 } else { 1726 for (uint32_t Idx : DeletedPopCFIs[Reg]) { 1727 Pos = BF.addCFIPseudo(&BB, Pos, Idx); 1728 updateCFIInstOffset(*Pos++, NewOffset); 1729 } 1730 if (HasDeletedOffsetCFIs[Reg]) { 1731 Pos = BF.addCFIInstruction( 1732 &BB, Pos, 1733 MCCFIInstruction::createSameValue( 1734 nullptr, BC.MRI->getDwarfRegNum(Reg, false))); 1735 ++Pos; 1736 } 1737 } 1738 return Pos; 1739 } 1740 1741 BBIterTy ShrinkWrapping::processInsertion(BBIterTy InsertionPoint, 1742 BinaryBasicBlock *CurBB, 1743 const WorklistItem &Item, 1744 int64_t SPVal, int64_t FPVal) { 1745 // Trigger CFI reconstruction for this CSR if necessary - writing to 1746 // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update 1747 if ((Item.FIEToInsert.IsStore && 1748 !DeletedPushCFIs[Item.AffectedReg].empty()) || 1749 (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) || 1750 HasDeletedOffsetCFIs[Item.AffectedReg]) { 1751 if (Item.Action == WorklistItem::InsertPushOrPop) { 1752 if (Item.FIEToInsert.IsStore) 1753 PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size; 1754 else 1755 PopOffsetByReg[Item.AffectedReg] = SPVal; 1756 } else { 1757 if (Item.FIEToInsert.IsStore) 1758 PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset; 1759 else 1760 PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset; 1761 } 1762 } 1763 1764 LLVM_DEBUG({ 1765 dbgs() << "Creating stack access with SPVal = " << SPVal 1766 << "; stack offset = " << Item.FIEToInsert.StackOffset 1767 << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop) 1768 << "\n"; 1769 }); 1770 MCInst NewInst = 1771 createStackAccess(SPVal, FPVal, Item.FIEToInsert, 1772 Item.Action == WorklistItem::InsertPushOrPop); 1773 if (InsertionPoint != CurBB->end()) { 1774 LLVM_DEBUG({ 1775 dbgs() << "Adding before Inst: "; 1776 InsertionPoint->dump(); 1777 dbgs() << "the following inst: "; 1778 NewInst.dump(); 1779 }); 1780 BBIterTy Iter = 1781 CurBB->insertInstruction(InsertionPoint, std::move(NewInst)); 1782 return ++Iter; 1783 } 1784 CurBB->addInstruction(std::move(NewInst)); 1785 LLVM_DEBUG(dbgs() << "Adding to BB!\n"); 1786 return CurBB->end(); 1787 } 1788 1789 BBIterTy ShrinkWrapping::processInsertionsList( 1790 BBIterTy InsertionPoint, BinaryBasicBlock *CurBB, 1791 std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) { 1792 bool HasInsertions = false; 1793 for (WorklistItem &Item : TodoList) { 1794 if (Item.Action == WorklistItem::Erase || 1795 Item.Action == WorklistItem::ChangeToAdjustment) 1796 continue; 1797 HasInsertions = true; 1798 break; 1799 } 1800 1801 if (!HasInsertions) 1802 return InsertionPoint; 1803 1804 assert(((SPVal != StackPointerTracking::SUPERPOSITION && 1805 SPVal != StackPointerTracking::EMPTY) || 1806 (FPVal != StackPointerTracking::SUPERPOSITION && 1807 FPVal != StackPointerTracking::EMPTY)) && 1808 "Cannot insert if we have no idea of the stack state here"); 1809 1810 // Revert the effect of PSPT for this location, we want SP Value before 1811 // insertions 1812 if (InsertionPoint == CurBB->end()) { 1813 for (WorklistItem &Item : TodoList) { 1814 if (Item.Action != WorklistItem::InsertPushOrPop) 1815 continue; 1816 if (Item.FIEToInsert.IsStore) 1817 SPVal += Item.FIEToInsert.Size; 1818 if (Item.FIEToInsert.IsLoad) 1819 SPVal -= Item.FIEToInsert.Size; 1820 } 1821 } 1822 1823 // Reorder POPs to obey the correct dominance relation between them 1824 std::stable_sort(TodoList.begin(), TodoList.end(), 1825 [&](const WorklistItem &A, const WorklistItem &B) { 1826 if ((A.Action != WorklistItem::InsertPushOrPop || 1827 !A.FIEToInsert.IsLoad) && 1828 (B.Action != WorklistItem::InsertPushOrPop || 1829 !B.FIEToInsert.IsLoad)) 1830 return false; 1831 if ((A.Action != WorklistItem::InsertPushOrPop || 1832 !A.FIEToInsert.IsLoad)) 1833 return true; 1834 if ((B.Action != WorklistItem::InsertPushOrPop || 1835 !B.FIEToInsert.IsLoad)) 1836 return false; 1837 return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg]; 1838 }); 1839 1840 // Process insertions 1841 for (WorklistItem &Item : TodoList) { 1842 if (Item.Action == WorklistItem::Erase || 1843 Item.Action == WorklistItem::ChangeToAdjustment) 1844 continue; 1845 1846 InsertionPoint = 1847 processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal); 1848 if (Item.Action == WorklistItem::InsertPushOrPop && 1849 Item.FIEToInsert.IsStore) 1850 SPVal -= Item.FIEToInsert.Size; 1851 if (Item.Action == WorklistItem::InsertPushOrPop && 1852 Item.FIEToInsert.IsLoad) 1853 SPVal += Item.FIEToInsert.Size; 1854 } 1855 return InsertionPoint; 1856 } 1857 1858 bool ShrinkWrapping::processInsertions() { 1859 PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId); 1860 PSPT.run(); 1861 1862 bool Changes = false; 1863 for (BinaryBasicBlock &BB : BF) { 1864 // Process insertions before some inst. 1865 for (auto I = BB.begin(); I != BB.end(); ++I) { 1866 MCInst &Inst = *I; 1867 auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( 1868 Inst, getAnnotationIndex()); 1869 if (!TodoList) 1870 continue; 1871 Changes = true; 1872 std::vector<WorklistItem> List = *TodoList; 1873 LLVM_DEBUG({ 1874 dbgs() << "Now processing insertions in " << BB.getName() 1875 << " before inst: "; 1876 Inst.dump(); 1877 }); 1878 auto Iter = I; 1879 std::pair<int, int> SPTState = 1880 *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter)); 1881 I = processInsertionsList(I, &BB, List, SPTState.first, SPTState.second); 1882 } 1883 // Process insertions at the end of bb 1884 auto WRI = Todo.find(&BB); 1885 if (WRI != Todo.end()) { 1886 std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin()); 1887 processInsertionsList(BB.end(), &BB, WRI->second, SPTState.first, 1888 SPTState.second); 1889 Changes = true; 1890 } 1891 } 1892 return Changes; 1893 } 1894 1895 void ShrinkWrapping::processDeletions() { 1896 LivenessAnalysis &LA = Info.getLivenessAnalysis(); 1897 for (BinaryBasicBlock &BB : BF) { 1898 for (auto II = BB.begin(); II != BB.end(); ++II) { 1899 MCInst &Inst = *II; 1900 auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( 1901 Inst, getAnnotationIndex()); 1902 if (!TodoList) 1903 continue; 1904 // Process all deletions 1905 for (WorklistItem &Item : *TodoList) { 1906 if (Item.Action != WorklistItem::Erase && 1907 Item.Action != WorklistItem::ChangeToAdjustment) 1908 continue; 1909 1910 if (Item.Action == WorklistItem::ChangeToAdjustment) { 1911 // Is flag reg alive across this func? 1912 bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg()); 1913 if (int Sz = BC.MIB->getPushSize(Inst)) { 1914 BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags); 1915 continue; 1916 } 1917 if (int Sz = BC.MIB->getPopSize(Inst)) { 1918 BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags); 1919 continue; 1920 } 1921 } 1922 1923 LLVM_DEBUG({ 1924 dbgs() << "Erasing: "; 1925 BC.printInstruction(dbgs(), Inst); 1926 }); 1927 II = std::prev(BB.eraseInstruction(II)); 1928 break; 1929 } 1930 } 1931 } 1932 } 1933 1934 void ShrinkWrapping::rebuildCFI() { 1935 const bool FP = Info.getStackPointerTracking().HasFramePointer; 1936 Info.invalidateAll(); 1937 if (!FP) { 1938 rebuildCFIForSP(); 1939 Info.invalidateAll(); 1940 } 1941 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 1942 if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0) 1943 continue; 1944 const int64_t SPValPush = PushOffsetByReg[I]; 1945 const int64_t SPValPop = PopOffsetByReg[I]; 1946 insertUpdatedCFI(I, SPValPush, SPValPop); 1947 Info.invalidateAll(); 1948 } 1949 } 1950 1951 bool ShrinkWrapping::perform() { 1952 HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false); 1953 PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL); 1954 PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL); 1955 DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0); 1956 1957 if (BF.checkForAmbiguousJumpTables()) { 1958 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName() 1959 << ".\n"); 1960 // We could call disambiguateJumpTables here, but it is probably not worth 1961 // the cost (of duplicating potentially large jump tables that could regress 1962 // dcache misses). Moreover, ambiguous JTs are rare and coming from code 1963 // written in assembly language. Just bail. 1964 return false; 1965 } 1966 SLM.initialize(); 1967 CSA.compute(); 1968 classifyCSRUses(); 1969 pruneUnwantedCSRs(); 1970 computeSaveLocations(); 1971 computeDomOrder(); 1972 moveSaveRestores(); 1973 LLVM_DEBUG({ 1974 dbgs() << "Func before shrink-wrapping: \n"; 1975 BF.dump(); 1976 }); 1977 SLM.performChanges(); 1978 // Early exit if processInsertions doesn't detect any todo items 1979 if (!processInsertions()) 1980 return false; 1981 processDeletions(); 1982 if (foldIdenticalSplitEdges()) { 1983 const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs(); 1984 (void)Stats; 1985 LLVM_DEBUG(dbgs() << "Deleted " << Stats.first 1986 << " redundant split edge BBs (" << Stats.second 1987 << " bytes) for " << BF.getPrintName() << "\n"); 1988 } 1989 rebuildCFI(); 1990 // We may have split edges, creating BBs that need correct branching 1991 BF.fixBranches(); 1992 LLVM_DEBUG({ 1993 dbgs() << "Func after shrink-wrapping: \n"; 1994 BF.dump(); 1995 }); 1996 return true; 1997 } 1998 1999 void ShrinkWrapping::printStats() { 2000 outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode 2001 << " spills inserting load/stores and " << SpillsMovedPushPopMode 2002 << " spills inserting push/pops\n"; 2003 } 2004 2005 // Operators necessary as a result of using MCAnnotation 2006 raw_ostream &operator<<(raw_ostream &OS, 2007 const std::vector<ShrinkWrapping::WorklistItem> &Vec) { 2008 OS << "SWTodo["; 2009 const char *Sep = ""; 2010 for (const ShrinkWrapping::WorklistItem &Item : Vec) { 2011 OS << Sep; 2012 switch (Item.Action) { 2013 case ShrinkWrapping::WorklistItem::Erase: 2014 OS << "Erase"; 2015 break; 2016 case ShrinkWrapping::WorklistItem::ChangeToAdjustment: 2017 OS << "ChangeToAdjustment"; 2018 break; 2019 case ShrinkWrapping::WorklistItem::InsertLoadOrStore: 2020 OS << "InsertLoadOrStore"; 2021 break; 2022 case ShrinkWrapping::WorklistItem::InsertPushOrPop: 2023 OS << "InsertPushOrPop"; 2024 break; 2025 } 2026 Sep = ", "; 2027 } 2028 OS << "]"; 2029 return OS; 2030 } 2031 2032 raw_ostream & 2033 operator<<(raw_ostream &OS, 2034 const std::vector<StackLayoutModifier::WorklistItem> &Vec) { 2035 OS << "SLMTodo["; 2036 const char *Sep = ""; 2037 for (const StackLayoutModifier::WorklistItem &Item : Vec) { 2038 OS << Sep; 2039 switch (Item.Action) { 2040 case StackLayoutModifier::WorklistItem::None: 2041 OS << "None"; 2042 break; 2043 case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset: 2044 OS << "AdjustLoadStoreOffset"; 2045 break; 2046 case StackLayoutModifier::WorklistItem::AdjustCFI: 2047 OS << "AdjustCFI"; 2048 break; 2049 } 2050 Sep = ", "; 2051 } 2052 OS << "]"; 2053 return OS; 2054 } 2055 2056 bool operator==(const ShrinkWrapping::WorklistItem &A, 2057 const ShrinkWrapping::WorklistItem &B) { 2058 return (A.Action == B.Action && A.AffectedReg == B.AffectedReg && 2059 A.Adjustment == B.Adjustment && 2060 A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad && 2061 A.FIEToInsert.IsStore == B.FIEToInsert.IsStore && 2062 A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm && 2063 A.FIEToInsert.Size == B.FIEToInsert.Size && 2064 A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple && 2065 A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset); 2066 } 2067 2068 bool operator==(const StackLayoutModifier::WorklistItem &A, 2069 const StackLayoutModifier::WorklistItem &B) { 2070 return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate); 2071 } 2072 2073 } // end namespace bolt 2074 } // end namespace llvm 2075