1 //===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the ShrinkWrapping class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/ShrinkWrapping.h" 14 #include "bolt/Core/MCPlus.h" 15 #include "bolt/Passes/DataflowInfoManager.h" 16 #include <numeric> 17 #include <stack> 18 19 #define DEBUG_TYPE "shrinkwrapping" 20 21 using namespace llvm; 22 23 namespace opts { 24 25 extern cl::opt<bool> TimeOpts; 26 extern cl::OptionCategory BoltOptCategory; 27 28 static cl::opt<unsigned> ShrinkWrappingThreshold( 29 "shrink-wrapping-threshold", 30 cl::desc("Percentage of prologue execution count to use as threshold when" 31 " evaluating whether a block is cold enough to be profitable to" 32 " move eligible spills there"), 33 cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory)); 34 } // namespace opts 35 36 namespace llvm { 37 namespace bolt { 38 39 void CalleeSavedAnalysis::analyzeSaves() { 40 ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs(); 41 StackReachingUses &SRU = Info.getStackReachingUses(); 42 auto &InsnToBB = Info.getInsnToBBMap(); 43 BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false); 44 45 LLVM_DEBUG(dbgs() << "Checking spill locations\n"); 46 for (BinaryBasicBlock &BB : BF) { 47 LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n"); 48 const MCInst *Prev = nullptr; 49 for (MCInst &Inst : BB) { 50 if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) { 51 // Blacklist weird stores we don't understand 52 if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore && 53 FIE->IsStoreFromReg) { 54 BlacklistedRegs.set(FIE->RegOrImm); 55 CalleeSaved.reset(FIE->RegOrImm); 56 Prev = &Inst; 57 continue; 58 } 59 60 if (!FIE->IsStore || !FIE->IsStoreFromReg || 61 BlacklistedRegs[FIE->RegOrImm]) { 62 Prev = &Inst; 63 continue; 64 } 65 66 // If this reg is defined locally, it is not a callee-saved reg 67 if (RD.isReachedBy(FIE->RegOrImm, 68 Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) { 69 BlacklistedRegs.set(FIE->RegOrImm); 70 CalleeSaved.reset(FIE->RegOrImm); 71 Prev = &Inst; 72 continue; 73 } 74 75 // If this stack position is accessed in another function, we are 76 // probably dealing with a parameter passed in a stack -- do not mess 77 // with it 78 if (SRU.isStoreUsed(*FIE, 79 Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB)), 80 /*IncludeLocalAccesses=*/false) { 81 BlacklistedRegs.set(FIE->RegOrImm); 82 CalleeSaved.reset(FIE->RegOrImm); 83 Prev = &Inst; 84 continue; 85 } 86 87 // If this stack position is loaded elsewhere in another reg, we can't 88 // update it, so blacklist it. 89 if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev) 90 : SRU.expr_begin(BB))) { 91 BlacklistedRegs.set(FIE->RegOrImm); 92 CalleeSaved.reset(FIE->RegOrImm); 93 Prev = &Inst; 94 continue; 95 } 96 97 // Ignore regs with multiple saves 98 if (CalleeSaved[FIE->RegOrImm]) { 99 BlacklistedRegs.set(FIE->RegOrImm); 100 CalleeSaved.reset(FIE->RegOrImm); 101 Prev = &Inst; 102 continue; 103 } 104 105 CalleeSaved.set(FIE->RegOrImm); 106 SaveFIEByReg[FIE->RegOrImm] = &*FIE; 107 SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount(); 108 BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId); 109 OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset; 110 LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: " 111 << FIE->RegOrImm << "\n"); 112 } 113 Prev = &Inst; 114 } 115 } 116 } 117 118 void CalleeSavedAnalysis::analyzeRestores() { 119 ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses(); 120 121 // Now compute all restores of these callee-saved regs 122 for (BinaryBasicBlock &BB : BF) { 123 const MCInst *Prev = nullptr; 124 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 125 MCInst &Inst = *I; 126 if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) { 127 if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) { 128 Prev = &Inst; 129 continue; 130 } 131 132 // If this reg is used locally after a restore, then we are probably 133 // not dealing with a callee-saved reg. Except if this use is by 134 // another store, but we don't cover this case yet. 135 // Also not callee-saved if this load accesses caller stack or isn't 136 // simple. 137 if (!FIE->IsSimple || FIE->StackOffset >= 0 || 138 RU.isReachedBy(FIE->RegOrImm, 139 Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) { 140 CalleeSaved.reset(FIE->RegOrImm); 141 Prev = &Inst; 142 continue; 143 } 144 // If stack offsets between saves/store don't agree with each other, 145 // we don't completely understand what's happening here 146 if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) { 147 CalleeSaved.reset(FIE->RegOrImm); 148 LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a " 149 "mismatching restore: " 150 << FIE->RegOrImm << "\n"); 151 Prev = &Inst; 152 continue; 153 } 154 155 LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm 156 << "\n"); 157 if (LoadFIEByReg[FIE->RegOrImm] == nullptr) 158 LoadFIEByReg[FIE->RegOrImm] = &*FIE; 159 BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm, 160 AllocatorId); 161 HasRestores.set(FIE->RegOrImm); 162 } 163 Prev = &Inst; 164 } 165 } 166 } 167 168 std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) { 169 std::vector<MCInst *> Results; 170 for (BinaryBasicBlock &BB : BF) 171 for (MCInst &Inst : BB) 172 if (getSavedReg(Inst) == Reg) 173 Results.push_back(&Inst); 174 return Results; 175 } 176 177 std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) { 178 std::vector<MCInst *> Results; 179 for (BinaryBasicBlock &BB : BF) 180 for (MCInst &Inst : BB) 181 if (getRestoredReg(Inst) == Reg) 182 Results.push_back(&Inst); 183 return Results; 184 } 185 186 CalleeSavedAnalysis::~CalleeSavedAnalysis() { 187 for (BinaryBasicBlock &BB : BF) { 188 for (MCInst &Inst : BB) { 189 BC.MIB->removeAnnotation(Inst, getSaveTag()); 190 BC.MIB->removeAnnotation(Inst, getRestoreTag()); 191 } 192 } 193 } 194 195 void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) { 196 if (BlacklistedRegions[Offset] < Size) 197 BlacklistedRegions[Offset] = Size; 198 } 199 200 bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) { 201 for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions) 202 if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second) 203 return true; 204 return false; 205 } 206 207 bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset, 208 int64_t Size) { 209 bool HasConflict = false; 210 for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) { 211 std::pair<const int64_t, int64_t> &Elem = *Iter; 212 if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second && 213 (Offset != Elem.first || Size != Elem.second)) { 214 Iter = AvailableRegions.erase(Iter); 215 HasConflict = true; 216 continue; 217 } 218 ++Iter; 219 } 220 if (HasConflict) { 221 blacklistRegion(Offset, Size); 222 return true; 223 } 224 return false; 225 } 226 227 void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) { 228 StackPointerTracking &SPT = Info.getStackPointerTracking(); 229 if (!BC.MII->get(Point.getOpcode()) 230 .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI)) 231 return; 232 233 int SPVal, FPVal; 234 std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point); 235 std::pair<MCPhysReg, int64_t> FP; 236 237 if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION) 238 FP = std::make_pair(BC.MIB->getFramePointer(), FPVal); 239 else 240 FP = std::make_pair(0, 0); 241 std::pair<MCPhysReg, int64_t> SP; 242 243 if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION) 244 SP = std::make_pair(BC.MIB->getStackPointer(), SPVal); 245 else 246 SP = std::make_pair(0, 0); 247 248 int64_t Output; 249 if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) 250 return; 251 252 // Not your regular frame pointer initialization... bail 253 if (Output != SPVal) 254 blacklistRegion(0, 0); 255 } 256 257 void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) { 258 StackPointerTracking &SPT = Info.getStackPointerTracking(); 259 if (!BC.MII->get(Point.getOpcode()) 260 .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI)) 261 return; 262 // Check if the definition of SP comes from FP -- in this case, this 263 // value may need to be updated depending on our stack layout changes 264 const MCInstrDesc &InstInfo = BC.MII->get(Point.getOpcode()); 265 unsigned NumDefs = InstInfo.getNumDefs(); 266 bool UsesFP = false; 267 for (unsigned I = NumDefs, E = MCPlus::getNumPrimeOperands(Point); I < E; 268 ++I) { 269 MCOperand &Operand = Point.getOperand(I); 270 if (!Operand.isReg()) 271 continue; 272 if (Operand.getReg() == BC.MIB->getFramePointer()) { 273 UsesFP = true; 274 break; 275 } 276 } 277 if (!UsesFP) 278 return; 279 280 // Setting up evaluation 281 int SPVal, FPVal; 282 std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point); 283 std::pair<MCPhysReg, int64_t> FP; 284 285 if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION) 286 FP = std::make_pair(BC.MIB->getFramePointer(), FPVal); 287 else 288 FP = std::make_pair(0, 0); 289 std::pair<MCPhysReg, int64_t> SP; 290 291 if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION) 292 SP = std::make_pair(BC.MIB->getStackPointer(), SPVal); 293 else 294 SP = std::make_pair(0, 0); 295 296 int64_t Output; 297 if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP)) 298 return; 299 300 // If the value is the same of FP, no need to adjust it 301 if (Output == FPVal) 302 return; 303 304 // If an allocation happened through FP, bail 305 if (Output <= SPVal) { 306 blacklistRegion(0, 0); 307 return; 308 } 309 310 // We are restoring SP to an old value based on FP. Mark it as a stack 311 // access to be fixed later. 312 BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId); 313 } 314 315 void StackLayoutModifier::classifyStackAccesses() { 316 // Understand when stack slots are being used non-locally 317 StackReachingUses &SRU = Info.getStackReachingUses(); 318 319 for (BinaryBasicBlock &BB : BF) { 320 const MCInst *Prev = nullptr; 321 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 322 MCInst &Inst = *I; 323 checkFramePointerInitialization(Inst); 324 checkStackPointerRestore(Inst); 325 ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst); 326 if (!FIEX) { 327 Prev = &Inst; 328 continue; 329 } 330 if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) { 331 blacklistRegion(FIEX->StackOffset, FIEX->Size); 332 Prev = &Inst; 333 continue; 334 } 335 // If this stack position is accessed in another function, we are 336 // probably dealing with a parameter passed in a stack -- do not mess 337 // with it 338 if (SRU.isStoreUsed(*FIEX, 339 Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB), 340 /*IncludeLocalAccesses=*/false)) { 341 blacklistRegion(FIEX->StackOffset, FIEX->Size); 342 Prev = &Inst; 343 continue; 344 } 345 // Now we have a clear stack slot access. Check if its blacklisted or if 346 // it conflicts with another chunk. 347 if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) || 348 blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) { 349 Prev = &Inst; 350 continue; 351 } 352 // We are free to go. Add it as available stack slot which we know how 353 // to move it. 354 AvailableRegions[FIEX->StackOffset] = FIEX->Size; 355 BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId); 356 RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm); 357 RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset); 358 LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size " 359 << (int)FIEX->Size << "\n"); 360 } 361 } 362 } 363 364 void StackLayoutModifier::classifyCFIs() { 365 std::stack<std::pair<int64_t, uint16_t>> CFIStack; 366 int64_t CfaOffset = -8; 367 uint16_t CfaReg = 7; 368 369 auto recordAccess = [&](MCInst *Inst, int64_t Offset) { 370 const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false); 371 if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) { 372 BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId); 373 LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n"); 374 } else { 375 IsSimple = false; 376 return; 377 } 378 }; 379 380 for (BinaryBasicBlock *&BB : BF.layout()) { 381 for (MCInst &Inst : *BB) { 382 if (!BC.MIB->isCFI(Inst)) 383 continue; 384 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 385 switch (CFI->getOperation()) { 386 case MCCFIInstruction::OpDefCfa: 387 CfaOffset = -CFI->getOffset(); 388 recordAccess(&Inst, CfaOffset); 389 LLVM_FALLTHROUGH; 390 case MCCFIInstruction::OpDefCfaRegister: 391 CfaReg = CFI->getRegister(); 392 break; 393 case MCCFIInstruction::OpDefCfaOffset: 394 CfaOffset = -CFI->getOffset(); 395 recordAccess(&Inst, CfaOffset); 396 break; 397 case MCCFIInstruction::OpOffset: 398 recordAccess(&Inst, CFI->getOffset()); 399 BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(), 400 BC.MRI->getLLVMRegNum(CFI->getRegister(), 401 /*isEH=*/false), 402 AllocatorId); 403 break; 404 case MCCFIInstruction::OpSameValue: 405 BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(), 406 BC.MRI->getLLVMRegNum(CFI->getRegister(), 407 /*isEH=*/false), 408 AllocatorId); 409 break; 410 case MCCFIInstruction::OpRememberState: 411 CFIStack.push(std::make_pair(CfaOffset, CfaReg)); 412 break; 413 case MCCFIInstruction::OpRestoreState: { 414 assert(!CFIStack.empty() && "Corrupt CFI stack"); 415 std::pair<int64_t, uint16_t> &Elem = CFIStack.top(); 416 CFIStack.pop(); 417 CfaOffset = Elem.first; 418 CfaReg = Elem.second; 419 break; 420 } 421 case MCCFIInstruction::OpRelOffset: 422 case MCCFIInstruction::OpAdjustCfaOffset: 423 llvm_unreachable("Unhandled AdjustCfaOffset"); 424 break; 425 default: 426 break; 427 } 428 } 429 } 430 } 431 432 void StackLayoutModifier::scheduleChange( 433 MCInst &Inst, StackLayoutModifier::WorklistItem Item) { 434 auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>( 435 Inst, getTodoTag(), AllocatorId); 436 WList.push_back(Item); 437 } 438 439 bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) { 440 if (!IsSimple || !BC.MIB->isPush(*DeletedPush)) 441 return false; 442 443 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush); 444 if (!FIE) 445 return false; 446 447 return canCollapseRegion(FIE->StackOffset); 448 } 449 450 bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) { 451 if (!IsInitialized) 452 initialize(); 453 if (!IsSimple) 454 return false; 455 456 if (CollapsedRegions.count(RegionAddr)) 457 return true; 458 459 // Check if it is possible to readjust all accesses below RegionAddr 460 if (!BlacklistedRegions.empty()) 461 return false; 462 463 return true; 464 } 465 466 bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) { 467 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush); 468 if (!FIE) 469 return false; 470 int64_t RegionAddr = FIE->StackOffset; 471 int64_t RegionSz = FIE->Size; 472 return collapseRegion(DeletedPush, RegionAddr, RegionSz); 473 } 474 475 bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr, 476 int64_t RegionSz) { 477 if (!canCollapseRegion(RegionAddr)) 478 return false; 479 480 assert(IsInitialized); 481 StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis(); 482 483 for (BinaryBasicBlock &BB : BF) { 484 for (MCInst &Inst : BB) { 485 if (!BC.MIB->hasAnnotation(Inst, getSlotTag())) 486 continue; 487 auto Slot = 488 BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>( 489 Inst, getSlotTag()); 490 if (!AvailableRegions.count(Slot)) 491 continue; 492 // We need to ensure this access is affected by the deleted push 493 if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]]) 494 continue; 495 496 if (BC.MIB->isCFI(Inst)) { 497 if (Slot > RegionAddr) 498 continue; 499 scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz)); 500 continue; 501 } 502 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst); 503 if (!FIE) { 504 if (Slot > RegionAddr) 505 continue; 506 // SP update based on frame pointer 507 scheduleChange( 508 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz)); 509 continue; 510 } 511 512 if (Slot == RegionAddr) { 513 BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId); 514 continue; 515 } 516 if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) 517 continue; 518 519 if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr) 520 continue; 521 522 if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr) 523 continue; 524 525 scheduleChange( 526 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz)); 527 } 528 } 529 530 CollapsedRegions.insert(RegionAddr); 531 return true; 532 } 533 534 void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) { 535 for (BinaryBasicBlock &BB : BF) { 536 for (MCInst &Inst : BB) { 537 if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) 538 continue; 539 BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos"); 540 scheduleChange( 541 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset)); 542 } 543 } 544 } 545 546 bool StackLayoutModifier::canInsertRegion(ProgramPoint P) { 547 if (!IsInitialized) 548 initialize(); 549 if (!IsSimple) 550 return false; 551 552 StackPointerTracking &SPT = Info.getStackPointerTracking(); 553 int64_t RegionAddr = SPT.getStateBefore(P)->first; 554 if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY) 555 return false; 556 557 if (InsertedRegions.count(RegionAddr)) 558 return true; 559 560 // Check if we are going to screw up stack accesses at call sites that 561 // pass parameters via stack 562 if (!BlacklistedRegions.empty()) 563 return false; 564 565 return true; 566 } 567 568 bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) { 569 if (!canInsertRegion(P)) 570 return false; 571 572 assert(IsInitialized); 573 StackPointerTracking &SPT = Info.getStackPointerTracking(); 574 // This RegionAddr is slightly different from the one seen in collapseRegion 575 // This is the value of SP before the allocation the user wants to make. 576 int64_t RegionAddr = SPT.getStateBefore(P)->first; 577 if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY) 578 return false; 579 580 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 581 582 for (BinaryBasicBlock &BB : BF) { 583 for (MCInst &Inst : BB) { 584 if (!BC.MIB->hasAnnotation(Inst, getSlotTag())) 585 continue; 586 auto Slot = 587 BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>( 588 Inst, getSlotTag()); 589 if (!AvailableRegions.count(Slot)) 590 continue; 591 592 if (!(DA.doesADominateB(P, Inst))) 593 continue; 594 595 if (BC.MIB->isCFI(Inst)) { 596 if (Slot >= RegionAddr) 597 continue; 598 scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz)); 599 continue; 600 } 601 ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst); 602 if (!FIE) { 603 if (Slot >= RegionAddr) 604 continue; 605 scheduleChange( 606 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz)); 607 continue; 608 } 609 610 if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr) 611 continue; 612 if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr) 613 continue; 614 if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst)) 615 continue; 616 scheduleChange( 617 Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz)); 618 } 619 } 620 621 InsertedRegions.insert(RegionAddr); 622 return true; 623 } 624 625 void StackLayoutModifier::performChanges() { 626 std::set<uint32_t> ModifiedCFIIndices; 627 for (BinaryBasicBlock &BB : BF) { 628 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 629 MCInst &Inst = *I; 630 if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) { 631 assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst)); 632 BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos"); 633 } 634 if (!BC.MIB->hasAnnotation(Inst, getTodoTag())) 635 continue; 636 auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>( 637 Inst, getTodoTag()); 638 int64_t Adjustment = 0; 639 WorklistItem::ActionType AdjustmentType = WorklistItem::None; 640 for (WorklistItem &WI : WList) { 641 if (WI.Action == WorklistItem::None) 642 continue; 643 assert(WI.Action == WorklistItem::AdjustLoadStoreOffset || 644 WI.Action == WorklistItem::AdjustCFI); 645 assert((AdjustmentType == WorklistItem::None || 646 AdjustmentType == WI.Action) && 647 "Conflicting actions requested at the same program point"); 648 AdjustmentType = WI.Action; 649 Adjustment += WI.OffsetUpdate; 650 } 651 if (!Adjustment) 652 continue; 653 if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) { 654 assert(BC.MIB->isCFI(Inst)); 655 uint32_t CFINum = Inst.getOperand(0).getImm(); 656 if (ModifiedCFIIndices.count(CFINum)) 657 continue; 658 ModifiedCFIIndices.insert(CFINum); 659 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 660 const MCCFIInstruction::OpType Operation = CFI->getOperation(); 661 if (Operation == MCCFIInstruction::OpDefCfa || 662 Operation == MCCFIInstruction::OpDefCfaOffset) 663 Adjustment = 0 - Adjustment; 664 LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset() 665 << " to " << (CFI->getOffset() + Adjustment) << "\n"); 666 BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment); 667 continue; 668 } 669 int32_t SrcImm = 0; 670 MCPhysReg Reg = 0; 671 MCPhysReg StackPtrReg = 0; 672 int64_t StackOffset = 0; 673 bool IsIndexed = false; 674 bool IsLoad = false; 675 bool IsStore = false; 676 bool IsSimple = false; 677 bool IsStoreFromReg = false; 678 uint8_t Size = 0; 679 bool Success = false; 680 Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, 681 Reg, SrcImm, StackPtrReg, StackOffset, 682 Size, IsSimple, IsIndexed); 683 if (!Success) { 684 // SP update based on FP value 685 Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx); 686 assert(Success); 687 continue; 688 } 689 assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg)); 690 if (StackPtrReg != BC.MIB->getFramePointer()) 691 Adjustment = -Adjustment; 692 if (IsLoad) 693 Success = BC.MIB->createRestoreFromStack( 694 Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size); 695 else if (IsStore) 696 Success = BC.MIB->createSaveToStack( 697 Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size); 698 LLVM_DEBUG({ 699 dbgs() << "Adjusted instruction: "; 700 Inst.dump(); 701 }); 702 assert(Success); 703 } 704 } 705 } 706 707 void StackLayoutModifier::initialize() { 708 classifyStackAccesses(); 709 classifyCFIs(); 710 IsInitialized = true; 711 } 712 713 std::atomic_uint64_t ShrinkWrapping::SpillsMovedRegularMode{0}; 714 std::atomic_uint64_t ShrinkWrapping::SpillsMovedPushPopMode{0}; 715 716 using BBIterTy = BinaryBasicBlock::iterator; 717 718 void ShrinkWrapping::classifyCSRUses() { 719 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 720 StackPointerTracking &SPT = Info.getStackPointerTracking(); 721 UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(), 722 BitVector(DA.NumInstrs, false)); 723 724 const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer()); 725 for (BinaryBasicBlock &BB : BF) { 726 for (MCInst &Inst : BB) { 727 if (BC.MIB->isCFI(Inst)) 728 continue; 729 BitVector BV = BitVector(BC.MRI->getNumRegs(), false); 730 BC.MIB->getTouchedRegs(Inst, BV); 731 BV &= CSA.CalleeSaved; 732 for (int I : BV.set_bits()) { 733 if (I == 0) 734 continue; 735 if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I) 736 UsesByReg[I].set(DA.ExprToIdx[&Inst]); 737 } 738 if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst)) 739 continue; 740 BV = CSA.CalleeSaved; 741 BV &= FPAliases; 742 for (int I : BV.set_bits()) 743 UsesByReg[I].set(DA.ExprToIdx[&Inst]); 744 } 745 } 746 } 747 748 void ShrinkWrapping::pruneUnwantedCSRs() { 749 BitVector ParamRegs = BC.MIB->getRegsUsedAsParams(); 750 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 751 if (!CSA.CalleeSaved[I]) 752 continue; 753 if (ParamRegs[I]) { 754 CSA.CalleeSaved.reset(I); 755 continue; 756 } 757 if (UsesByReg[I].empty()) { 758 LLVM_DEBUG( 759 dbgs() 760 << "Dismissing Callee-Saved Reg because we found no uses of it:" << I 761 << "\n"); 762 CSA.CalleeSaved.reset(I); 763 continue; 764 } 765 if (!CSA.HasRestores[I]) { 766 LLVM_DEBUG( 767 dbgs() << "Dismissing Callee-Saved Reg because it does not have " 768 "restores:" 769 << I << "\n"); 770 CSA.CalleeSaved.reset(I); 771 } 772 } 773 } 774 775 void ShrinkWrapping::computeSaveLocations() { 776 SavePos = std::vector<SmallSetVector<MCInst *, 4>>(BC.MRI->getNumRegs()); 777 ReachingInsns<true> &RI = Info.getReachingInsnsBackwards(); 778 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 779 StackPointerTracking &SPT = Info.getStackPointerTracking(); 780 781 LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n"); 782 for (BinaryBasicBlock &BB : BF) { 783 LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n"); 784 785 MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr; 786 if (!First) 787 continue; 788 789 // Use reaching instructions to detect if we are inside a loop - if we 790 // are, do not consider this BB as valid placement for saves. 791 if (RI.isInLoop(BB)) 792 continue; 793 794 const std::pair<int, int> SPFP = *SPT.getStateBefore(*First); 795 // If we don't know stack state at this point, bail 796 if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) && 797 (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) 798 continue; 799 800 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 801 if (!CSA.CalleeSaved[I]) 802 continue; 803 804 BitVector BBDominatedUses = BitVector(DA.NumInstrs, false); 805 for (int J : UsesByReg[I].set_bits()) 806 if (DA.doesADominateB(*First, J)) 807 BBDominatedUses.set(J); 808 LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates " 809 << BBDominatedUses.count() << " uses for reg " << I 810 << ". Total uses for reg is " << UsesByReg[I].count() 811 << "\n"); 812 BBDominatedUses &= UsesByReg[I]; 813 if (BBDominatedUses == UsesByReg[I]) { 814 LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName() 815 << " as a save pos for " << I << "\n"); 816 SavePos[I].insert(First); 817 LLVM_DEBUG({ 818 dbgs() << "Dominated uses are:\n"; 819 for (int J : UsesByReg[I].set_bits()) { 820 dbgs() << "Idx " << J << ": "; 821 DA.Expressions[J]->dump(); 822 } 823 }); 824 } 825 } 826 } 827 828 BestSaveCount = std::vector<uint64_t>(BC.MRI->getNumRegs(), 829 std::numeric_limits<uint64_t>::max()); 830 BestSavePos = std::vector<MCInst *>(BC.MRI->getNumRegs(), nullptr); 831 auto &InsnToBB = Info.getInsnToBBMap(); 832 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 833 if (!CSA.CalleeSaved[I]) 834 continue; 835 836 for (MCInst *Pos : SavePos[I]) { 837 BinaryBasicBlock *BB = InsnToBB[Pos]; 838 uint64_t Count = BB->getExecutionCount(); 839 if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && 840 Count < BestSaveCount[I]) { 841 BestSavePos[I] = Pos; 842 BestSaveCount[I] = Count; 843 } 844 } 845 } 846 } 847 848 void ShrinkWrapping::computeDomOrder() { 849 std::vector<MCPhysReg> Order; 850 for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 851 Order.push_back(I); 852 } 853 854 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 855 auto &InsnToBB = Info.getInsnToBBMap(); 856 llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) { 857 BinaryBasicBlock *BBA = BestSavePos[A] ? InsnToBB[BestSavePos[A]] : nullptr; 858 BinaryBasicBlock *BBB = BestSavePos[B] ? InsnToBB[BestSavePos[B]] : nullptr; 859 if (BBA == BBB) 860 return A < B; 861 if (!BBA && BBB) 862 return false; 863 if (BBA && !BBB) 864 return true; 865 if (DA.doesADominateB(*BestSavePos[A], *BestSavePos[B])) 866 return true; 867 if (DA.doesADominateB(*BestSavePos[B], *BestSavePos[A])) 868 return false; 869 return A < B; 870 }); 871 872 for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) 873 DomOrder[Order[I]] = I; 874 } 875 876 bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave, 877 uint64_t &TotalEstimatedWin) { 878 const uint64_t CurSavingCost = CSA.SavingCost[CSR]; 879 if (!CSA.CalleeSaved[CSR]) 880 return false; 881 882 uint64_t BestCount = BestSaveCount[CSR]; 883 BestPosSave = BestSavePos[CSR]; 884 bool ShouldMove = false; 885 if (BestCount != std::numeric_limits<uint64_t>::max() && 886 BestCount < (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost) { 887 LLVM_DEBUG({ 888 auto &InsnToBB = Info.getInsnToBBMap(); 889 dbgs() << "Better position for saves found in func " << BF.getPrintName() 890 << " count << " << BF.getKnownExecutionCount() << "\n"; 891 dbgs() << "Reg: " << CSR 892 << "; New BB: " << InsnToBB[BestPosSave]->getName() 893 << " Freq reduction: " << (CurSavingCost - BestCount) << "\n"; 894 }); 895 TotalEstimatedWin += CurSavingCost - BestCount; 896 ShouldMove = true; 897 } 898 899 if (!ShouldMove) 900 return false; 901 if (!BestPosSave) { 902 LLVM_DEBUG({ 903 dbgs() << "Dropping opportunity because we don't know where to put " 904 "stores -- total est. freq reduc: " 905 << TotalEstimatedWin << "\n"; 906 }); 907 return false; 908 } 909 return true; 910 } 911 912 /// Auxiliar function used to create basic blocks for critical edges and update 913 /// the dominance frontier with these new locations 914 void ShrinkWrapping::splitFrontierCritEdges( 915 BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier, 916 const SmallVector<bool, 4> &IsCritEdge, 917 const SmallVector<BinaryBasicBlock *, 4> &From, 918 const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) { 919 LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func " 920 << BF.getPrintName() << "\n"); 921 // For every FromBB, there might be one or more critical edges, with 922 // To[I] containing destination BBs. It's important to memorize 923 // the original size of the Frontier as we may append to it while splitting 924 // critical edges originating with blocks with multiple destinations. 925 for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) { 926 if (!IsCritEdge[I]) 927 continue; 928 if (To[I].empty()) 929 continue; 930 BinaryBasicBlock *FromBB = From[I]; 931 LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName() 932 << "\n"); 933 // Split edge for every DestinationBBs 934 for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) { 935 BinaryBasicBlock *DestinationBB = To[I][DI]; 936 LLVM_DEBUG(dbgs() << " - Dest : " << DestinationBB->getName() << "\n"); 937 BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB); 938 // Insert dummy instruction so this BB is never empty (we need this for 939 // PredictiveStackPointerTracking to work, since it annotates instructions 940 // and not BBs). 941 if (NewBB->empty()) { 942 MCInst NewInst; 943 BC.MIB->createNoop(NewInst); 944 NewBB->addInstruction(std::move(NewInst)); 945 scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0)); 946 } 947 948 // Update frontier 949 ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB); 950 if (DI == 0) { 951 // Update frontier inplace 952 Frontier[I] = NewFrontierPP; 953 LLVM_DEBUG(dbgs() << " - Update frontier with " << NewBB->getName() 954 << '\n'); 955 } else { 956 // Append new frontier to the end of the list 957 Frontier.push_back(NewFrontierPP); 958 LLVM_DEBUG(dbgs() << " - Append frontier " << NewBB->getName() 959 << '\n'); 960 } 961 } 962 } 963 } 964 965 SmallVector<ProgramPoint, 4> 966 ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR, 967 uint64_t TotalEstimatedWin) { 968 SmallVector<ProgramPoint, 4> Frontier; 969 SmallVector<bool, 4> IsCritEdge; 970 bool CannotPlace = false; 971 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 972 973 SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom; 974 SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo; 975 // In case of a critical edge, we need to create extra BBs to host restores 976 // into edges transitioning to the dominance frontier, otherwise we pull these 977 // restores to inside the dominated area. 978 Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector(); 979 LLVM_DEBUG({ 980 dbgs() << "Dumping dominance frontier for "; 981 BC.printInstruction(dbgs(), *BestPosSave); 982 for (ProgramPoint &PP : Frontier) 983 if (PP.isInst()) 984 BC.printInstruction(dbgs(), *PP.getInst()); 985 else 986 dbgs() << PP.getBB()->getName() << "\n"; 987 }); 988 for (ProgramPoint &PP : Frontier) { 989 bool HasCritEdges = false; 990 if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) && 991 doesInstUsesCSR(*PP.getInst(), CSR)) 992 CannotPlace = true; 993 BinaryBasicBlock *FrontierBB = Info.getParentBB(PP); 994 CritEdgesFrom.emplace_back(FrontierBB); 995 CritEdgesTo.emplace_back(0); 996 SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back(); 997 // Check for invoke instructions at the dominance frontier, which indicates 998 // the landing pad is not dominated. 999 if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) { 1000 LLVM_DEBUG( 1001 dbgs() << "Bailing on restore placement to avoid LP splitting\n"); 1002 Frontier.clear(); 1003 return Frontier; 1004 } 1005 doForAllSuccs(*FrontierBB, [&](ProgramPoint P) { 1006 if (!DA.doesADominateB(*BestPosSave, P)) { 1007 Dests.emplace_back(Info.getParentBB(P)); 1008 return; 1009 } 1010 HasCritEdges = true; 1011 }); 1012 IsCritEdge.push_back(HasCritEdges); 1013 } 1014 // Restores cannot be placed in empty BBs because we have a dataflow 1015 // analysis that depends on insertions happening before real instructions 1016 // (PredictiveStackPointerTracking). Detect now for empty BBs and add a 1017 // dummy nop that is scheduled to be removed later. 1018 bool InvalidateRequired = false; 1019 for (BinaryBasicBlock *&BB : BF.layout()) { 1020 if (BB->size() != 0) 1021 continue; 1022 MCInst NewInst; 1023 BC.MIB->createNoop(NewInst); 1024 auto II = BB->addInstruction(std::move(NewInst)); 1025 scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0)); 1026 InvalidateRequired = true; 1027 } 1028 if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) { 1029 LLVM_DEBUG({ 1030 dbgs() << "Now detected critical edges in the following frontier:\n"; 1031 for (ProgramPoint &PP : Frontier) { 1032 if (PP.isBB()) { 1033 dbgs() << " BB: " << PP.getBB()->getName() << "\n"; 1034 } else { 1035 dbgs() << " Inst: "; 1036 PP.getInst()->dump(); 1037 } 1038 } 1039 }); 1040 splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom, 1041 CritEdgesTo); 1042 InvalidateRequired = true; 1043 } 1044 if (InvalidateRequired) { 1045 // BitVectors that represent all insns of the function are invalid now 1046 // since we changed BBs/Insts. Re-run steps that depend on pointers being 1047 // valid 1048 Info.invalidateAll(); 1049 classifyCSRUses(); 1050 } 1051 if (CannotPlace) { 1052 LLVM_DEBUG({ 1053 dbgs() << "Dropping opportunity because restore placement failed" 1054 " -- total est. freq reduc: " 1055 << TotalEstimatedWin << "\n"; 1056 }); 1057 Frontier.clear(); 1058 return Frontier; 1059 } 1060 return Frontier; 1061 } 1062 1063 bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave, 1064 int64_t SaveOffset) { 1065 if (FA.requiresAlignment(BF)) { 1066 LLVM_DEBUG({ 1067 dbgs() << "Reg " << CSR 1068 << " is not using push/pops due to function " 1069 "alignment requirements.\n"; 1070 }); 1071 return false; 1072 } 1073 for (MCInst *Save : CSA.getSavesByReg(CSR)) { 1074 if (!SLM.canCollapseRegion(Save)) { 1075 LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n"); 1076 return false; 1077 } 1078 } 1079 // Abort if one of the restores for this CSR is not a POP. 1080 for (MCInst *Load : CSA.getRestoresByReg(CSR)) { 1081 if (!BC.MIB->isPop(*Load)) { 1082 LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n"); 1083 return false; 1084 } 1085 } 1086 1087 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1088 // Abort if we are inserting a push into an entry BB (offset -8) and this 1089 // func sets up a frame pointer. 1090 if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION || 1091 SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) { 1092 LLVM_DEBUG({ 1093 dbgs() << "Reg " << CSR 1094 << " cannot insert region or we are " 1095 "trying to insert a push into entry bb.\n"; 1096 }); 1097 return false; 1098 } 1099 return true; 1100 } 1101 1102 SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements( 1103 const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset, 1104 unsigned CSR) { 1105 SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints; 1106 // Moving pop locations to the correct sp offset 1107 ReachingInsns<true> &RI = Info.getReachingInsnsBackwards(); 1108 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1109 for (ProgramPoint &PP : FixedRestorePoints) { 1110 BinaryBasicBlock *BB = Info.getParentBB(PP); 1111 bool Found = false; 1112 if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first == 1113 SaveOffset) { 1114 BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB)); 1115 BV &= UsesByReg[CSR]; 1116 if (!BV.any()) { 1117 Found = true; 1118 PP = BB; 1119 continue; 1120 } 1121 } 1122 for (auto RIt = BB->rbegin(), End = BB->rend(); RIt != End; ++RIt) { 1123 if (SPT.getStateBefore(*RIt)->first == SaveOffset) { 1124 BitVector BV = *RI.getStateAt(*RIt); 1125 BV &= UsesByReg[CSR]; 1126 if (!BV.any()) { 1127 Found = true; 1128 PP = &*RIt; 1129 break; 1130 } 1131 } 1132 } 1133 if (!Found) { 1134 LLVM_DEBUG({ 1135 dbgs() << "Could not find restore insertion point for " << CSR 1136 << ", falling back to load/store mode\n"; 1137 }); 1138 FixedRestorePoints.clear(); 1139 return FixedRestorePoints; 1140 } 1141 } 1142 return FixedRestorePoints; 1143 } 1144 1145 void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR, 1146 bool UsePushPops) { 1147 1148 for (BinaryBasicBlock *&BB : BF.layout()) { 1149 std::vector<MCInst *> CFIs; 1150 for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) { 1151 MCInst &Inst = *I; 1152 if (BC.MIB->isCFI(Inst)) { 1153 // Delete all offset CFIs related to this CSR 1154 if (SLM.getOffsetCFIReg(Inst) == CSR) { 1155 HasDeletedOffsetCFIs[CSR] = true; 1156 scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR)); 1157 continue; 1158 } 1159 CFIs.push_back(&Inst); 1160 continue; 1161 } 1162 1163 uint16_t SavedReg = CSA.getSavedReg(Inst); 1164 uint16_t RestoredReg = CSA.getRestoredReg(Inst); 1165 if (SavedReg != CSR && RestoredReg != CSR) { 1166 CFIs.clear(); 1167 continue; 1168 } 1169 1170 scheduleChange(&Inst, WorklistItem(UsePushPops 1171 ? WorklistItem::Erase 1172 : WorklistItem::ChangeToAdjustment, 1173 CSR)); 1174 1175 // Delete associated CFIs 1176 const bool RecordDeletedPushCFIs = 1177 SavedReg == CSR && DeletedPushCFIs[CSR].empty(); 1178 const bool RecordDeletedPopCFIs = 1179 RestoredReg == CSR && DeletedPopCFIs[CSR].empty(); 1180 for (MCInst *CFI : CFIs) { 1181 const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI); 1182 // Do not touch these... 1183 if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState || 1184 MCCFI->getOperation() == MCCFIInstruction::OpRememberState) 1185 continue; 1186 scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR)); 1187 if (RecordDeletedPushCFIs) { 1188 // Do not record this to be replayed later because we are going to 1189 // rebuild it. 1190 if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) 1191 continue; 1192 DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm()); 1193 } 1194 if (RecordDeletedPopCFIs) { 1195 if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) 1196 continue; 1197 DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm()); 1198 } 1199 } 1200 CFIs.clear(); 1201 } 1202 } 1203 } 1204 1205 bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) { 1206 if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR || 1207 CSA.getRestoredReg(Inst) == CSR) 1208 return false; 1209 BitVector BV = BitVector(BC.MRI->getNumRegs(), false); 1210 BC.MIB->getTouchedRegs(Inst, BV); 1211 return BV[CSR]; 1212 } 1213 1214 void ShrinkWrapping::scheduleSaveRestoreInsertions( 1215 unsigned CSR, MCInst *BestPosSave, 1216 SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) { 1217 auto &InsnToBB = Info.getInsnToBBMap(); 1218 const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR]; 1219 const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR]; 1220 assert(FIESave && FIELoad && "Invalid CSR"); 1221 1222 LLVM_DEBUG({ 1223 dbgs() << "Scheduling save insertion at: "; 1224 BestPosSave->dump(); 1225 }); 1226 1227 scheduleChange(BestPosSave, 1228 UsePushPops ? WorklistItem::InsertPushOrPop 1229 : WorklistItem::InsertLoadOrStore, 1230 *FIESave, CSR); 1231 1232 for (ProgramPoint &PP : RestorePoints) { 1233 BinaryBasicBlock *FrontierBB = Info.getParentBB(PP); 1234 LLVM_DEBUG({ 1235 dbgs() << "Scheduling restore insertion at: "; 1236 if (PP.isInst()) 1237 PP.getInst()->dump(); 1238 else 1239 dbgs() << PP.getBB()->getName() << "\n"; 1240 }); 1241 MCInst *Term = 1242 FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr); 1243 if (Term) 1244 PP = Term; 1245 bool PrecededByPrefix = false; 1246 if (PP.isInst()) { 1247 auto Iter = FrontierBB->findInstruction(PP.getInst()); 1248 if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) { 1249 --Iter; 1250 PrecededByPrefix = BC.MIB->isPrefix(*Iter); 1251 } 1252 } 1253 if (PP.isInst() && 1254 (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) { 1255 assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) && 1256 "cannot move to end of bb"); 1257 scheduleChange(InsnToBB[PP.getInst()], 1258 UsePushPops ? WorklistItem::InsertPushOrPop 1259 : WorklistItem::InsertLoadOrStore, 1260 *FIELoad, CSR); 1261 continue; 1262 } 1263 scheduleChange(PP, 1264 UsePushPops ? WorklistItem::InsertPushOrPop 1265 : WorklistItem::InsertLoadOrStore, 1266 *FIELoad, CSR); 1267 } 1268 } 1269 1270 void ShrinkWrapping::moveSaveRestores() { 1271 bool DisablePushPopMode = false; 1272 bool UsedPushPopMode = false; 1273 // Keeps info about successfully moved regs: reg index, save position and 1274 // save size 1275 std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs; 1276 1277 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 1278 MCInst *BestPosSave = nullptr; 1279 uint64_t TotalEstimatedWin = 0; 1280 if (!isBestSavePosCold(I, BestPosSave, TotalEstimatedWin)) 1281 continue; 1282 SmallVector<ProgramPoint, 4> RestorePoints = 1283 doRestorePlacement(BestPosSave, I, TotalEstimatedWin); 1284 if (RestorePoints.empty()) 1285 continue; 1286 1287 const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I]; 1288 const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I]; 1289 (void)FIELoad; 1290 assert(FIESave && FIELoad); 1291 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1292 const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave); 1293 int SaveOffset = SPFP.first; 1294 uint8_t SaveSize = FIESave->Size; 1295 1296 // If we don't know stack state at this point, bail 1297 if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) && 1298 (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) 1299 continue; 1300 1301 // Operation mode: if true, will insert push/pops instead of loads/restores 1302 bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset); 1303 1304 if (UsePushPops) { 1305 SmallVector<ProgramPoint, 4> FixedRestorePoints = 1306 fixPopsPlacements(RestorePoints, SaveOffset, I); 1307 if (FixedRestorePoints.empty()) 1308 UsePushPops = false; 1309 else 1310 RestorePoints = FixedRestorePoints; 1311 } 1312 1313 // Disable push-pop mode for all CSRs in this function 1314 if (!UsePushPops) 1315 DisablePushPopMode = true; 1316 else 1317 UsedPushPopMode = true; 1318 1319 scheduleOldSaveRestoresRemoval(I, UsePushPops); 1320 scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops); 1321 MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize)); 1322 } 1323 1324 // Revert push-pop mode if it failed for a single CSR 1325 if (DisablePushPopMode && UsedPushPopMode) { 1326 UsedPushPopMode = false; 1327 for (BinaryBasicBlock &BB : BF) { 1328 auto WRI = Todo.find(&BB); 1329 if (WRI != Todo.end()) { 1330 std::vector<WorklistItem> &TodoList = WRI->second; 1331 for (WorklistItem &Item : TodoList) 1332 if (Item.Action == WorklistItem::InsertPushOrPop) 1333 Item.Action = WorklistItem::InsertLoadOrStore; 1334 } 1335 for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) { 1336 MCInst &Inst = *I; 1337 auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( 1338 Inst, getAnnotationIndex()); 1339 if (!TodoList) 1340 continue; 1341 bool isCFI = BC.MIB->isCFI(Inst); 1342 for (WorklistItem &Item : *TodoList) { 1343 if (Item.Action == WorklistItem::InsertPushOrPop) 1344 Item.Action = WorklistItem::InsertLoadOrStore; 1345 if (!isCFI && Item.Action == WorklistItem::Erase) 1346 Item.Action = WorklistItem::ChangeToAdjustment; 1347 } 1348 } 1349 } 1350 } 1351 1352 // Update statistics 1353 if (!UsedPushPopMode) { 1354 SpillsMovedRegularMode += MovedRegs.size(); 1355 return; 1356 } 1357 1358 // Schedule modifications to stack-accessing instructions via 1359 // StackLayoutModifier. 1360 SpillsMovedPushPopMode += MovedRegs.size(); 1361 for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) { 1362 unsigned RegNdx; 1363 MCInst *SavePos; 1364 size_t SaveSize; 1365 std::tie(RegNdx, SavePos, SaveSize) = I; 1366 for (MCInst *Save : CSA.getSavesByReg(RegNdx)) 1367 SLM.collapseRegion(Save); 1368 SLM.insertRegion(SavePos, SaveSize); 1369 } 1370 } 1371 1372 namespace { 1373 /// Helper function to identify whether two basic blocks created by splitting 1374 /// a critical edge have the same contents. 1375 bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A, 1376 const BinaryBasicBlock &B) { 1377 if (A.succ_size() != B.succ_size()) 1378 return false; 1379 if (A.succ_size() != 1) 1380 return false; 1381 1382 if (*A.succ_begin() != *B.succ_begin()) 1383 return false; 1384 1385 if (A.size() != B.size()) 1386 return false; 1387 1388 // Compare instructions 1389 auto I = A.begin(), E = A.end(); 1390 auto OtherI = B.begin(), OtherE = B.end(); 1391 while (I != E && OtherI != OtherE) { 1392 if (I->getOpcode() != OtherI->getOpcode()) 1393 return false; 1394 if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) { 1395 return true; 1396 })) 1397 return false; 1398 ++I; 1399 ++OtherI; 1400 } 1401 return true; 1402 } 1403 } // namespace 1404 1405 bool ShrinkWrapping::foldIdenticalSplitEdges() { 1406 bool Changed = false; 1407 for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) { 1408 BinaryBasicBlock &BB = *Iter; 1409 if (!BB.getName().startswith(".LSplitEdge")) 1410 continue; 1411 for (auto RIter = BF.rbegin(); RIter != BF.rend(); ++RIter) { 1412 BinaryBasicBlock &RBB = *RIter; 1413 if (&RBB == &BB) 1414 break; 1415 if (!RBB.getName().startswith(".LSplitEdge") || !RBB.isValid() || 1416 !isIdenticalSplitEdgeBB(BC, *Iter, RBB)) 1417 continue; 1418 assert(RBB.pred_size() == 1 && "Invalid split edge BB"); 1419 BinaryBasicBlock *Pred = *RBB.pred_begin(); 1420 uint64_t OrigCount = Pred->branch_info_begin()->Count; 1421 uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount; 1422 BF.replaceJumpTableEntryIn(Pred, &RBB, &BB); 1423 Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds); 1424 Changed = true; 1425 // Remove the block from CFG 1426 RBB.markValid(false); 1427 } 1428 } 1429 1430 return Changed; 1431 } 1432 1433 namespace { 1434 1435 // A special StackPointerTracking that compensates for our future plans 1436 // in removing/adding insn. 1437 class PredictiveStackPointerTracking 1438 : public StackPointerTrackingBase<PredictiveStackPointerTracking> { 1439 friend class DataflowAnalysis<PredictiveStackPointerTracking, 1440 std::pair<int, int>>; 1441 decltype(ShrinkWrapping::Todo) &TodoMap; 1442 DataflowInfoManager &Info; 1443 1444 Optional<unsigned> AnnotationIndex; 1445 1446 protected: 1447 void compNextAux(const MCInst &Point, 1448 const std::vector<ShrinkWrapping::WorklistItem> &TodoItems, 1449 std::pair<int, int> &Res) { 1450 for (const ShrinkWrapping::WorklistItem &Item : TodoItems) { 1451 if (Item.Action == ShrinkWrapping::WorklistItem::Erase && 1452 BC.MIB->isPush(Point)) { 1453 Res.first += BC.MIB->getPushSize(Point); 1454 continue; 1455 } 1456 if (Item.Action == ShrinkWrapping::WorklistItem::Erase && 1457 BC.MIB->isPop(Point)) { 1458 Res.first -= BC.MIB->getPopSize(Point); 1459 continue; 1460 } 1461 if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop && 1462 Item.FIEToInsert.IsStore) { 1463 Res.first -= Item.FIEToInsert.Size; 1464 continue; 1465 } 1466 if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop && 1467 Item.FIEToInsert.IsLoad) { 1468 Res.first += Item.FIEToInsert.Size; 1469 continue; 1470 } 1471 } 1472 } 1473 1474 std::pair<int, int> computeNext(const MCInst &Point, 1475 const std::pair<int, int> &Cur) { 1476 std::pair<int, int> Res = 1477 StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext( 1478 Point, Cur); 1479 if (Res.first == StackPointerTracking::SUPERPOSITION || 1480 Res.first == StackPointerTracking::EMPTY) 1481 return Res; 1482 auto TodoItems = 1483 BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>( 1484 Point, ShrinkWrapping::getAnnotationName()); 1485 if (TodoItems) 1486 compNextAux(Point, *TodoItems, Res); 1487 auto &InsnToBBMap = Info.getInsnToBBMap(); 1488 if (&*InsnToBBMap[&Point]->rbegin() != &Point) 1489 return Res; 1490 auto WRI = TodoMap.find(InsnToBBMap[&Point]); 1491 if (WRI == TodoMap.end()) 1492 return Res; 1493 compNextAux(Point, WRI->second, Res); 1494 return Res; 1495 } 1496 1497 StringRef getAnnotationName() const { 1498 return StringRef("PredictiveStackPointerTracking"); 1499 } 1500 1501 public: 1502 PredictiveStackPointerTracking(BinaryFunction &BF, 1503 decltype(ShrinkWrapping::Todo) &TodoMap, 1504 DataflowInfoManager &Info, 1505 MCPlusBuilder::AllocatorIdTy AllocatorId = 0) 1506 : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF, 1507 AllocatorId), 1508 TodoMap(TodoMap), Info(Info) {} 1509 1510 void run() { 1511 StackPointerTrackingBase<PredictiveStackPointerTracking>::run(); 1512 } 1513 }; 1514 1515 } // end anonymous namespace 1516 1517 void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush, 1518 int SPValPop) { 1519 MCInst *SavePoint = nullptr; 1520 for (BinaryBasicBlock &BB : BF) { 1521 for (auto InstIter = BB.rbegin(), EndIter = BB.rend(); InstIter != EndIter; 1522 ++InstIter) { 1523 int32_t SrcImm = 0; 1524 MCPhysReg Reg = 0; 1525 MCPhysReg StackPtrReg = 0; 1526 int64_t StackOffset = 0; 1527 bool IsIndexed = false; 1528 bool IsLoad = false; 1529 bool IsStore = false; 1530 bool IsSimple = false; 1531 bool IsStoreFromReg = false; 1532 uint8_t Size = 0; 1533 if (!BC.MIB->isStackAccess(*InstIter, IsLoad, IsStore, IsStoreFromReg, 1534 Reg, SrcImm, StackPtrReg, StackOffset, Size, 1535 IsSimple, IsIndexed)) 1536 continue; 1537 if (Reg != CSR || !IsStore || !IsSimple) 1538 continue; 1539 SavePoint = &*InstIter; 1540 break; 1541 } 1542 if (SavePoint) 1543 break; 1544 } 1545 assert(SavePoint); 1546 LLVM_DEBUG({ 1547 dbgs() << "Now using as save point for reg " << CSR << " :"; 1548 SavePoint->dump(); 1549 }); 1550 bool PrevAffectedZone = false; 1551 BinaryBasicBlock *PrevBB = nullptr; 1552 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 1553 for (BinaryBasicBlock *BB : BF.layout()) { 1554 if (BB->size() == 0) 1555 continue; 1556 const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint); 1557 const bool InAffectedZoneAtBegin = 1558 (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]]; 1559 bool InAffectedZone = InAffectedZoneAtBegin; 1560 for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) { 1561 const bool CurZone = DA.count(*InstIter, *SavePoint); 1562 if (InAffectedZone != CurZone) { 1563 auto InsertionIter = InstIter; 1564 ++InsertionIter; 1565 InAffectedZone = CurZone; 1566 if (InAffectedZone) 1567 InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0, 1568 SPValPop); 1569 else 1570 InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0, 1571 SPValPush); 1572 --InstIter; 1573 } 1574 } 1575 // Are we at the first basic block or hot-cold split point? 1576 if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) { 1577 if (InAffectedZoneAtBegin) 1578 insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush); 1579 } else if (InAffectedZoneAtBegin != PrevAffectedZone) { 1580 if (InAffectedZoneAtBegin) 1581 insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush); 1582 else 1583 insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop); 1584 } 1585 PrevAffectedZone = InAffectedZoneAtEnd; 1586 PrevBB = BB; 1587 } 1588 } 1589 1590 void ShrinkWrapping::rebuildCFIForSP() { 1591 for (BinaryBasicBlock &BB : BF) { 1592 for (MCInst &Inst : BB) { 1593 if (!BC.MIB->isCFI(Inst)) 1594 continue; 1595 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 1596 if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset) 1597 BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId); 1598 } 1599 } 1600 1601 int PrevSPVal = -8; 1602 BinaryBasicBlock *PrevBB = nullptr; 1603 StackPointerTracking &SPT = Info.getStackPointerTracking(); 1604 for (BinaryBasicBlock *BB : BF.layout()) { 1605 if (BB->size() == 0) 1606 continue; 1607 const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first; 1608 const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first; 1609 int SPVal = SPValAtBegin; 1610 for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) { 1611 const int CurVal = SPT.getStateAt(*Iter)->first; 1612 if (SPVal != CurVal) { 1613 auto InsertionIter = Iter; 1614 ++InsertionIter; 1615 Iter = BF.addCFIInstruction( 1616 BB, InsertionIter, 1617 MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal)); 1618 SPVal = CurVal; 1619 } 1620 } 1621 if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold()) 1622 BF.addCFIInstruction( 1623 BB, BB->begin(), 1624 MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin)); 1625 else if (SPValAtBegin != PrevSPVal) 1626 BF.addCFIInstruction( 1627 PrevBB, PrevBB->end(), 1628 MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin)); 1629 PrevSPVal = SPValAtEnd; 1630 PrevBB = BB; 1631 } 1632 1633 for (BinaryBasicBlock &BB : BF) 1634 for (auto I = BB.begin(); I != BB.end();) 1635 if (BC.MIB->hasAnnotation(*I, "DeleteMe")) 1636 I = BB.eraseInstruction(I); 1637 else 1638 ++I; 1639 } 1640 1641 MCInst ShrinkWrapping::createStackAccess(int SPVal, int FPVal, 1642 const FrameIndexEntry &FIE, 1643 bool CreatePushOrPop) { 1644 MCInst NewInst; 1645 if (SPVal != StackPointerTracking::SUPERPOSITION && 1646 SPVal != StackPointerTracking::EMPTY) { 1647 if (FIE.IsLoad) { 1648 if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(), 1649 FIE.StackOffset - SPVal, FIE.RegOrImm, 1650 FIE.Size)) { 1651 errs() << "createRestoreFromStack: not supported on this platform\n"; 1652 abort(); 1653 } 1654 } else { 1655 if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(), 1656 FIE.StackOffset - SPVal, FIE.RegOrImm, 1657 FIE.Size)) { 1658 errs() << "createSaveToStack: not supported on this platform\n"; 1659 abort(); 1660 } 1661 } 1662 if (CreatePushOrPop) 1663 BC.MIB->changeToPushOrPop(NewInst); 1664 return NewInst; 1665 } 1666 assert(FPVal != StackPointerTracking::SUPERPOSITION && 1667 FPVal != StackPointerTracking::EMPTY); 1668 1669 if (FIE.IsLoad) { 1670 if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(), 1671 FIE.StackOffset - FPVal, FIE.RegOrImm, 1672 FIE.Size)) { 1673 errs() << "createRestoreFromStack: not supported on this platform\n"; 1674 abort(); 1675 } 1676 } else { 1677 if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(), 1678 FIE.StackOffset - FPVal, FIE.RegOrImm, 1679 FIE.Size)) { 1680 errs() << "createSaveToStack: not supported on this platform\n"; 1681 abort(); 1682 } 1683 } 1684 return NewInst; 1685 } 1686 1687 void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) { 1688 const MCCFIInstruction *CFI = BF.getCFIFor(Inst); 1689 if (UpdatedCFIs.count(CFI)) 1690 return; 1691 1692 switch (CFI->getOperation()) { 1693 case MCCFIInstruction::OpDefCfa: 1694 case MCCFIInstruction::OpDefCfaRegister: 1695 case MCCFIInstruction::OpDefCfaOffset: 1696 CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset); 1697 break; 1698 case MCCFIInstruction::OpOffset: 1699 default: 1700 break; 1701 } 1702 1703 UpdatedCFIs.insert(CFI); 1704 } 1705 1706 BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB, 1707 BBIterTy Pos, unsigned Reg, 1708 bool isPush, int Sz, 1709 int64_t NewOffset) { 1710 if (isPush) { 1711 for (uint32_t Idx : DeletedPushCFIs[Reg]) { 1712 Pos = BF.addCFIPseudo(&BB, Pos, Idx); 1713 updateCFIInstOffset(*Pos++, NewOffset); 1714 } 1715 if (HasDeletedOffsetCFIs[Reg]) { 1716 Pos = BF.addCFIInstruction( 1717 &BB, Pos, 1718 MCCFIInstruction::createOffset( 1719 nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset)); 1720 ++Pos; 1721 } 1722 } else { 1723 for (uint32_t Idx : DeletedPopCFIs[Reg]) { 1724 Pos = BF.addCFIPseudo(&BB, Pos, Idx); 1725 updateCFIInstOffset(*Pos++, NewOffset); 1726 } 1727 if (HasDeletedOffsetCFIs[Reg]) { 1728 Pos = BF.addCFIInstruction( 1729 &BB, Pos, 1730 MCCFIInstruction::createSameValue( 1731 nullptr, BC.MRI->getDwarfRegNum(Reg, false))); 1732 ++Pos; 1733 } 1734 } 1735 return Pos; 1736 } 1737 1738 BBIterTy ShrinkWrapping::processInsertion(BBIterTy InsertionPoint, 1739 BinaryBasicBlock *CurBB, 1740 const WorklistItem &Item, 1741 int64_t SPVal, int64_t FPVal) { 1742 // Trigger CFI reconstruction for this CSR if necessary - writing to 1743 // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update 1744 if ((Item.FIEToInsert.IsStore && 1745 !DeletedPushCFIs[Item.AffectedReg].empty()) || 1746 (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) || 1747 HasDeletedOffsetCFIs[Item.AffectedReg]) { 1748 if (Item.Action == WorklistItem::InsertPushOrPop) { 1749 if (Item.FIEToInsert.IsStore) 1750 PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size; 1751 else 1752 PopOffsetByReg[Item.AffectedReg] = SPVal; 1753 } else { 1754 if (Item.FIEToInsert.IsStore) 1755 PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset; 1756 else 1757 PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset; 1758 } 1759 } 1760 1761 LLVM_DEBUG({ 1762 dbgs() << "Creating stack access with SPVal = " << SPVal 1763 << "; stack offset = " << Item.FIEToInsert.StackOffset 1764 << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop) 1765 << "\n"; 1766 }); 1767 MCInst NewInst = 1768 createStackAccess(SPVal, FPVal, Item.FIEToInsert, 1769 Item.Action == WorklistItem::InsertPushOrPop); 1770 if (InsertionPoint != CurBB->end()) { 1771 LLVM_DEBUG({ 1772 dbgs() << "Adding before Inst: "; 1773 InsertionPoint->dump(); 1774 dbgs() << "the following inst: "; 1775 NewInst.dump(); 1776 }); 1777 BBIterTy Iter = 1778 CurBB->insertInstruction(InsertionPoint, std::move(NewInst)); 1779 return ++Iter; 1780 } 1781 CurBB->addInstruction(std::move(NewInst)); 1782 LLVM_DEBUG(dbgs() << "Adding to BB!\n"); 1783 return CurBB->end(); 1784 } 1785 1786 BBIterTy ShrinkWrapping::processInsertionsList( 1787 BBIterTy InsertionPoint, BinaryBasicBlock *CurBB, 1788 std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) { 1789 bool HasInsertions = false; 1790 for (WorklistItem &Item : TodoList) { 1791 if (Item.Action == WorklistItem::Erase || 1792 Item.Action == WorklistItem::ChangeToAdjustment) 1793 continue; 1794 HasInsertions = true; 1795 break; 1796 } 1797 1798 if (!HasInsertions) 1799 return InsertionPoint; 1800 1801 assert(((SPVal != StackPointerTracking::SUPERPOSITION && 1802 SPVal != StackPointerTracking::EMPTY) || 1803 (FPVal != StackPointerTracking::SUPERPOSITION && 1804 FPVal != StackPointerTracking::EMPTY)) && 1805 "Cannot insert if we have no idea of the stack state here"); 1806 1807 // Revert the effect of PSPT for this location, we want SP Value before 1808 // insertions 1809 if (InsertionPoint == CurBB->end()) { 1810 for (WorklistItem &Item : TodoList) { 1811 if (Item.Action != WorklistItem::InsertPushOrPop) 1812 continue; 1813 if (Item.FIEToInsert.IsStore) 1814 SPVal += Item.FIEToInsert.Size; 1815 if (Item.FIEToInsert.IsLoad) 1816 SPVal -= Item.FIEToInsert.Size; 1817 } 1818 } 1819 1820 // Reorder POPs to obey the correct dominance relation between them 1821 llvm::stable_sort(TodoList, [&](const WorklistItem &A, 1822 const WorklistItem &B) { 1823 if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) && 1824 (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad)) 1825 return false; 1826 if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad)) 1827 return true; 1828 if ((B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad)) 1829 return false; 1830 return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg]; 1831 }); 1832 1833 // Process insertions 1834 for (WorklistItem &Item : TodoList) { 1835 if (Item.Action == WorklistItem::Erase || 1836 Item.Action == WorklistItem::ChangeToAdjustment) 1837 continue; 1838 1839 InsertionPoint = 1840 processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal); 1841 if (Item.Action == WorklistItem::InsertPushOrPop && 1842 Item.FIEToInsert.IsStore) 1843 SPVal -= Item.FIEToInsert.Size; 1844 if (Item.Action == WorklistItem::InsertPushOrPop && 1845 Item.FIEToInsert.IsLoad) 1846 SPVal += Item.FIEToInsert.Size; 1847 } 1848 return InsertionPoint; 1849 } 1850 1851 bool ShrinkWrapping::processInsertions() { 1852 PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId); 1853 PSPT.run(); 1854 1855 bool Changes = false; 1856 for (BinaryBasicBlock &BB : BF) { 1857 // Process insertions before some inst. 1858 for (auto I = BB.begin(); I != BB.end(); ++I) { 1859 MCInst &Inst = *I; 1860 auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( 1861 Inst, getAnnotationIndex()); 1862 if (!TodoList) 1863 continue; 1864 Changes = true; 1865 std::vector<WorklistItem> List = *TodoList; 1866 LLVM_DEBUG({ 1867 dbgs() << "Now processing insertions in " << BB.getName() 1868 << " before inst: "; 1869 Inst.dump(); 1870 }); 1871 auto Iter = I; 1872 std::pair<int, int> SPTState = 1873 *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter)); 1874 I = processInsertionsList(I, &BB, List, SPTState.first, SPTState.second); 1875 } 1876 // Process insertions at the end of bb 1877 auto WRI = Todo.find(&BB); 1878 if (WRI != Todo.end()) { 1879 std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin()); 1880 processInsertionsList(BB.end(), &BB, WRI->second, SPTState.first, 1881 SPTState.second); 1882 Changes = true; 1883 } 1884 } 1885 return Changes; 1886 } 1887 1888 void ShrinkWrapping::processDeletions() { 1889 LivenessAnalysis &LA = Info.getLivenessAnalysis(); 1890 for (BinaryBasicBlock &BB : BF) { 1891 for (auto II = BB.begin(); II != BB.end(); ++II) { 1892 MCInst &Inst = *II; 1893 auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>( 1894 Inst, getAnnotationIndex()); 1895 if (!TodoList) 1896 continue; 1897 // Process all deletions 1898 for (WorklistItem &Item : *TodoList) { 1899 if (Item.Action != WorklistItem::Erase && 1900 Item.Action != WorklistItem::ChangeToAdjustment) 1901 continue; 1902 1903 if (Item.Action == WorklistItem::ChangeToAdjustment) { 1904 // Is flag reg alive across this func? 1905 bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg()); 1906 if (int Sz = BC.MIB->getPushSize(Inst)) { 1907 BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags); 1908 continue; 1909 } 1910 if (int Sz = BC.MIB->getPopSize(Inst)) { 1911 BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags); 1912 continue; 1913 } 1914 } 1915 1916 LLVM_DEBUG({ 1917 dbgs() << "Erasing: "; 1918 BC.printInstruction(dbgs(), Inst); 1919 }); 1920 II = std::prev(BB.eraseInstruction(II)); 1921 break; 1922 } 1923 } 1924 } 1925 } 1926 1927 void ShrinkWrapping::rebuildCFI() { 1928 const bool FP = Info.getStackPointerTracking().HasFramePointer; 1929 Info.invalidateAll(); 1930 if (!FP) { 1931 rebuildCFIForSP(); 1932 Info.invalidateAll(); 1933 } 1934 for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) { 1935 if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0) 1936 continue; 1937 const int64_t SPValPush = PushOffsetByReg[I]; 1938 const int64_t SPValPop = PopOffsetByReg[I]; 1939 insertUpdatedCFI(I, SPValPush, SPValPop); 1940 Info.invalidateAll(); 1941 } 1942 } 1943 1944 bool ShrinkWrapping::perform() { 1945 HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false); 1946 PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL); 1947 PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL); 1948 DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0); 1949 1950 if (BF.checkForAmbiguousJumpTables()) { 1951 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName() 1952 << ".\n"); 1953 // We could call disambiguateJumpTables here, but it is probably not worth 1954 // the cost (of duplicating potentially large jump tables that could regress 1955 // dcache misses). Moreover, ambiguous JTs are rare and coming from code 1956 // written in assembly language. Just bail. 1957 return false; 1958 } 1959 SLM.initialize(); 1960 CSA.compute(); 1961 classifyCSRUses(); 1962 pruneUnwantedCSRs(); 1963 computeSaveLocations(); 1964 computeDomOrder(); 1965 moveSaveRestores(); 1966 LLVM_DEBUG({ 1967 dbgs() << "Func before shrink-wrapping: \n"; 1968 BF.dump(); 1969 }); 1970 SLM.performChanges(); 1971 // Early exit if processInsertions doesn't detect any todo items 1972 if (!processInsertions()) 1973 return false; 1974 processDeletions(); 1975 if (foldIdenticalSplitEdges()) { 1976 const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs(); 1977 (void)Stats; 1978 LLVM_DEBUG(dbgs() << "Deleted " << Stats.first 1979 << " redundant split edge BBs (" << Stats.second 1980 << " bytes) for " << BF.getPrintName() << "\n"); 1981 } 1982 rebuildCFI(); 1983 // We may have split edges, creating BBs that need correct branching 1984 BF.fixBranches(); 1985 LLVM_DEBUG({ 1986 dbgs() << "Func after shrink-wrapping: \n"; 1987 BF.dump(); 1988 }); 1989 return true; 1990 } 1991 1992 void ShrinkWrapping::printStats() { 1993 outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode 1994 << " spills inserting load/stores and " << SpillsMovedPushPopMode 1995 << " spills inserting push/pops\n"; 1996 } 1997 1998 // Operators necessary as a result of using MCAnnotation 1999 raw_ostream &operator<<(raw_ostream &OS, 2000 const std::vector<ShrinkWrapping::WorklistItem> &Vec) { 2001 OS << "SWTodo["; 2002 const char *Sep = ""; 2003 for (const ShrinkWrapping::WorklistItem &Item : Vec) { 2004 OS << Sep; 2005 switch (Item.Action) { 2006 case ShrinkWrapping::WorklistItem::Erase: 2007 OS << "Erase"; 2008 break; 2009 case ShrinkWrapping::WorklistItem::ChangeToAdjustment: 2010 OS << "ChangeToAdjustment"; 2011 break; 2012 case ShrinkWrapping::WorklistItem::InsertLoadOrStore: 2013 OS << "InsertLoadOrStore"; 2014 break; 2015 case ShrinkWrapping::WorklistItem::InsertPushOrPop: 2016 OS << "InsertPushOrPop"; 2017 break; 2018 } 2019 Sep = ", "; 2020 } 2021 OS << "]"; 2022 return OS; 2023 } 2024 2025 raw_ostream & 2026 operator<<(raw_ostream &OS, 2027 const std::vector<StackLayoutModifier::WorklistItem> &Vec) { 2028 OS << "SLMTodo["; 2029 const char *Sep = ""; 2030 for (const StackLayoutModifier::WorklistItem &Item : Vec) { 2031 OS << Sep; 2032 switch (Item.Action) { 2033 case StackLayoutModifier::WorklistItem::None: 2034 OS << "None"; 2035 break; 2036 case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset: 2037 OS << "AdjustLoadStoreOffset"; 2038 break; 2039 case StackLayoutModifier::WorklistItem::AdjustCFI: 2040 OS << "AdjustCFI"; 2041 break; 2042 } 2043 Sep = ", "; 2044 } 2045 OS << "]"; 2046 return OS; 2047 } 2048 2049 bool operator==(const ShrinkWrapping::WorklistItem &A, 2050 const ShrinkWrapping::WorklistItem &B) { 2051 return (A.Action == B.Action && A.AffectedReg == B.AffectedReg && 2052 A.Adjustment == B.Adjustment && 2053 A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad && 2054 A.FIEToInsert.IsStore == B.FIEToInsert.IsStore && 2055 A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm && 2056 A.FIEToInsert.Size == B.FIEToInsert.Size && 2057 A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple && 2058 A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset); 2059 } 2060 2061 bool operator==(const StackLayoutModifier::WorklistItem &A, 2062 const StackLayoutModifier::WorklistItem &B) { 2063 return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate); 2064 } 2065 2066 } // end namespace bolt 2067 } // end namespace llvm 2068