1 //===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ShrinkWrapping class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/ShrinkWrapping.h"
14 #include "bolt/Core/MCPlus.h"
15 #include "bolt/Passes/DataflowInfoManager.h"
16 #include <numeric>
17 #include <stack>
18 
19 #define DEBUG_TYPE "shrinkwrapping"
20 
21 using namespace llvm;
22 
23 namespace opts {
24 
25 extern cl::opt<bool> TimeOpts;
26 extern cl::OptionCategory BoltOptCategory;
27 
28 static cl::opt<unsigned> ShrinkWrappingThreshold(
29     "shrink-wrapping-threshold",
30     cl::desc("Percentage of prologue execution count to use as threshold when"
31              " evaluating whether a block is cold enough to be profitable to"
32              " move eligible spills there"),
33     cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
34 } // namespace opts
35 
36 namespace llvm {
37 namespace bolt {
38 
39 void CalleeSavedAnalysis::analyzeSaves() {
40   ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
41   StackReachingUses &SRU = Info.getStackReachingUses();
42   auto &InsnToBB = Info.getInsnToBBMap();
43   BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);
44 
45   LLVM_DEBUG(dbgs() << "Checking spill locations\n");
46   for (BinaryBasicBlock &BB : BF) {
47     LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
48     const MCInst *Prev = nullptr;
49     for (MCInst &Inst : BB) {
50       if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
51         // Blacklist weird stores we don't understand
52         if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
53             FIE->IsStoreFromReg) {
54           BlacklistedRegs.set(FIE->RegOrImm);
55           CalleeSaved.reset(FIE->RegOrImm);
56           Prev = &Inst;
57           continue;
58         }
59 
60         if (!FIE->IsStore || !FIE->IsStoreFromReg ||
61             BlacklistedRegs[FIE->RegOrImm]) {
62           Prev = &Inst;
63           continue;
64         }
65 
66         // If this reg is defined locally, it is not a callee-saved reg
67         if (RD.isReachedBy(FIE->RegOrImm,
68                            Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
69           BlacklistedRegs.set(FIE->RegOrImm);
70           CalleeSaved.reset(FIE->RegOrImm);
71           Prev = &Inst;
72           continue;
73         }
74 
75         // If this stack position is accessed in another function, we are
76         // probably dealing with a parameter passed in a stack -- do not mess
77         // with it
78         if (SRU.isStoreUsed(*FIE,
79                             Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB)),
80             /*IncludeLocalAccesses=*/false) {
81           BlacklistedRegs.set(FIE->RegOrImm);
82           CalleeSaved.reset(FIE->RegOrImm);
83           Prev = &Inst;
84           continue;
85         }
86 
87         // If this stack position is loaded elsewhere in another reg, we can't
88         // update it, so blacklist it.
89         if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
90                                                   : SRU.expr_begin(BB))) {
91           BlacklistedRegs.set(FIE->RegOrImm);
92           CalleeSaved.reset(FIE->RegOrImm);
93           Prev = &Inst;
94           continue;
95         }
96 
97         // Ignore regs with multiple saves
98         if (CalleeSaved[FIE->RegOrImm]) {
99           BlacklistedRegs.set(FIE->RegOrImm);
100           CalleeSaved.reset(FIE->RegOrImm);
101           Prev = &Inst;
102           continue;
103         }
104 
105         CalleeSaved.set(FIE->RegOrImm);
106         SaveFIEByReg[FIE->RegOrImm] = &*FIE;
107         SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
108         BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
109         OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
110         LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
111                           << FIE->RegOrImm << "\n");
112       }
113       Prev = &Inst;
114     }
115   }
116 }
117 
118 void CalleeSavedAnalysis::analyzeRestores() {
119   ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();
120 
121   // Now compute all restores of these callee-saved regs
122   for (BinaryBasicBlock &BB : BF) {
123     const MCInst *Prev = nullptr;
124     for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
125       MCInst &Inst = *I;
126       if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
127         if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
128           Prev = &Inst;
129           continue;
130         }
131 
132         // If this reg is used locally after a restore, then we are probably
133         // not dealing with a callee-saved reg. Except if this use is by
134         // another store, but we don't cover this case yet.
135         // Also not callee-saved if this load accesses caller stack or isn't
136         // simple.
137         if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
138             RU.isReachedBy(FIE->RegOrImm,
139                            Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
140           CalleeSaved.reset(FIE->RegOrImm);
141           Prev = &Inst;
142           continue;
143         }
144         // If stack offsets between saves/store don't agree with each other,
145         // we don't completely understand what's happening here
146         if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
147           CalleeSaved.reset(FIE->RegOrImm);
148           LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
149                                "mismatching restore: "
150                             << FIE->RegOrImm << "\n");
151           Prev = &Inst;
152           continue;
153         }
154 
155         LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
156                           << "\n");
157         if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
158           LoadFIEByReg[FIE->RegOrImm] = &*FIE;
159         BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
160                               AllocatorId);
161         HasRestores.set(FIE->RegOrImm);
162       }
163       Prev = &Inst;
164     }
165   }
166 }
167 
168 std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
169   std::vector<MCInst *> Results;
170   for (BinaryBasicBlock &BB : BF)
171     for (MCInst &Inst : BB)
172       if (getSavedReg(Inst) == Reg)
173         Results.push_back(&Inst);
174   return Results;
175 }
176 
177 std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
178   std::vector<MCInst *> Results;
179   for (BinaryBasicBlock &BB : BF)
180     for (MCInst &Inst : BB)
181       if (getRestoredReg(Inst) == Reg)
182         Results.push_back(&Inst);
183   return Results;
184 }
185 
186 CalleeSavedAnalysis::~CalleeSavedAnalysis() {
187   for (BinaryBasicBlock &BB : BF) {
188     for (MCInst &Inst : BB) {
189       BC.MIB->removeAnnotation(Inst, getSaveTag());
190       BC.MIB->removeAnnotation(Inst, getRestoreTag());
191     }
192   }
193 }
194 
195 void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
196   if (BlacklistedRegions[Offset] < Size)
197     BlacklistedRegions[Offset] = Size;
198 }
199 
200 bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
201   for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions)
202     if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second)
203       return true;
204   return false;
205 }
206 
207 bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
208                                                      int64_t Size) {
209   bool HasConflict = false;
210   for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) {
211     std::pair<const int64_t, int64_t> &Elem = *Iter;
212     if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second &&
213         (Offset != Elem.first || Size != Elem.second)) {
214       Iter = AvailableRegions.erase(Iter);
215       HasConflict = true;
216       continue;
217     }
218     ++Iter;
219   }
220   if (HasConflict) {
221     blacklistRegion(Offset, Size);
222     return true;
223   }
224   return false;
225 }
226 
227 void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
228   StackPointerTracking &SPT = Info.getStackPointerTracking();
229   if (!BC.MII->get(Point.getOpcode())
230            .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
231     return;
232 
233   int SPVal, FPVal;
234   std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
235   std::pair<MCPhysReg, int64_t> FP;
236 
237   if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
238     FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
239   else
240     FP = std::make_pair(0, 0);
241   std::pair<MCPhysReg, int64_t> SP;
242 
243   if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
244     SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
245   else
246     SP = std::make_pair(0, 0);
247 
248   int64_t Output;
249   if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
250     return;
251 
252   // Not your regular frame pointer initialization... bail
253   if (Output != SPVal)
254     blacklistRegion(0, 0);
255 }
256 
257 void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
258   StackPointerTracking &SPT = Info.getStackPointerTracking();
259   if (!BC.MII->get(Point.getOpcode())
260            .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
261     return;
262   // Check if the definition of SP comes from FP -- in this case, this
263   // value may need to be updated depending on our stack layout changes
264   const MCInstrDesc &InstInfo = BC.MII->get(Point.getOpcode());
265   unsigned NumDefs = InstInfo.getNumDefs();
266   bool UsesFP = false;
267   for (unsigned I = NumDefs, E = MCPlus::getNumPrimeOperands(Point); I < E;
268        ++I) {
269     MCOperand &Operand = Point.getOperand(I);
270     if (!Operand.isReg())
271       continue;
272     if (Operand.getReg() == BC.MIB->getFramePointer()) {
273       UsesFP = true;
274       break;
275     }
276   }
277   if (!UsesFP)
278     return;
279 
280   // Setting up evaluation
281   int SPVal, FPVal;
282   std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
283   std::pair<MCPhysReg, int64_t> FP;
284 
285   if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
286     FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
287   else
288     FP = std::make_pair(0, 0);
289   std::pair<MCPhysReg, int64_t> SP;
290 
291   if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
292     SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
293   else
294     SP = std::make_pair(0, 0);
295 
296   int64_t Output;
297   if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
298     return;
299 
300   // If the value is the same of FP, no need to adjust it
301   if (Output == FPVal)
302     return;
303 
304   // If an allocation happened through FP, bail
305   if (Output <= SPVal) {
306     blacklistRegion(0, 0);
307     return;
308   }
309 
310   // We are restoring SP to an old value based on FP. Mark it as a stack
311   // access to be fixed later.
312   BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
313 }
314 
315 void StackLayoutModifier::classifyStackAccesses() {
316   // Understand when stack slots are being used non-locally
317   StackReachingUses &SRU = Info.getStackReachingUses();
318 
319   for (BinaryBasicBlock &BB : BF) {
320     const MCInst *Prev = nullptr;
321     for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
322       MCInst &Inst = *I;
323       checkFramePointerInitialization(Inst);
324       checkStackPointerRestore(Inst);
325       ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
326       if (!FIEX) {
327         Prev = &Inst;
328         continue;
329       }
330       if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
331         blacklistRegion(FIEX->StackOffset, FIEX->Size);
332         Prev = &Inst;
333         continue;
334       }
335       // If this stack position is accessed in another function, we are
336       // probably dealing with a parameter passed in a stack -- do not mess
337       // with it
338       if (SRU.isStoreUsed(*FIEX,
339                           Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
340                           /*IncludeLocalAccesses=*/false)) {
341         blacklistRegion(FIEX->StackOffset, FIEX->Size);
342         Prev = &Inst;
343         continue;
344       }
345       // Now we have a clear stack slot access. Check if its blacklisted or if
346       // it conflicts with another chunk.
347       if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
348           blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
349         Prev = &Inst;
350         continue;
351       }
352       // We are free to go. Add it as available stack slot which we know how
353       // to move it.
354       AvailableRegions[FIEX->StackOffset] = FIEX->Size;
355       BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
356       RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
357       RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
358       LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
359                         << (int)FIEX->Size << "\n");
360     }
361   }
362 }
363 
364 void StackLayoutModifier::classifyCFIs() {
365   std::stack<std::pair<int64_t, uint16_t>> CFIStack;
366   int64_t CfaOffset = -8;
367   uint16_t CfaReg = 7;
368 
369   auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
370     const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
371     if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
372       BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
373       LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
374     } else {
375       IsSimple = false;
376       return;
377     }
378   };
379 
380   for (BinaryBasicBlock *&BB : BF.layout()) {
381     for (MCInst &Inst : *BB) {
382       if (!BC.MIB->isCFI(Inst))
383         continue;
384       const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
385       switch (CFI->getOperation()) {
386       case MCCFIInstruction::OpDefCfa:
387         CfaOffset = -CFI->getOffset();
388         recordAccess(&Inst, CfaOffset);
389         LLVM_FALLTHROUGH;
390       case MCCFIInstruction::OpDefCfaRegister:
391         CfaReg = CFI->getRegister();
392         break;
393       case MCCFIInstruction::OpDefCfaOffset:
394         CfaOffset = -CFI->getOffset();
395         recordAccess(&Inst, CfaOffset);
396         break;
397       case MCCFIInstruction::OpOffset:
398         recordAccess(&Inst, CFI->getOffset());
399         BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
400                               BC.MRI->getLLVMRegNum(CFI->getRegister(),
401                                                     /*isEH=*/false),
402                               AllocatorId);
403         break;
404       case MCCFIInstruction::OpSameValue:
405         BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
406                               BC.MRI->getLLVMRegNum(CFI->getRegister(),
407                                                     /*isEH=*/false),
408                               AllocatorId);
409         break;
410       case MCCFIInstruction::OpRememberState:
411         CFIStack.push(std::make_pair(CfaOffset, CfaReg));
412         break;
413       case MCCFIInstruction::OpRestoreState: {
414         assert(!CFIStack.empty() && "Corrupt CFI stack");
415         std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
416         CFIStack.pop();
417         CfaOffset = Elem.first;
418         CfaReg = Elem.second;
419         break;
420       }
421       case MCCFIInstruction::OpRelOffset:
422       case MCCFIInstruction::OpAdjustCfaOffset:
423         llvm_unreachable("Unhandled AdjustCfaOffset");
424         break;
425       default:
426         break;
427       }
428     }
429   }
430 }
431 
432 void StackLayoutModifier::scheduleChange(
433     MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
434   auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
435       Inst, getTodoTag(), AllocatorId);
436   WList.push_back(Item);
437 }
438 
439 bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
440   if (!IsSimple || !BC.MIB->isPush(*DeletedPush))
441     return false;
442 
443   ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
444   if (!FIE)
445     return false;
446 
447   return canCollapseRegion(FIE->StackOffset);
448 }
449 
450 bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
451   if (!IsInitialized)
452     initialize();
453   if (!IsSimple)
454     return false;
455 
456   if (CollapsedRegions.count(RegionAddr))
457     return true;
458 
459   // Check if it is possible to readjust all accesses below RegionAddr
460   if (!BlacklistedRegions.empty())
461     return false;
462 
463   return true;
464 }
465 
466 bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
467   ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
468   if (!FIE)
469     return false;
470   int64_t RegionAddr = FIE->StackOffset;
471   int64_t RegionSz = FIE->Size;
472   return collapseRegion(DeletedPush, RegionAddr, RegionSz);
473 }
474 
475 bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
476                                          int64_t RegionSz) {
477   if (!canCollapseRegion(RegionAddr))
478     return false;
479 
480   assert(IsInitialized);
481   StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();
482 
483   for (BinaryBasicBlock &BB : BF) {
484     for (MCInst &Inst : BB) {
485       if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
486         continue;
487       auto Slot =
488           BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
489               Inst, getSlotTag());
490       if (!AvailableRegions.count(Slot))
491         continue;
492       // We need to ensure this access is affected by the deleted push
493       if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
494         continue;
495 
496       if (BC.MIB->isCFI(Inst)) {
497         if (Slot > RegionAddr)
498           continue;
499         scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
500         continue;
501       }
502       ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
503       if (!FIE) {
504         if (Slot > RegionAddr)
505           continue;
506         // SP update based on frame pointer
507         scheduleChange(
508             Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
509         continue;
510       }
511 
512       if (Slot == RegionAddr) {
513         BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
514         continue;
515       }
516       if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
517         continue;
518 
519       if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
520         continue;
521 
522       if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
523         continue;
524 
525       scheduleChange(
526           Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
527     }
528   }
529 
530   CollapsedRegions.insert(RegionAddr);
531   return true;
532 }
533 
534 void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
535   for (BinaryBasicBlock &BB : BF) {
536     for (MCInst &Inst : BB) {
537       if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
538         continue;
539       BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
540       scheduleChange(
541           Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
542     }
543   }
544 }
545 
546 bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
547   if (!IsInitialized)
548     initialize();
549   if (!IsSimple)
550     return false;
551 
552   StackPointerTracking &SPT = Info.getStackPointerTracking();
553   int64_t RegionAddr = SPT.getStateBefore(P)->first;
554   if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
555     return false;
556 
557   if (InsertedRegions.count(RegionAddr))
558     return true;
559 
560   // Check if we are going to screw up stack accesses at call sites that
561   // pass parameters via stack
562   if (!BlacklistedRegions.empty())
563     return false;
564 
565   return true;
566 }
567 
568 bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
569   if (!canInsertRegion(P))
570     return false;
571 
572   assert(IsInitialized);
573   StackPointerTracking &SPT = Info.getStackPointerTracking();
574   // This RegionAddr is slightly different from the one seen in collapseRegion
575   // This is the value of SP before the allocation the user wants to make.
576   int64_t RegionAddr = SPT.getStateBefore(P)->first;
577   if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
578     return false;
579 
580   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
581 
582   for (BinaryBasicBlock &BB : BF) {
583     for (MCInst &Inst : BB) {
584       if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
585         continue;
586       auto Slot =
587           BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
588               Inst, getSlotTag());
589       if (!AvailableRegions.count(Slot))
590         continue;
591 
592       if (!(DA.doesADominateB(P, Inst)))
593         continue;
594 
595       if (BC.MIB->isCFI(Inst)) {
596         if (Slot >= RegionAddr)
597           continue;
598         scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
599         continue;
600       }
601       ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
602       if (!FIE) {
603         if (Slot >= RegionAddr)
604           continue;
605         scheduleChange(
606             Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
607         continue;
608       }
609 
610       if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
611         continue;
612       if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
613         continue;
614       if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
615         continue;
616       scheduleChange(
617           Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
618     }
619   }
620 
621   InsertedRegions.insert(RegionAddr);
622   return true;
623 }
624 
625 void StackLayoutModifier::performChanges() {
626   std::set<uint32_t> ModifiedCFIIndices;
627   for (BinaryBasicBlock &BB : BF) {
628     for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
629       MCInst &Inst = *I;
630       if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
631         assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
632         BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
633       }
634       if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
635         continue;
636       auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
637           Inst, getTodoTag());
638       int64_t Adjustment = 0;
639       WorklistItem::ActionType AdjustmentType = WorklistItem::None;
640       for (WorklistItem &WI : WList) {
641         if (WI.Action == WorklistItem::None)
642           continue;
643         assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
644                WI.Action == WorklistItem::AdjustCFI);
645         assert((AdjustmentType == WorklistItem::None ||
646                 AdjustmentType == WI.Action) &&
647                "Conflicting actions requested at the same program point");
648         AdjustmentType = WI.Action;
649         Adjustment += WI.OffsetUpdate;
650       }
651       if (!Adjustment)
652         continue;
653       if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
654         assert(BC.MIB->isCFI(Inst));
655         uint32_t CFINum = Inst.getOperand(0).getImm();
656         if (ModifiedCFIIndices.count(CFINum))
657           continue;
658         ModifiedCFIIndices.insert(CFINum);
659         const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
660         const MCCFIInstruction::OpType Operation = CFI->getOperation();
661         if (Operation == MCCFIInstruction::OpDefCfa ||
662             Operation == MCCFIInstruction::OpDefCfaOffset)
663           Adjustment = 0 - Adjustment;
664         LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
665                           << " to " << (CFI->getOffset() + Adjustment) << "\n");
666         BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
667         continue;
668       }
669       int32_t SrcImm = 0;
670       MCPhysReg Reg = 0;
671       MCPhysReg StackPtrReg = 0;
672       int64_t StackOffset = 0;
673       bool IsIndexed = false;
674       bool IsLoad = false;
675       bool IsStore = false;
676       bool IsSimple = false;
677       bool IsStoreFromReg = false;
678       uint8_t Size = 0;
679       bool Success = false;
680       Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
681                                       Reg, SrcImm, StackPtrReg, StackOffset,
682                                       Size, IsSimple, IsIndexed);
683       if (!Success) {
684         // SP update based on FP value
685         Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
686         assert(Success);
687         continue;
688       }
689       assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
690       if (StackPtrReg != BC.MIB->getFramePointer())
691         Adjustment = -Adjustment;
692       if (IsLoad)
693         Success = BC.MIB->createRestoreFromStack(
694             Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
695       else if (IsStore)
696         Success = BC.MIB->createSaveToStack(
697             Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
698       LLVM_DEBUG({
699         dbgs() << "Adjusted instruction: ";
700         Inst.dump();
701       });
702       assert(Success);
703     }
704   }
705 }
706 
707 void StackLayoutModifier::initialize() {
708   classifyStackAccesses();
709   classifyCFIs();
710   IsInitialized = true;
711 }
712 
713 std::atomic_uint64_t ShrinkWrapping::SpillsMovedRegularMode{0};
714 std::atomic_uint64_t ShrinkWrapping::SpillsMovedPushPopMode{0};
715 
716 using BBIterTy = BinaryBasicBlock::iterator;
717 
718 void ShrinkWrapping::classifyCSRUses() {
719   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
720   StackPointerTracking &SPT = Info.getStackPointerTracking();
721   UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
722                                      BitVector(DA.NumInstrs, false));
723 
724   const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
725   for (BinaryBasicBlock &BB : BF) {
726     for (MCInst &Inst : BB) {
727       if (BC.MIB->isCFI(Inst))
728         continue;
729       BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
730       BC.MIB->getTouchedRegs(Inst, BV);
731       BV &= CSA.CalleeSaved;
732       for (int I : BV.set_bits()) {
733         if (I == 0)
734           continue;
735         if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
736           UsesByReg[I].set(DA.ExprToIdx[&Inst]);
737       }
738       if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
739         continue;
740       BV = CSA.CalleeSaved;
741       BV &= FPAliases;
742       for (int I : BV.set_bits())
743         UsesByReg[I].set(DA.ExprToIdx[&Inst]);
744     }
745   }
746 }
747 
748 void ShrinkWrapping::pruneUnwantedCSRs() {
749   BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
750   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
751     if (!CSA.CalleeSaved[I])
752       continue;
753     if (ParamRegs[I]) {
754       CSA.CalleeSaved.reset(I);
755       continue;
756     }
757     if (UsesByReg[I].empty()) {
758       LLVM_DEBUG(
759           dbgs()
760           << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
761           << "\n");
762       CSA.CalleeSaved.reset(I);
763       continue;
764     }
765     if (!CSA.HasRestores[I]) {
766       LLVM_DEBUG(
767           dbgs() << "Dismissing Callee-Saved Reg because it does not have "
768                     "restores:"
769                  << I << "\n");
770       CSA.CalleeSaved.reset(I);
771     }
772   }
773 }
774 
775 void ShrinkWrapping::computeSaveLocations() {
776   SavePos = std::vector<SmallSetVector<MCInst *, 4>>(BC.MRI->getNumRegs());
777   ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
778   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
779   StackPointerTracking &SPT = Info.getStackPointerTracking();
780 
781   LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
782   for (BinaryBasicBlock &BB : BF) {
783     LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
784 
785     MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
786     if (!First)
787       continue;
788 
789     // Use reaching instructions to detect if we are inside a loop - if we
790     // are, do not consider this BB as valid placement for saves.
791     if (RI.isInLoop(BB))
792       continue;
793 
794     const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
795     // If we don't know stack state at this point, bail
796     if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
797         (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
798       continue;
799 
800     for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
801       if (!CSA.CalleeSaved[I])
802         continue;
803 
804       BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
805       for (int J : UsesByReg[I].set_bits())
806         if (DA.doesADominateB(*First, J))
807           BBDominatedUses.set(J);
808       LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
809                         << BBDominatedUses.count() << " uses for reg " << I
810                         << ". Total uses for reg is " << UsesByReg[I].count()
811                         << "\n");
812       BBDominatedUses &= UsesByReg[I];
813       if (BBDominatedUses == UsesByReg[I]) {
814         LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
815                           << " as a save pos for " << I << "\n");
816         SavePos[I].insert(First);
817         LLVM_DEBUG({
818           dbgs() << "Dominated uses are:\n";
819           for (int J : UsesByReg[I].set_bits()) {
820             dbgs() << "Idx " << J << ": ";
821             DA.Expressions[J]->dump();
822           }
823         });
824       }
825     }
826   }
827 
828   BestSaveCount = std::vector<uint64_t>(BC.MRI->getNumRegs(),
829                                         std::numeric_limits<uint64_t>::max());
830   BestSavePos = std::vector<MCInst *>(BC.MRI->getNumRegs(), nullptr);
831   auto &InsnToBB = Info.getInsnToBBMap();
832   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
833     if (!CSA.CalleeSaved[I])
834       continue;
835 
836     for (MCInst *Pos : SavePos[I]) {
837       BinaryBasicBlock *BB = InsnToBB[Pos];
838       uint64_t Count = BB->getExecutionCount();
839       if (Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
840           Count < BestSaveCount[I]) {
841         BestSavePos[I] = Pos;
842         BestSaveCount[I] = Count;
843       }
844     }
845   }
846 }
847 
848 void ShrinkWrapping::computeDomOrder() {
849   std::vector<MCPhysReg> Order;
850   for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
851     Order.push_back(I);
852   }
853 
854   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
855   auto &InsnToBB = Info.getInsnToBBMap();
856   llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) {
857     BinaryBasicBlock *BBA = BestSavePos[A] ? InsnToBB[BestSavePos[A]] : nullptr;
858     BinaryBasicBlock *BBB = BestSavePos[B] ? InsnToBB[BestSavePos[B]] : nullptr;
859     if (BBA == BBB)
860       return A < B;
861     if (!BBA && BBB)
862       return false;
863     if (BBA && !BBB)
864       return true;
865     if (DA.doesADominateB(*BestSavePos[A], *BestSavePos[B]))
866       return true;
867     if (DA.doesADominateB(*BestSavePos[B], *BestSavePos[A]))
868       return false;
869     return A < B;
870   });
871 
872   for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
873     DomOrder[Order[I]] = I;
874 }
875 
876 bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
877                                        uint64_t &TotalEstimatedWin) {
878   const uint64_t CurSavingCost = CSA.SavingCost[CSR];
879   if (!CSA.CalleeSaved[CSR])
880     return false;
881 
882   uint64_t BestCount = BestSaveCount[CSR];
883   BestPosSave = BestSavePos[CSR];
884   bool ShouldMove = false;
885   if (BestCount != std::numeric_limits<uint64_t>::max() &&
886       BestCount < (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost) {
887     LLVM_DEBUG({
888       auto &InsnToBB = Info.getInsnToBBMap();
889       dbgs() << "Better position for saves found in func " << BF.getPrintName()
890              << " count << " << BF.getKnownExecutionCount() << "\n";
891       dbgs() << "Reg: " << CSR
892              << "; New BB: " << InsnToBB[BestPosSave]->getName()
893              << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
894     });
895     TotalEstimatedWin += CurSavingCost - BestCount;
896     ShouldMove = true;
897   }
898 
899   if (!ShouldMove)
900     return false;
901   if (!BestPosSave) {
902     LLVM_DEBUG({
903       dbgs() << "Dropping opportunity because we don't know where to put "
904                 "stores -- total est. freq reduc: "
905              << TotalEstimatedWin << "\n";
906     });
907     return false;
908   }
909   return true;
910 }
911 
912 /// Auxiliar function used to create basic blocks for critical edges and update
913 /// the dominance frontier with these new locations
914 void ShrinkWrapping::splitFrontierCritEdges(
915     BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
916     const SmallVector<bool, 4> &IsCritEdge,
917     const SmallVector<BinaryBasicBlock *, 4> &From,
918     const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
919   LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
920                     << BF.getPrintName() << "\n");
921   // For every FromBB, there might be one or more critical edges, with
922   // To[I] containing destination BBs. It's important to memorize
923   // the original size of the Frontier as we may append to it while splitting
924   // critical edges originating with blocks with multiple destinations.
925   for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
926     if (!IsCritEdge[I])
927       continue;
928     if (To[I].empty())
929       continue;
930     BinaryBasicBlock *FromBB = From[I];
931     LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
932                       << "\n");
933     // Split edge for every DestinationBBs
934     for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
935       BinaryBasicBlock *DestinationBB = To[I][DI];
936       LLVM_DEBUG(dbgs() << "   - Dest : " << DestinationBB->getName() << "\n");
937       BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
938       // Insert dummy instruction so this BB is never empty (we need this for
939       // PredictiveStackPointerTracking to work, since it annotates instructions
940       // and not BBs).
941       if (NewBB->empty()) {
942         MCInst NewInst;
943         BC.MIB->createNoop(NewInst);
944         NewBB->addInstruction(std::move(NewInst));
945         scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
946       }
947 
948       // Update frontier
949       ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
950       if (DI == 0) {
951         // Update frontier inplace
952         Frontier[I] = NewFrontierPP;
953         LLVM_DEBUG(dbgs() << "   - Update frontier with " << NewBB->getName()
954                           << '\n');
955       } else {
956         // Append new frontier to the end of the list
957         Frontier.push_back(NewFrontierPP);
958         LLVM_DEBUG(dbgs() << "   - Append frontier " << NewBB->getName()
959                           << '\n');
960       }
961     }
962   }
963 }
964 
965 SmallVector<ProgramPoint, 4>
966 ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
967                                    uint64_t TotalEstimatedWin) {
968   SmallVector<ProgramPoint, 4> Frontier;
969   SmallVector<bool, 4> IsCritEdge;
970   bool CannotPlace = false;
971   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
972 
973   SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
974   SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
975   // In case of a critical edge, we need to create extra BBs to host restores
976   // into edges transitioning to the dominance frontier, otherwise we pull these
977   // restores to inside the dominated area.
978   Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
979   LLVM_DEBUG({
980     dbgs() << "Dumping dominance frontier for ";
981     BC.printInstruction(dbgs(), *BestPosSave);
982     for (ProgramPoint &PP : Frontier)
983       if (PP.isInst())
984         BC.printInstruction(dbgs(), *PP.getInst());
985       else
986         dbgs() << PP.getBB()->getName() << "\n";
987   });
988   for (ProgramPoint &PP : Frontier) {
989     bool HasCritEdges = false;
990     if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
991         doesInstUsesCSR(*PP.getInst(), CSR))
992       CannotPlace = true;
993     BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
994     CritEdgesFrom.emplace_back(FrontierBB);
995     CritEdgesTo.emplace_back(0);
996     SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
997     // Check for invoke instructions at the dominance frontier, which indicates
998     // the landing pad is not dominated.
999     if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
1000       LLVM_DEBUG(
1001           dbgs() << "Bailing on restore placement to avoid LP splitting\n");
1002       Frontier.clear();
1003       return Frontier;
1004     }
1005     doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
1006       if (!DA.doesADominateB(*BestPosSave, P)) {
1007         Dests.emplace_back(Info.getParentBB(P));
1008         return;
1009       }
1010       HasCritEdges = true;
1011     });
1012     IsCritEdge.push_back(HasCritEdges);
1013   }
1014   // Restores cannot be placed in empty BBs because we have a dataflow
1015   // analysis that depends on insertions happening before real instructions
1016   // (PredictiveStackPointerTracking). Detect now for empty BBs and add a
1017   // dummy nop that is scheduled to be removed later.
1018   bool InvalidateRequired = false;
1019   for (BinaryBasicBlock *&BB : BF.layout()) {
1020     if (BB->size() != 0)
1021       continue;
1022     MCInst NewInst;
1023     BC.MIB->createNoop(NewInst);
1024     auto II = BB->addInstruction(std::move(NewInst));
1025     scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
1026     InvalidateRequired = true;
1027   }
1028   if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
1029     LLVM_DEBUG({
1030       dbgs() << "Now detected critical edges in the following frontier:\n";
1031       for (ProgramPoint &PP : Frontier) {
1032         if (PP.isBB()) {
1033           dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
1034         } else {
1035           dbgs() << "  Inst: ";
1036           PP.getInst()->dump();
1037         }
1038       }
1039     });
1040     splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
1041                            CritEdgesTo);
1042     InvalidateRequired = true;
1043   }
1044   if (InvalidateRequired) {
1045     // BitVectors that represent all insns of the function are invalid now
1046     // since we changed BBs/Insts. Re-run steps that depend on pointers being
1047     // valid
1048     Info.invalidateAll();
1049     classifyCSRUses();
1050   }
1051   if (CannotPlace) {
1052     LLVM_DEBUG({
1053       dbgs() << "Dropping opportunity because restore placement failed"
1054                 " -- total est. freq reduc: "
1055              << TotalEstimatedWin << "\n";
1056     });
1057     Frontier.clear();
1058     return Frontier;
1059   }
1060   return Frontier;
1061 }
1062 
1063 bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
1064                                           int64_t SaveOffset) {
1065   if (FA.requiresAlignment(BF)) {
1066     LLVM_DEBUG({
1067       dbgs() << "Reg " << CSR
1068              << " is not using push/pops due to function "
1069                 "alignment requirements.\n";
1070     });
1071     return false;
1072   }
1073   for (MCInst *Save : CSA.getSavesByReg(CSR)) {
1074     if (!SLM.canCollapseRegion(Save)) {
1075       LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
1076       return false;
1077     }
1078   }
1079   // Abort if one of the restores for this CSR is not a POP.
1080   for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
1081     if (!BC.MIB->isPop(*Load)) {
1082       LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
1083       return false;
1084     }
1085   }
1086 
1087   StackPointerTracking &SPT = Info.getStackPointerTracking();
1088   // Abort if we are inserting a push into an entry BB (offset -8) and this
1089   // func sets up a frame pointer.
1090   if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
1091       SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
1092     LLVM_DEBUG({
1093       dbgs() << "Reg " << CSR
1094              << " cannot insert region or we are "
1095                 "trying to insert a push into entry bb.\n";
1096     });
1097     return false;
1098   }
1099   return true;
1100 }
1101 
1102 SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
1103     const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
1104     unsigned CSR) {
1105   SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
1106   // Moving pop locations to the correct sp offset
1107   ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
1108   StackPointerTracking &SPT = Info.getStackPointerTracking();
1109   for (ProgramPoint &PP : FixedRestorePoints) {
1110     BinaryBasicBlock *BB = Info.getParentBB(PP);
1111     bool Found = false;
1112     if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
1113         SaveOffset) {
1114       BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
1115       BV &= UsesByReg[CSR];
1116       if (!BV.any()) {
1117         Found = true;
1118         PP = BB;
1119         continue;
1120       }
1121     }
1122     for (auto RIt = BB->rbegin(), End = BB->rend(); RIt != End; ++RIt) {
1123       if (SPT.getStateBefore(*RIt)->first == SaveOffset) {
1124         BitVector BV = *RI.getStateAt(*RIt);
1125         BV &= UsesByReg[CSR];
1126         if (!BV.any()) {
1127           Found = true;
1128           PP = &*RIt;
1129           break;
1130         }
1131       }
1132     }
1133     if (!Found) {
1134       LLVM_DEBUG({
1135         dbgs() << "Could not find restore insertion point for " << CSR
1136                << ", falling back to load/store mode\n";
1137       });
1138       FixedRestorePoints.clear();
1139       return FixedRestorePoints;
1140     }
1141   }
1142   return FixedRestorePoints;
1143 }
1144 
1145 void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
1146                                                     bool UsePushPops) {
1147 
1148   for (BinaryBasicBlock *&BB : BF.layout()) {
1149     std::vector<MCInst *> CFIs;
1150     for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
1151       MCInst &Inst = *I;
1152       if (BC.MIB->isCFI(Inst)) {
1153         // Delete all offset CFIs related to this CSR
1154         if (SLM.getOffsetCFIReg(Inst) == CSR) {
1155           HasDeletedOffsetCFIs[CSR] = true;
1156           scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
1157           continue;
1158         }
1159         CFIs.push_back(&Inst);
1160         continue;
1161       }
1162 
1163       uint16_t SavedReg = CSA.getSavedReg(Inst);
1164       uint16_t RestoredReg = CSA.getRestoredReg(Inst);
1165       if (SavedReg != CSR && RestoredReg != CSR) {
1166         CFIs.clear();
1167         continue;
1168       }
1169 
1170       scheduleChange(&Inst, WorklistItem(UsePushPops
1171                                              ? WorklistItem::Erase
1172                                              : WorklistItem::ChangeToAdjustment,
1173                                          CSR));
1174 
1175       // Delete associated CFIs
1176       const bool RecordDeletedPushCFIs =
1177           SavedReg == CSR && DeletedPushCFIs[CSR].empty();
1178       const bool RecordDeletedPopCFIs =
1179           RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
1180       for (MCInst *CFI : CFIs) {
1181         const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
1182         // Do not touch these...
1183         if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
1184             MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
1185           continue;
1186         scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
1187         if (RecordDeletedPushCFIs) {
1188           // Do not record this to be replayed later because we are going to
1189           // rebuild it.
1190           if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
1191             continue;
1192           DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
1193         }
1194         if (RecordDeletedPopCFIs) {
1195           if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
1196             continue;
1197           DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
1198         }
1199       }
1200       CFIs.clear();
1201     }
1202   }
1203 }
1204 
1205 bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
1206   if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
1207       CSA.getRestoredReg(Inst) == CSR)
1208     return false;
1209   BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
1210   BC.MIB->getTouchedRegs(Inst, BV);
1211   return BV[CSR];
1212 }
1213 
1214 void ShrinkWrapping::scheduleSaveRestoreInsertions(
1215     unsigned CSR, MCInst *BestPosSave,
1216     SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
1217   auto &InsnToBB = Info.getInsnToBBMap();
1218   const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
1219   const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
1220   assert(FIESave && FIELoad && "Invalid CSR");
1221 
1222   LLVM_DEBUG({
1223     dbgs() << "Scheduling save insertion at: ";
1224     BestPosSave->dump();
1225   });
1226 
1227   scheduleChange(BestPosSave,
1228                  UsePushPops ? WorklistItem::InsertPushOrPop
1229                              : WorklistItem::InsertLoadOrStore,
1230                  *FIESave, CSR);
1231 
1232   for (ProgramPoint &PP : RestorePoints) {
1233     BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
1234     LLVM_DEBUG({
1235       dbgs() << "Scheduling restore insertion at: ";
1236       if (PP.isInst())
1237         PP.getInst()->dump();
1238       else
1239         dbgs() << PP.getBB()->getName() << "\n";
1240     });
1241     MCInst *Term =
1242         FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
1243     if (Term)
1244       PP = Term;
1245     bool PrecededByPrefix = false;
1246     if (PP.isInst()) {
1247       auto Iter = FrontierBB->findInstruction(PP.getInst());
1248       if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
1249         --Iter;
1250         PrecededByPrefix = BC.MIB->isPrefix(*Iter);
1251       }
1252     }
1253     if (PP.isInst() &&
1254         (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
1255       assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
1256              "cannot move to end of bb");
1257       scheduleChange(InsnToBB[PP.getInst()],
1258                      UsePushPops ? WorklistItem::InsertPushOrPop
1259                                  : WorklistItem::InsertLoadOrStore,
1260                      *FIELoad, CSR);
1261       continue;
1262     }
1263     scheduleChange(PP,
1264                    UsePushPops ? WorklistItem::InsertPushOrPop
1265                                : WorklistItem::InsertLoadOrStore,
1266                    *FIELoad, CSR);
1267   }
1268 }
1269 
1270 void ShrinkWrapping::moveSaveRestores() {
1271   bool DisablePushPopMode = false;
1272   bool UsedPushPopMode = false;
1273   // Keeps info about successfully moved regs: reg index, save position and
1274   // save size
1275   std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;
1276 
1277   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
1278     MCInst *BestPosSave = nullptr;
1279     uint64_t TotalEstimatedWin = 0;
1280     if (!isBestSavePosCold(I, BestPosSave, TotalEstimatedWin))
1281       continue;
1282     SmallVector<ProgramPoint, 4> RestorePoints =
1283         doRestorePlacement(BestPosSave, I, TotalEstimatedWin);
1284     if (RestorePoints.empty())
1285       continue;
1286 
1287     const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
1288     const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
1289     (void)FIELoad;
1290     assert(FIESave && FIELoad);
1291     StackPointerTracking &SPT = Info.getStackPointerTracking();
1292     const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
1293     int SaveOffset = SPFP.first;
1294     uint8_t SaveSize = FIESave->Size;
1295 
1296     // If we don't know stack state at this point, bail
1297     if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
1298         (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
1299       continue;
1300 
1301     // Operation mode: if true, will insert push/pops instead of loads/restores
1302     bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);
1303 
1304     if (UsePushPops) {
1305       SmallVector<ProgramPoint, 4> FixedRestorePoints =
1306           fixPopsPlacements(RestorePoints, SaveOffset, I);
1307       if (FixedRestorePoints.empty())
1308         UsePushPops = false;
1309       else
1310         RestorePoints = FixedRestorePoints;
1311     }
1312 
1313     // Disable push-pop mode for all CSRs in this function
1314     if (!UsePushPops)
1315       DisablePushPopMode = true;
1316     else
1317       UsedPushPopMode = true;
1318 
1319     scheduleOldSaveRestoresRemoval(I, UsePushPops);
1320     scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
1321     MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
1322   }
1323 
1324   // Revert push-pop mode if it failed for a single CSR
1325   if (DisablePushPopMode && UsedPushPopMode) {
1326     UsedPushPopMode = false;
1327     for (BinaryBasicBlock &BB : BF) {
1328       auto WRI = Todo.find(&BB);
1329       if (WRI != Todo.end()) {
1330         std::vector<WorklistItem> &TodoList = WRI->second;
1331         for (WorklistItem &Item : TodoList)
1332           if (Item.Action == WorklistItem::InsertPushOrPop)
1333             Item.Action = WorklistItem::InsertLoadOrStore;
1334       }
1335       for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
1336         MCInst &Inst = *I;
1337         auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
1338             Inst, getAnnotationIndex());
1339         if (!TodoList)
1340           continue;
1341         bool isCFI = BC.MIB->isCFI(Inst);
1342         for (WorklistItem &Item : *TodoList) {
1343           if (Item.Action == WorklistItem::InsertPushOrPop)
1344             Item.Action = WorklistItem::InsertLoadOrStore;
1345           if (!isCFI && Item.Action == WorklistItem::Erase)
1346             Item.Action = WorklistItem::ChangeToAdjustment;
1347         }
1348       }
1349     }
1350   }
1351 
1352   // Update statistics
1353   if (!UsedPushPopMode) {
1354     SpillsMovedRegularMode += MovedRegs.size();
1355     return;
1356   }
1357 
1358   // Schedule modifications to stack-accessing instructions via
1359   // StackLayoutModifier.
1360   SpillsMovedPushPopMode += MovedRegs.size();
1361   for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
1362     unsigned RegNdx;
1363     MCInst *SavePos;
1364     size_t SaveSize;
1365     std::tie(RegNdx, SavePos, SaveSize) = I;
1366     for (MCInst *Save : CSA.getSavesByReg(RegNdx))
1367       SLM.collapseRegion(Save);
1368     SLM.insertRegion(SavePos, SaveSize);
1369   }
1370 }
1371 
1372 namespace {
1373 /// Helper function to identify whether two basic blocks created by splitting
1374 /// a critical edge have the same contents.
1375 bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
1376                             const BinaryBasicBlock &B) {
1377   if (A.succ_size() != B.succ_size())
1378     return false;
1379   if (A.succ_size() != 1)
1380     return false;
1381 
1382   if (*A.succ_begin() != *B.succ_begin())
1383     return false;
1384 
1385   if (A.size() != B.size())
1386     return false;
1387 
1388   // Compare instructions
1389   auto I = A.begin(), E = A.end();
1390   auto OtherI = B.begin(), OtherE = B.end();
1391   while (I != E && OtherI != OtherE) {
1392     if (I->getOpcode() != OtherI->getOpcode())
1393       return false;
1394     if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
1395           return true;
1396         }))
1397       return false;
1398     ++I;
1399     ++OtherI;
1400   }
1401   return true;
1402 }
1403 } // namespace
1404 
1405 bool ShrinkWrapping::foldIdenticalSplitEdges() {
1406   bool Changed = false;
1407   for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
1408     BinaryBasicBlock &BB = *Iter;
1409     if (!BB.getName().startswith(".LSplitEdge"))
1410       continue;
1411     for (auto RIter = BF.rbegin(); RIter != BF.rend(); ++RIter) {
1412       BinaryBasicBlock &RBB = *RIter;
1413       if (&RBB == &BB)
1414         break;
1415       if (!RBB.getName().startswith(".LSplitEdge") || !RBB.isValid() ||
1416           !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
1417         continue;
1418       assert(RBB.pred_size() == 1 && "Invalid split edge BB");
1419       BinaryBasicBlock *Pred = *RBB.pred_begin();
1420       uint64_t OrigCount = Pred->branch_info_begin()->Count;
1421       uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
1422       BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
1423       Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
1424       Changed = true;
1425       // Remove the block from CFG
1426       RBB.markValid(false);
1427     }
1428   }
1429 
1430   return Changed;
1431 }
1432 
1433 namespace {
1434 
1435 // A special StackPointerTracking that compensates for our future plans
1436 // in removing/adding insn.
1437 class PredictiveStackPointerTracking
1438     : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
1439   friend class DataflowAnalysis<PredictiveStackPointerTracking,
1440                                 std::pair<int, int>>;
1441   decltype(ShrinkWrapping::Todo) &TodoMap;
1442   DataflowInfoManager &Info;
1443 
1444   Optional<unsigned> AnnotationIndex;
1445 
1446 protected:
1447   void compNextAux(const MCInst &Point,
1448                    const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
1449                    std::pair<int, int> &Res) {
1450     for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
1451       if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
1452           BC.MIB->isPush(Point)) {
1453         Res.first += BC.MIB->getPushSize(Point);
1454         continue;
1455       }
1456       if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
1457           BC.MIB->isPop(Point)) {
1458         Res.first -= BC.MIB->getPopSize(Point);
1459         continue;
1460       }
1461       if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
1462           Item.FIEToInsert.IsStore) {
1463         Res.first -= Item.FIEToInsert.Size;
1464         continue;
1465       }
1466       if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
1467           Item.FIEToInsert.IsLoad) {
1468         Res.first += Item.FIEToInsert.Size;
1469         continue;
1470       }
1471     }
1472   }
1473 
1474   std::pair<int, int> computeNext(const MCInst &Point,
1475                                   const std::pair<int, int> &Cur) {
1476     std::pair<int, int> Res =
1477         StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
1478             Point, Cur);
1479     if (Res.first == StackPointerTracking::SUPERPOSITION ||
1480         Res.first == StackPointerTracking::EMPTY)
1481       return Res;
1482     auto TodoItems =
1483         BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
1484             Point, ShrinkWrapping::getAnnotationName());
1485     if (TodoItems)
1486       compNextAux(Point, *TodoItems, Res);
1487     auto &InsnToBBMap = Info.getInsnToBBMap();
1488     if (&*InsnToBBMap[&Point]->rbegin() != &Point)
1489       return Res;
1490     auto WRI = TodoMap.find(InsnToBBMap[&Point]);
1491     if (WRI == TodoMap.end())
1492       return Res;
1493     compNextAux(Point, WRI->second, Res);
1494     return Res;
1495   }
1496 
1497   StringRef getAnnotationName() const {
1498     return StringRef("PredictiveStackPointerTracking");
1499   }
1500 
1501 public:
1502   PredictiveStackPointerTracking(BinaryFunction &BF,
1503                                  decltype(ShrinkWrapping::Todo) &TodoMap,
1504                                  DataflowInfoManager &Info,
1505                                  MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
1506       : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
1507                                                                  AllocatorId),
1508         TodoMap(TodoMap), Info(Info) {}
1509 
1510   void run() {
1511     StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
1512   }
1513 };
1514 
1515 } // end anonymous namespace
1516 
1517 void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
1518                                       int SPValPop) {
1519   MCInst *SavePoint = nullptr;
1520   for (BinaryBasicBlock &BB : BF) {
1521     for (auto InstIter = BB.rbegin(), EndIter = BB.rend(); InstIter != EndIter;
1522          ++InstIter) {
1523       int32_t SrcImm = 0;
1524       MCPhysReg Reg = 0;
1525       MCPhysReg StackPtrReg = 0;
1526       int64_t StackOffset = 0;
1527       bool IsIndexed = false;
1528       bool IsLoad = false;
1529       bool IsStore = false;
1530       bool IsSimple = false;
1531       bool IsStoreFromReg = false;
1532       uint8_t Size = 0;
1533       if (!BC.MIB->isStackAccess(*InstIter, IsLoad, IsStore, IsStoreFromReg,
1534                                  Reg, SrcImm, StackPtrReg, StackOffset, Size,
1535                                  IsSimple, IsIndexed))
1536         continue;
1537       if (Reg != CSR || !IsStore || !IsSimple)
1538         continue;
1539       SavePoint = &*InstIter;
1540       break;
1541     }
1542     if (SavePoint)
1543       break;
1544   }
1545   assert(SavePoint);
1546   LLVM_DEBUG({
1547     dbgs() << "Now using as save point for reg " << CSR << " :";
1548     SavePoint->dump();
1549   });
1550   bool PrevAffectedZone = false;
1551   BinaryBasicBlock *PrevBB = nullptr;
1552   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
1553   for (BinaryBasicBlock *BB : BF.layout()) {
1554     if (BB->size() == 0)
1555       continue;
1556     const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
1557     const bool InAffectedZoneAtBegin =
1558         (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
1559     bool InAffectedZone = InAffectedZoneAtBegin;
1560     for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
1561       const bool CurZone = DA.count(*InstIter, *SavePoint);
1562       if (InAffectedZone != CurZone) {
1563         auto InsertionIter = InstIter;
1564         ++InsertionIter;
1565         InAffectedZone = CurZone;
1566         if (InAffectedZone)
1567           InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
1568                                             SPValPop);
1569         else
1570           InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
1571                                             SPValPush);
1572         --InstIter;
1573       }
1574     }
1575     // Are we at the first basic block or hot-cold split point?
1576     if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
1577       if (InAffectedZoneAtBegin)
1578         insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
1579     } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
1580       if (InAffectedZoneAtBegin)
1581         insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
1582       else
1583         insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
1584     }
1585     PrevAffectedZone = InAffectedZoneAtEnd;
1586     PrevBB = BB;
1587   }
1588 }
1589 
1590 void ShrinkWrapping::rebuildCFIForSP() {
1591   for (BinaryBasicBlock &BB : BF) {
1592     for (MCInst &Inst : BB) {
1593       if (!BC.MIB->isCFI(Inst))
1594         continue;
1595       const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
1596       if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
1597         BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
1598     }
1599   }
1600 
1601   int PrevSPVal = -8;
1602   BinaryBasicBlock *PrevBB = nullptr;
1603   StackPointerTracking &SPT = Info.getStackPointerTracking();
1604   for (BinaryBasicBlock *BB : BF.layout()) {
1605     if (BB->size() == 0)
1606       continue;
1607     const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
1608     const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
1609     int SPVal = SPValAtBegin;
1610     for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
1611       const int CurVal = SPT.getStateAt(*Iter)->first;
1612       if (SPVal != CurVal) {
1613         auto InsertionIter = Iter;
1614         ++InsertionIter;
1615         Iter = BF.addCFIInstruction(
1616             BB, InsertionIter,
1617             MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
1618         SPVal = CurVal;
1619       }
1620     }
1621     if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
1622       BF.addCFIInstruction(
1623           BB, BB->begin(),
1624           MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
1625     else if (SPValAtBegin != PrevSPVal)
1626       BF.addCFIInstruction(
1627           PrevBB, PrevBB->end(),
1628           MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
1629     PrevSPVal = SPValAtEnd;
1630     PrevBB = BB;
1631   }
1632 
1633   for (BinaryBasicBlock &BB : BF)
1634     for (auto I = BB.begin(); I != BB.end();)
1635       if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
1636         I = BB.eraseInstruction(I);
1637       else
1638         ++I;
1639 }
1640 
1641 MCInst ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
1642                                          const FrameIndexEntry &FIE,
1643                                          bool CreatePushOrPop) {
1644   MCInst NewInst;
1645   if (SPVal != StackPointerTracking::SUPERPOSITION &&
1646       SPVal != StackPointerTracking::EMPTY) {
1647     if (FIE.IsLoad) {
1648       if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
1649                                           FIE.StackOffset - SPVal, FIE.RegOrImm,
1650                                           FIE.Size)) {
1651         errs() << "createRestoreFromStack: not supported on this platform\n";
1652         abort();
1653       }
1654     } else {
1655       if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
1656                                      FIE.StackOffset - SPVal, FIE.RegOrImm,
1657                                      FIE.Size)) {
1658         errs() << "createSaveToStack: not supported on this platform\n";
1659         abort();
1660       }
1661     }
1662     if (CreatePushOrPop)
1663       BC.MIB->changeToPushOrPop(NewInst);
1664     return NewInst;
1665   }
1666   assert(FPVal != StackPointerTracking::SUPERPOSITION &&
1667          FPVal != StackPointerTracking::EMPTY);
1668 
1669   if (FIE.IsLoad) {
1670     if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
1671                                         FIE.StackOffset - FPVal, FIE.RegOrImm,
1672                                         FIE.Size)) {
1673       errs() << "createRestoreFromStack: not supported on this platform\n";
1674       abort();
1675     }
1676   } else {
1677     if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
1678                                    FIE.StackOffset - FPVal, FIE.RegOrImm,
1679                                    FIE.Size)) {
1680       errs() << "createSaveToStack: not supported on this platform\n";
1681       abort();
1682     }
1683   }
1684   return NewInst;
1685 }
1686 
1687 void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
1688   const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
1689   if (UpdatedCFIs.count(CFI))
1690     return;
1691 
1692   switch (CFI->getOperation()) {
1693   case MCCFIInstruction::OpDefCfa:
1694   case MCCFIInstruction::OpDefCfaRegister:
1695   case MCCFIInstruction::OpDefCfaOffset:
1696     CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
1697     break;
1698   case MCCFIInstruction::OpOffset:
1699   default:
1700     break;
1701   }
1702 
1703   UpdatedCFIs.insert(CFI);
1704 }
1705 
1706 BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
1707                                                 BBIterTy Pos, unsigned Reg,
1708                                                 bool isPush, int Sz,
1709                                                 int64_t NewOffset) {
1710   if (isPush) {
1711     for (uint32_t Idx : DeletedPushCFIs[Reg]) {
1712       Pos = BF.addCFIPseudo(&BB, Pos, Idx);
1713       updateCFIInstOffset(*Pos++, NewOffset);
1714     }
1715     if (HasDeletedOffsetCFIs[Reg]) {
1716       Pos = BF.addCFIInstruction(
1717           &BB, Pos,
1718           MCCFIInstruction::createOffset(
1719               nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset));
1720       ++Pos;
1721     }
1722   } else {
1723     for (uint32_t Idx : DeletedPopCFIs[Reg]) {
1724       Pos = BF.addCFIPseudo(&BB, Pos, Idx);
1725       updateCFIInstOffset(*Pos++, NewOffset);
1726     }
1727     if (HasDeletedOffsetCFIs[Reg]) {
1728       Pos = BF.addCFIInstruction(
1729           &BB, Pos,
1730           MCCFIInstruction::createSameValue(
1731               nullptr, BC.MRI->getDwarfRegNum(Reg, false)));
1732       ++Pos;
1733     }
1734   }
1735   return Pos;
1736 }
1737 
1738 BBIterTy ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
1739                                           BinaryBasicBlock *CurBB,
1740                                           const WorklistItem &Item,
1741                                           int64_t SPVal, int64_t FPVal) {
1742   // Trigger CFI reconstruction for this CSR if necessary - writing to
1743   // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
1744   if ((Item.FIEToInsert.IsStore &&
1745        !DeletedPushCFIs[Item.AffectedReg].empty()) ||
1746       (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
1747       HasDeletedOffsetCFIs[Item.AffectedReg]) {
1748     if (Item.Action == WorklistItem::InsertPushOrPop) {
1749       if (Item.FIEToInsert.IsStore)
1750         PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
1751       else
1752         PopOffsetByReg[Item.AffectedReg] = SPVal;
1753     } else {
1754       if (Item.FIEToInsert.IsStore)
1755         PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
1756       else
1757         PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
1758     }
1759   }
1760 
1761   LLVM_DEBUG({
1762     dbgs() << "Creating stack access with SPVal = " << SPVal
1763            << "; stack offset = " << Item.FIEToInsert.StackOffset
1764            << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
1765            << "\n";
1766   });
1767   MCInst NewInst =
1768       createStackAccess(SPVal, FPVal, Item.FIEToInsert,
1769                         Item.Action == WorklistItem::InsertPushOrPop);
1770   if (InsertionPoint != CurBB->end()) {
1771     LLVM_DEBUG({
1772       dbgs() << "Adding before Inst: ";
1773       InsertionPoint->dump();
1774       dbgs() << "the following inst: ";
1775       NewInst.dump();
1776     });
1777     BBIterTy Iter =
1778         CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
1779     return ++Iter;
1780   }
1781   CurBB->addInstruction(std::move(NewInst));
1782   LLVM_DEBUG(dbgs() << "Adding to BB!\n");
1783   return CurBB->end();
1784 }
1785 
1786 BBIterTy ShrinkWrapping::processInsertionsList(
1787     BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
1788     std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
1789   bool HasInsertions = false;
1790   for (WorklistItem &Item : TodoList) {
1791     if (Item.Action == WorklistItem::Erase ||
1792         Item.Action == WorklistItem::ChangeToAdjustment)
1793       continue;
1794     HasInsertions = true;
1795     break;
1796   }
1797 
1798   if (!HasInsertions)
1799     return InsertionPoint;
1800 
1801   assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
1802            SPVal != StackPointerTracking::EMPTY) ||
1803           (FPVal != StackPointerTracking::SUPERPOSITION &&
1804            FPVal != StackPointerTracking::EMPTY)) &&
1805          "Cannot insert if we have no idea of the stack state here");
1806 
1807   // Revert the effect of PSPT for this location, we want SP Value before
1808   // insertions
1809   if (InsertionPoint == CurBB->end()) {
1810     for (WorklistItem &Item : TodoList) {
1811       if (Item.Action != WorklistItem::InsertPushOrPop)
1812         continue;
1813       if (Item.FIEToInsert.IsStore)
1814         SPVal += Item.FIEToInsert.Size;
1815       if (Item.FIEToInsert.IsLoad)
1816         SPVal -= Item.FIEToInsert.Size;
1817     }
1818   }
1819 
1820   // Reorder POPs to obey the correct dominance relation between them
1821   llvm::stable_sort(TodoList, [&](const WorklistItem &A,
1822                                   const WorklistItem &B) {
1823     if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) &&
1824         (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
1825       return false;
1826     if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad))
1827       return true;
1828     if ((B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
1829       return false;
1830     return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
1831   });
1832 
1833   // Process insertions
1834   for (WorklistItem &Item : TodoList) {
1835     if (Item.Action == WorklistItem::Erase ||
1836         Item.Action == WorklistItem::ChangeToAdjustment)
1837       continue;
1838 
1839     InsertionPoint =
1840         processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
1841     if (Item.Action == WorklistItem::InsertPushOrPop &&
1842         Item.FIEToInsert.IsStore)
1843       SPVal -= Item.FIEToInsert.Size;
1844     if (Item.Action == WorklistItem::InsertPushOrPop &&
1845         Item.FIEToInsert.IsLoad)
1846       SPVal += Item.FIEToInsert.Size;
1847   }
1848   return InsertionPoint;
1849 }
1850 
1851 bool ShrinkWrapping::processInsertions() {
1852   PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
1853   PSPT.run();
1854 
1855   bool Changes = false;
1856   for (BinaryBasicBlock &BB : BF) {
1857     // Process insertions before some inst.
1858     for (auto I = BB.begin(); I != BB.end(); ++I) {
1859       MCInst &Inst = *I;
1860       auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
1861           Inst, getAnnotationIndex());
1862       if (!TodoList)
1863         continue;
1864       Changes = true;
1865       std::vector<WorklistItem> List = *TodoList;
1866       LLVM_DEBUG({
1867         dbgs() << "Now processing insertions in " << BB.getName()
1868                << " before inst: ";
1869         Inst.dump();
1870       });
1871       auto Iter = I;
1872       std::pair<int, int> SPTState =
1873           *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
1874       I = processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
1875     }
1876     // Process insertions at the end of bb
1877     auto WRI = Todo.find(&BB);
1878     if (WRI != Todo.end()) {
1879       std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
1880       processInsertionsList(BB.end(), &BB, WRI->second, SPTState.first,
1881                             SPTState.second);
1882       Changes = true;
1883     }
1884   }
1885   return Changes;
1886 }
1887 
1888 void ShrinkWrapping::processDeletions() {
1889   LivenessAnalysis &LA = Info.getLivenessAnalysis();
1890   for (BinaryBasicBlock &BB : BF) {
1891     for (auto II = BB.begin(); II != BB.end(); ++II) {
1892       MCInst &Inst = *II;
1893       auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
1894           Inst, getAnnotationIndex());
1895       if (!TodoList)
1896         continue;
1897       // Process all deletions
1898       for (WorklistItem &Item : *TodoList) {
1899         if (Item.Action != WorklistItem::Erase &&
1900             Item.Action != WorklistItem::ChangeToAdjustment)
1901           continue;
1902 
1903         if (Item.Action == WorklistItem::ChangeToAdjustment) {
1904           // Is flag reg alive across this func?
1905           bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
1906           if (int Sz = BC.MIB->getPushSize(Inst)) {
1907             BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
1908             continue;
1909           }
1910           if (int Sz = BC.MIB->getPopSize(Inst)) {
1911             BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
1912             continue;
1913           }
1914         }
1915 
1916         LLVM_DEBUG({
1917           dbgs() << "Erasing: ";
1918           BC.printInstruction(dbgs(), Inst);
1919         });
1920         II = std::prev(BB.eraseInstruction(II));
1921         break;
1922       }
1923     }
1924   }
1925 }
1926 
1927 void ShrinkWrapping::rebuildCFI() {
1928   const bool FP = Info.getStackPointerTracking().HasFramePointer;
1929   Info.invalidateAll();
1930   if (!FP) {
1931     rebuildCFIForSP();
1932     Info.invalidateAll();
1933   }
1934   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
1935     if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0)
1936       continue;
1937     const int64_t SPValPush = PushOffsetByReg[I];
1938     const int64_t SPValPop = PopOffsetByReg[I];
1939     insertUpdatedCFI(I, SPValPush, SPValPop);
1940     Info.invalidateAll();
1941   }
1942 }
1943 
1944 bool ShrinkWrapping::perform() {
1945   HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
1946   PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
1947   PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
1948   DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
1949 
1950   if (BF.checkForAmbiguousJumpTables()) {
1951     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
1952                       << ".\n");
1953     // We could call disambiguateJumpTables here, but it is probably not worth
1954     // the cost (of duplicating potentially large jump tables that could regress
1955     // dcache misses). Moreover, ambiguous JTs are rare and coming from code
1956     // written in assembly language. Just bail.
1957     return false;
1958   }
1959   SLM.initialize();
1960   CSA.compute();
1961   classifyCSRUses();
1962   pruneUnwantedCSRs();
1963   computeSaveLocations();
1964   computeDomOrder();
1965   moveSaveRestores();
1966   LLVM_DEBUG({
1967     dbgs() << "Func before shrink-wrapping: \n";
1968     BF.dump();
1969   });
1970   SLM.performChanges();
1971   // Early exit if processInsertions doesn't detect any todo items
1972   if (!processInsertions())
1973     return false;
1974   processDeletions();
1975   if (foldIdenticalSplitEdges()) {
1976     const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
1977     (void)Stats;
1978     LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
1979                       << " redundant split edge BBs (" << Stats.second
1980                       << " bytes) for " << BF.getPrintName() << "\n");
1981   }
1982   rebuildCFI();
1983   // We may have split edges, creating BBs that need correct branching
1984   BF.fixBranches();
1985   LLVM_DEBUG({
1986     dbgs() << "Func after shrink-wrapping: \n";
1987     BF.dump();
1988   });
1989   return true;
1990 }
1991 
1992 void ShrinkWrapping::printStats() {
1993   outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
1994          << " spills inserting load/stores and " << SpillsMovedPushPopMode
1995          << " spills inserting push/pops\n";
1996 }
1997 
1998 // Operators necessary as a result of using MCAnnotation
1999 raw_ostream &operator<<(raw_ostream &OS,
2000                         const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
2001   OS << "SWTodo[";
2002   const char *Sep = "";
2003   for (const ShrinkWrapping::WorklistItem &Item : Vec) {
2004     OS << Sep;
2005     switch (Item.Action) {
2006     case ShrinkWrapping::WorklistItem::Erase:
2007       OS << "Erase";
2008       break;
2009     case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
2010       OS << "ChangeToAdjustment";
2011       break;
2012     case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
2013       OS << "InsertLoadOrStore";
2014       break;
2015     case ShrinkWrapping::WorklistItem::InsertPushOrPop:
2016       OS << "InsertPushOrPop";
2017       break;
2018     }
2019     Sep = ", ";
2020   }
2021   OS << "]";
2022   return OS;
2023 }
2024 
2025 raw_ostream &
2026 operator<<(raw_ostream &OS,
2027            const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
2028   OS << "SLMTodo[";
2029   const char *Sep = "";
2030   for (const StackLayoutModifier::WorklistItem &Item : Vec) {
2031     OS << Sep;
2032     switch (Item.Action) {
2033     case StackLayoutModifier::WorklistItem::None:
2034       OS << "None";
2035       break;
2036     case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
2037       OS << "AdjustLoadStoreOffset";
2038       break;
2039     case StackLayoutModifier::WorklistItem::AdjustCFI:
2040       OS << "AdjustCFI";
2041       break;
2042     }
2043     Sep = ", ";
2044   }
2045   OS << "]";
2046   return OS;
2047 }
2048 
2049 bool operator==(const ShrinkWrapping::WorklistItem &A,
2050                 const ShrinkWrapping::WorklistItem &B) {
2051   return (A.Action == B.Action && A.AffectedReg == B.AffectedReg &&
2052           A.Adjustment == B.Adjustment &&
2053           A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad &&
2054           A.FIEToInsert.IsStore == B.FIEToInsert.IsStore &&
2055           A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm &&
2056           A.FIEToInsert.Size == B.FIEToInsert.Size &&
2057           A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple &&
2058           A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset);
2059 }
2060 
2061 bool operator==(const StackLayoutModifier::WorklistItem &A,
2062                 const StackLayoutModifier::WorklistItem &B) {
2063   return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate);
2064 }
2065 
2066 } // end namespace bolt
2067 } // end namespace llvm
2068