1 //===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ShrinkWrapping class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/ShrinkWrapping.h"
14 #include "bolt/Core/MCPlus.h"
15 #include "bolt/Passes/DataflowInfoManager.h"
16 #include <numeric>
17 #include <stack>
18 
19 #define DEBUG_TYPE "shrinkwrapping"
20 
21 using namespace llvm;
22 
23 namespace opts {
24 
25 extern cl::opt<bool> TimeOpts;
26 extern cl::OptionCategory BoltOptCategory;
27 
28 static cl::opt<unsigned> ShrinkWrappingThreshold(
29     "shrink-wrapping-threshold",
30     cl::desc("Percentage of prologue execution count to use as threshold when"
31              " evaluating whether a block is cold enough to be profitable to"
32              " move eligible spills there"),
33     cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
34 } // namespace opts
35 
36 namespace llvm {
37 namespace bolt {
38 
39 void CalleeSavedAnalysis::analyzeSaves() {
40   ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
41   StackReachingUses &SRU = Info.getStackReachingUses();
42   auto &InsnToBB = Info.getInsnToBBMap();
43   BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);
44 
45   LLVM_DEBUG(dbgs() << "Checking spill locations\n");
46   for (BinaryBasicBlock &BB : BF) {
47     LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
48     const MCInst *Prev = nullptr;
49     for (MCInst &Inst : BB) {
50       if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
51         // Blacklist weird stores we don't understand
52         if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
53             FIE->IsStoreFromReg) {
54           BlacklistedRegs.set(FIE->RegOrImm);
55           CalleeSaved.reset(FIE->RegOrImm);
56           Prev = &Inst;
57           continue;
58         }
59 
60         if (!FIE->IsStore || !FIE->IsStoreFromReg ||
61             BlacklistedRegs[FIE->RegOrImm]) {
62           Prev = &Inst;
63           continue;
64         }
65 
66         // If this reg is defined locally, it is not a callee-saved reg
67         if (RD.isReachedBy(FIE->RegOrImm,
68                            Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
69           BlacklistedRegs.set(FIE->RegOrImm);
70           CalleeSaved.reset(FIE->RegOrImm);
71           Prev = &Inst;
72           continue;
73         }
74 
75         // If this stack position is accessed in another function, we are
76         // probably dealing with a parameter passed in a stack -- do not mess
77         // with it
78         if (SRU.isStoreUsed(*FIE,
79                             Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB)),
80             /*IncludeLocalAccesses=*/false) {
81           BlacklistedRegs.set(FIE->RegOrImm);
82           CalleeSaved.reset(FIE->RegOrImm);
83           Prev = &Inst;
84           continue;
85         }
86 
87         // If this stack position is loaded elsewhere in another reg, we can't
88         // update it, so blacklist it.
89         if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
90                                                   : SRU.expr_begin(BB))) {
91           BlacklistedRegs.set(FIE->RegOrImm);
92           CalleeSaved.reset(FIE->RegOrImm);
93           Prev = &Inst;
94           continue;
95         }
96 
97         // Ignore regs with multiple saves
98         if (CalleeSaved[FIE->RegOrImm]) {
99           BlacklistedRegs.set(FIE->RegOrImm);
100           CalleeSaved.reset(FIE->RegOrImm);
101           Prev = &Inst;
102           continue;
103         }
104 
105         CalleeSaved.set(FIE->RegOrImm);
106         SaveFIEByReg[FIE->RegOrImm] = &*FIE;
107         SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
108         BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
109         OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
110         LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
111                           << FIE->RegOrImm << "\n");
112       }
113       Prev = &Inst;
114     }
115   }
116 }
117 
118 void CalleeSavedAnalysis::analyzeRestores() {
119   ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();
120 
121   // Now compute all restores of these callee-saved regs
122   for (BinaryBasicBlock &BB : BF) {
123     const MCInst *Prev = nullptr;
124     for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
125       MCInst &Inst = *I;
126       if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
127         if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
128           Prev = &Inst;
129           continue;
130         }
131 
132         // If this reg is used locally after a restore, then we are probably
133         // not dealing with a callee-saved reg. Except if this use is by
134         // another store, but we don't cover this case yet.
135         // Also not callee-saved if this load accesses caller stack or isn't
136         // simple.
137         if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
138             RU.isReachedBy(FIE->RegOrImm,
139                            Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
140           CalleeSaved.reset(FIE->RegOrImm);
141           Prev = &Inst;
142           continue;
143         }
144         // If stack offsets between saves/store don't agree with each other,
145         // we don't completely understand what's happening here
146         if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
147           CalleeSaved.reset(FIE->RegOrImm);
148           LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
149                                "mismatching restore: "
150                             << FIE->RegOrImm << "\n");
151           Prev = &Inst;
152           continue;
153         }
154 
155         LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
156                           << "\n");
157         if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
158           LoadFIEByReg[FIE->RegOrImm] = &*FIE;
159         BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
160                               AllocatorId);
161         HasRestores.set(FIE->RegOrImm);
162       }
163       Prev = &Inst;
164     }
165   }
166 }
167 
168 std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
169   std::vector<MCInst *> Results;
170   for (BinaryBasicBlock &BB : BF)
171     for (MCInst &Inst : BB)
172       if (getSavedReg(Inst) == Reg)
173         Results.push_back(&Inst);
174   return Results;
175 }
176 
177 std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
178   std::vector<MCInst *> Results;
179   for (BinaryBasicBlock &BB : BF)
180     for (MCInst &Inst : BB)
181       if (getRestoredReg(Inst) == Reg)
182         Results.push_back(&Inst);
183   return Results;
184 }
185 
186 CalleeSavedAnalysis::~CalleeSavedAnalysis() {
187   for (BinaryBasicBlock &BB : BF) {
188     for (MCInst &Inst : BB) {
189       BC.MIB->removeAnnotation(Inst, getSaveTag());
190       BC.MIB->removeAnnotation(Inst, getRestoreTag());
191     }
192   }
193 }
194 
195 void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
196   if (BlacklistedRegions[Offset] < Size)
197     BlacklistedRegions[Offset] = Size;
198 }
199 
200 bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
201   for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions)
202     if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second)
203       return true;
204   return false;
205 }
206 
207 bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
208                                                      int64_t Size) {
209   bool HasConflict = false;
210   for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) {
211     std::pair<const int64_t, int64_t> &Elem = *Iter;
212     if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second &&
213         (Offset != Elem.first || Size != Elem.second)) {
214       Iter = AvailableRegions.erase(Iter);
215       HasConflict = true;
216       continue;
217     }
218     ++Iter;
219   }
220   if (HasConflict) {
221     blacklistRegion(Offset, Size);
222     return true;
223   }
224   return false;
225 }
226 
227 void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
228   StackPointerTracking &SPT = Info.getStackPointerTracking();
229   if (!BC.MII->get(Point.getOpcode())
230            .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
231     return;
232 
233   int SPVal, FPVal;
234   std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
235   std::pair<MCPhysReg, int64_t> FP;
236 
237   if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
238     FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
239   else
240     FP = std::make_pair(0, 0);
241   std::pair<MCPhysReg, int64_t> SP;
242 
243   if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
244     SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
245   else
246     SP = std::make_pair(0, 0);
247 
248   int64_t Output;
249   if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
250     return;
251 
252   // Not your regular frame pointer initialization... bail
253   if (Output != SPVal)
254     blacklistRegion(0, 0);
255 }
256 
257 void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
258   StackPointerTracking &SPT = Info.getStackPointerTracking();
259   if (!BC.MII->get(Point.getOpcode())
260            .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
261     return;
262   // Check if the definition of SP comes from FP -- in this case, this
263   // value may need to be updated depending on our stack layout changes
264   const MCInstrDesc &InstInfo = BC.MII->get(Point.getOpcode());
265   unsigned NumDefs = InstInfo.getNumDefs();
266   bool UsesFP = false;
267   for (unsigned I = NumDefs, E = MCPlus::getNumPrimeOperands(Point); I < E;
268        ++I) {
269     MCOperand &Operand = Point.getOperand(I);
270     if (!Operand.isReg())
271       continue;
272     if (Operand.getReg() == BC.MIB->getFramePointer()) {
273       UsesFP = true;
274       break;
275     }
276   }
277   if (!UsesFP)
278     return;
279 
280   // Setting up evaluation
281   int SPVal, FPVal;
282   std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
283   std::pair<MCPhysReg, int64_t> FP;
284 
285   if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
286     FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
287   else
288     FP = std::make_pair(0, 0);
289   std::pair<MCPhysReg, int64_t> SP;
290 
291   if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
292     SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
293   else
294     SP = std::make_pair(0, 0);
295 
296   int64_t Output;
297   if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
298     return;
299 
300   // If the value is the same of FP, no need to adjust it
301   if (Output == FPVal)
302     return;
303 
304   // If an allocation happened through FP, bail
305   if (Output <= SPVal) {
306     blacklistRegion(0, 0);
307     return;
308   }
309 
310   // We are restoring SP to an old value based on FP. Mark it as a stack
311   // access to be fixed later.
312   BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
313 }
314 
315 void StackLayoutModifier::classifyStackAccesses() {
316   // Understand when stack slots are being used non-locally
317   StackReachingUses &SRU = Info.getStackReachingUses();
318 
319   for (BinaryBasicBlock &BB : BF) {
320     const MCInst *Prev = nullptr;
321     for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
322       MCInst &Inst = *I;
323       checkFramePointerInitialization(Inst);
324       checkStackPointerRestore(Inst);
325       ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
326       if (!FIEX) {
327         Prev = &Inst;
328         continue;
329       }
330       if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
331         blacklistRegion(FIEX->StackOffset, FIEX->Size);
332         Prev = &Inst;
333         continue;
334       }
335       // If this stack position is accessed in another function, we are
336       // probably dealing with a parameter passed in a stack -- do not mess
337       // with it
338       if (SRU.isStoreUsed(*FIEX,
339                           Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
340                           /*IncludeLocalAccesses=*/false)) {
341         blacklistRegion(FIEX->StackOffset, FIEX->Size);
342         Prev = &Inst;
343         continue;
344       }
345       // Now we have a clear stack slot access. Check if its blacklisted or if
346       // it conflicts with another chunk.
347       if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
348           blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
349         Prev = &Inst;
350         continue;
351       }
352       // We are free to go. Add it as available stack slot which we know how
353       // to move it.
354       AvailableRegions[FIEX->StackOffset] = FIEX->Size;
355       BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
356       RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
357       RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
358       LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
359                         << (int)FIEX->Size << "\n");
360     }
361   }
362 }
363 
364 void StackLayoutModifier::classifyCFIs() {
365   std::stack<std::pair<int64_t, uint16_t>> CFIStack;
366   int64_t CfaOffset = -8;
367   uint16_t CfaReg = 7;
368 
369   auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
370     const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
371     if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
372       BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
373       LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
374     } else {
375       IsSimple = false;
376       return;
377     }
378   };
379 
380   for (BinaryBasicBlock *&BB : BF.layout()) {
381     for (MCInst &Inst : *BB) {
382       if (!BC.MIB->isCFI(Inst))
383         continue;
384       const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
385       switch (CFI->getOperation()) {
386       case MCCFIInstruction::OpDefCfa:
387         CfaOffset = -CFI->getOffset();
388         recordAccess(&Inst, CfaOffset);
389         LLVM_FALLTHROUGH;
390       case MCCFIInstruction::OpDefCfaRegister:
391         CfaReg = CFI->getRegister();
392         break;
393       case MCCFIInstruction::OpDefCfaOffset:
394         CfaOffset = -CFI->getOffset();
395         recordAccess(&Inst, CfaOffset);
396         break;
397       case MCCFIInstruction::OpOffset:
398         recordAccess(&Inst, CFI->getOffset());
399         BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
400                               BC.MRI->getLLVMRegNum(CFI->getRegister(),
401                                                     /*isEH=*/false),
402                               AllocatorId);
403         break;
404       case MCCFIInstruction::OpSameValue:
405         BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
406                               BC.MRI->getLLVMRegNum(CFI->getRegister(),
407                                                     /*isEH=*/false),
408                               AllocatorId);
409         break;
410       case MCCFIInstruction::OpRememberState:
411         CFIStack.push(std::make_pair(CfaOffset, CfaReg));
412         break;
413       case MCCFIInstruction::OpRestoreState: {
414         assert(!CFIStack.empty() && "Corrupt CFI stack");
415         std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
416         CFIStack.pop();
417         CfaOffset = Elem.first;
418         CfaReg = Elem.second;
419         break;
420       }
421       case MCCFIInstruction::OpRelOffset:
422       case MCCFIInstruction::OpAdjustCfaOffset:
423         llvm_unreachable("Unhandled AdjustCfaOffset");
424         break;
425       default:
426         break;
427       }
428     }
429   }
430 }
431 
432 void StackLayoutModifier::scheduleChange(
433     MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
434   auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
435       Inst, getTodoTag(), AllocatorId);
436   WList.push_back(Item);
437 }
438 
439 bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
440   if (!IsSimple || !BC.MIB->isPush(*DeletedPush))
441     return false;
442 
443   ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
444   if (!FIE)
445     return false;
446 
447   return canCollapseRegion(FIE->StackOffset);
448 }
449 
450 bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
451   if (!IsInitialized)
452     initialize();
453   if (!IsSimple)
454     return false;
455 
456   if (CollapsedRegions.count(RegionAddr))
457     return true;
458 
459   // Check if it is possible to readjust all accesses below RegionAddr
460   if (!BlacklistedRegions.empty())
461     return false;
462 
463   return true;
464 }
465 
466 bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
467   ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
468   if (!FIE)
469     return false;
470   int64_t RegionAddr = FIE->StackOffset;
471   int64_t RegionSz = FIE->Size;
472   return collapseRegion(DeletedPush, RegionAddr, RegionSz);
473 }
474 
475 bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
476                                          int64_t RegionSz) {
477   if (!canCollapseRegion(RegionAddr))
478     return false;
479 
480   assert(IsInitialized);
481   StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();
482 
483   for (BinaryBasicBlock &BB : BF) {
484     for (MCInst &Inst : BB) {
485       if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
486         continue;
487       auto Slot =
488           BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
489               Inst, getSlotTag());
490       if (!AvailableRegions.count(Slot))
491         continue;
492       // We need to ensure this access is affected by the deleted push
493       if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
494         continue;
495 
496       if (BC.MIB->isCFI(Inst)) {
497         if (Slot > RegionAddr)
498           continue;
499         scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
500         continue;
501       }
502       ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
503       if (!FIE) {
504         if (Slot > RegionAddr)
505           continue;
506         // SP update based on frame pointer
507         scheduleChange(
508             Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
509         continue;
510       }
511 
512       if (Slot == RegionAddr) {
513         BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
514         continue;
515       }
516       if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
517         continue;
518 
519       if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
520         continue;
521 
522       if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
523         continue;
524 
525       scheduleChange(
526           Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
527     }
528   }
529 
530   CollapsedRegions.insert(RegionAddr);
531   return true;
532 }
533 
534 void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
535   for (BinaryBasicBlock &BB : BF) {
536     for (MCInst &Inst : BB) {
537       if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
538         continue;
539       BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
540       scheduleChange(
541           Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
542     }
543   }
544 }
545 
546 bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
547   if (!IsInitialized)
548     initialize();
549   if (!IsSimple)
550     return false;
551 
552   StackPointerTracking &SPT = Info.getStackPointerTracking();
553   int64_t RegionAddr = SPT.getStateBefore(P)->first;
554   if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
555     return false;
556 
557   if (InsertedRegions.count(RegionAddr))
558     return true;
559 
560   // Check if we are going to screw up stack accesses at call sites that
561   // pass parameters via stack
562   if (!BlacklistedRegions.empty())
563     return false;
564 
565   return true;
566 }
567 
568 bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
569   if (!canInsertRegion(P))
570     return false;
571 
572   assert(IsInitialized);
573   StackPointerTracking &SPT = Info.getStackPointerTracking();
574   // This RegionAddr is slightly different from the one seen in collapseRegion
575   // This is the value of SP before the allocation the user wants to make.
576   int64_t RegionAddr = SPT.getStateBefore(P)->first;
577   if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
578     return false;
579 
580   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
581 
582   for (BinaryBasicBlock &BB : BF) {
583     for (MCInst &Inst : BB) {
584       if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
585         continue;
586       auto Slot =
587           BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
588               Inst, getSlotTag());
589       if (!AvailableRegions.count(Slot))
590         continue;
591 
592       if (!(DA.doesADominateB(P, Inst)))
593         continue;
594 
595       if (BC.MIB->isCFI(Inst)) {
596         if (Slot >= RegionAddr)
597           continue;
598         scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
599         continue;
600       }
601       ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
602       if (!FIE) {
603         if (Slot >= RegionAddr)
604           continue;
605         scheduleChange(
606             Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
607         continue;
608       }
609 
610       if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
611         continue;
612       if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
613         continue;
614       if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
615         continue;
616       scheduleChange(
617           Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
618     }
619   }
620 
621   InsertedRegions.insert(RegionAddr);
622   return true;
623 }
624 
625 void StackLayoutModifier::performChanges() {
626   std::set<uint32_t> ModifiedCFIIndices;
627   for (BinaryBasicBlock &BB : BF) {
628     for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
629       MCInst &Inst = *I;
630       if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
631         assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
632         BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
633       }
634       if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
635         continue;
636       auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
637           Inst, getTodoTag());
638       int64_t Adjustment = 0;
639       WorklistItem::ActionType AdjustmentType = WorklistItem::None;
640       for (WorklistItem &WI : WList) {
641         if (WI.Action == WorklistItem::None)
642           continue;
643         assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
644                WI.Action == WorklistItem::AdjustCFI);
645         assert((AdjustmentType == WorklistItem::None ||
646                 AdjustmentType == WI.Action) &&
647                "Conflicting actions requested at the same program point");
648         AdjustmentType = WI.Action;
649         Adjustment += WI.OffsetUpdate;
650       }
651       if (!Adjustment)
652         continue;
653       if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
654         assert(BC.MIB->isCFI(Inst));
655         uint32_t CFINum = Inst.getOperand(0).getImm();
656         if (ModifiedCFIIndices.count(CFINum))
657           continue;
658         ModifiedCFIIndices.insert(CFINum);
659         const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
660         const MCCFIInstruction::OpType Operation = CFI->getOperation();
661         if (Operation == MCCFIInstruction::OpDefCfa ||
662             Operation == MCCFIInstruction::OpDefCfaOffset)
663           Adjustment = 0 - Adjustment;
664         LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
665                           << " to " << (CFI->getOffset() + Adjustment) << "\n");
666         BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
667         continue;
668       }
669       int32_t SrcImm = 0;
670       MCPhysReg Reg = 0;
671       MCPhysReg StackPtrReg = 0;
672       int64_t StackOffset = 0;
673       bool IsIndexed = false;
674       bool IsLoad = false;
675       bool IsStore = false;
676       bool IsSimple = false;
677       bool IsStoreFromReg = false;
678       uint8_t Size = 0;
679       bool Success = false;
680       Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
681                                       Reg, SrcImm, StackPtrReg, StackOffset,
682                                       Size, IsSimple, IsIndexed);
683       if (!Success) {
684         // SP update based on FP value
685         Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
686         assert(Success);
687         continue;
688       }
689       assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
690       if (StackPtrReg != BC.MIB->getFramePointer())
691         Adjustment = -Adjustment;
692       if (IsLoad)
693         Success = BC.MIB->createRestoreFromStack(
694             Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
695       else if (IsStore)
696         Success = BC.MIB->createSaveToStack(
697             Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
698       LLVM_DEBUG({
699         dbgs() << "Adjusted instruction: ";
700         Inst.dump();
701       });
702       assert(Success);
703     }
704   }
705 }
706 
707 void StackLayoutModifier::initialize() {
708   classifyStackAccesses();
709   classifyCFIs();
710   IsInitialized = true;
711 }
712 
713 std::atomic_uint64_t ShrinkWrapping::SpillsMovedRegularMode{0};
714 std::atomic_uint64_t ShrinkWrapping::SpillsMovedPushPopMode{0};
715 
716 using BBIterTy = BinaryBasicBlock::iterator;
717 
718 void ShrinkWrapping::classifyCSRUses() {
719   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
720   StackPointerTracking &SPT = Info.getStackPointerTracking();
721   UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
722                                      BitVector(DA.NumInstrs, false));
723 
724   const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
725   for (BinaryBasicBlock &BB : BF) {
726     for (MCInst &Inst : BB) {
727       if (BC.MIB->isCFI(Inst))
728         continue;
729       BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
730       BC.MIB->getTouchedRegs(Inst, BV);
731       BV &= CSA.CalleeSaved;
732       for (int I : BV.set_bits()) {
733         if (I == 0)
734           continue;
735         if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
736           UsesByReg[I].set(DA.ExprToIdx[&Inst]);
737       }
738       if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
739         continue;
740       BV = CSA.CalleeSaved;
741       BV &= FPAliases;
742       for (int I : BV.set_bits())
743         UsesByReg[I].set(DA.ExprToIdx[&Inst]);
744     }
745   }
746 }
747 
748 void ShrinkWrapping::pruneUnwantedCSRs() {
749   BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
750   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
751     if (!CSA.CalleeSaved[I])
752       continue;
753     if (ParamRegs[I]) {
754       CSA.CalleeSaved.reset(I);
755       continue;
756     }
757     if (UsesByReg[I].empty()) {
758       LLVM_DEBUG(
759           dbgs()
760           << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
761           << "\n");
762       CSA.CalleeSaved.reset(I);
763       continue;
764     }
765     if (!CSA.HasRestores[I]) {
766       LLVM_DEBUG(
767           dbgs() << "Dismissing Callee-Saved Reg because it does not have "
768                     "restores:"
769                  << I << "\n");
770       CSA.CalleeSaved.reset(I);
771     }
772   }
773 }
774 
775 void ShrinkWrapping::computeSaveLocations() {
776   SavePos = std::vector<SmallSetVector<MCInst *, 4>>(BC.MRI->getNumRegs());
777   ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
778   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
779   StackPointerTracking &SPT = Info.getStackPointerTracking();
780 
781   LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
782   for (BinaryBasicBlock &BB : BF) {
783     LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
784 
785     MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
786     if (!First)
787       continue;
788 
789     // Use reaching instructions to detect if we are inside a loop - if we
790     // are, do not consider this BB as valid placement for saves.
791     if (RI.isInLoop(BB))
792       continue;
793 
794     const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
795     // If we don't know stack state at this point, bail
796     if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
797         (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
798       continue;
799 
800     for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
801       if (!CSA.CalleeSaved[I])
802         continue;
803 
804       BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
805       for (int J : UsesByReg[I].set_bits())
806         if (DA.doesADominateB(*First, J))
807           BBDominatedUses.set(J);
808       LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
809                         << BBDominatedUses.count() << " uses for reg " << I
810                         << ". Total uses for reg is " << UsesByReg[I].count()
811                         << "\n");
812       BBDominatedUses &= UsesByReg[I];
813       if (BBDominatedUses == UsesByReg[I]) {
814         LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
815                           << " as a save pos for " << I << "\n");
816         SavePos[I].insert(First);
817         LLVM_DEBUG({
818           dbgs() << "Dominated uses are:\n";
819           for (int J : UsesByReg[I].set_bits()) {
820             dbgs() << "Idx " << J << ": ";
821             DA.Expressions[J]->dump();
822           }
823         });
824       }
825     }
826   }
827 
828   BestSaveCount = std::vector<uint64_t>(BC.MRI->getNumRegs(),
829                                         std::numeric_limits<uint64_t>::max());
830   BestSavePos = std::vector<MCInst *>(BC.MRI->getNumRegs(), nullptr);
831   auto &InsnToBB = Info.getInsnToBBMap();
832   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
833     if (!CSA.CalleeSaved[I])
834       continue;
835 
836     for (MCInst *Pos : SavePos[I]) {
837       BinaryBasicBlock *BB = InsnToBB[Pos];
838       uint64_t Count = BB->getExecutionCount();
839       if (Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
840           Count < BestSaveCount[I]) {
841         BestSavePos[I] = Pos;
842         BestSaveCount[I] = Count;
843       }
844     }
845   }
846 }
847 
848 void ShrinkWrapping::computeDomOrder() {
849   std::vector<MCPhysReg> Order;
850   for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
851     Order.push_back(I);
852   }
853 
854   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
855   auto &InsnToBB = Info.getInsnToBBMap();
856   std::sort(Order.begin(), Order.end(),
857             [&](const MCPhysReg &A, const MCPhysReg &B) {
858               BinaryBasicBlock *BBA =
859                   BestSavePos[A] ? InsnToBB[BestSavePos[A]] : nullptr;
860               BinaryBasicBlock *BBB =
861                   BestSavePos[B] ? InsnToBB[BestSavePos[B]] : nullptr;
862               if (BBA == BBB)
863                 return A < B;
864               if (!BBA && BBB)
865                 return false;
866               if (BBA && !BBB)
867                 return true;
868               if (DA.doesADominateB(*BestSavePos[A], *BestSavePos[B]))
869                 return true;
870               if (DA.doesADominateB(*BestSavePos[B], *BestSavePos[A]))
871                 return false;
872               return A < B;
873             });
874 
875   for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
876     DomOrder[Order[I]] = I;
877 }
878 
879 bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
880                                        uint64_t &TotalEstimatedWin) {
881   const uint64_t CurSavingCost = CSA.SavingCost[CSR];
882   if (!CSA.CalleeSaved[CSR])
883     return false;
884 
885   uint64_t BestCount = BestSaveCount[CSR];
886   BestPosSave = BestSavePos[CSR];
887   bool ShouldMove = false;
888   if (BestCount != std::numeric_limits<uint64_t>::max() &&
889       BestCount < (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost) {
890     LLVM_DEBUG({
891       auto &InsnToBB = Info.getInsnToBBMap();
892       dbgs() << "Better position for saves found in func " << BF.getPrintName()
893              << " count << " << BF.getKnownExecutionCount() << "\n";
894       dbgs() << "Reg: " << CSR
895              << "; New BB: " << InsnToBB[BestPosSave]->getName()
896              << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
897     });
898     TotalEstimatedWin += CurSavingCost - BestCount;
899     ShouldMove = true;
900   }
901 
902   if (!ShouldMove)
903     return false;
904   if (!BestPosSave) {
905     LLVM_DEBUG({
906       dbgs() << "Dropping opportunity because we don't know where to put "
907                 "stores -- total est. freq reduc: "
908              << TotalEstimatedWin << "\n";
909     });
910     return false;
911   }
912   return true;
913 }
914 
915 /// Auxiliar function used to create basic blocks for critical edges and update
916 /// the dominance frontier with these new locations
917 void ShrinkWrapping::splitFrontierCritEdges(
918     BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
919     const SmallVector<bool, 4> &IsCritEdge,
920     const SmallVector<BinaryBasicBlock *, 4> &From,
921     const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
922   LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
923                     << BF.getPrintName() << "\n");
924   // For every FromBB, there might be one or more critical edges, with
925   // To[I] containing destination BBs. It's important to memorize
926   // the original size of the Frontier as we may append to it while splitting
927   // critical edges originating with blocks with multiple destinations.
928   for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
929     if (!IsCritEdge[I])
930       continue;
931     if (To[I].empty())
932       continue;
933     BinaryBasicBlock *FromBB = From[I];
934     LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
935                       << "\n");
936     // Split edge for every DestinationBBs
937     for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
938       BinaryBasicBlock *DestinationBB = To[I][DI];
939       LLVM_DEBUG(dbgs() << "   - Dest : " << DestinationBB->getName() << "\n");
940       BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
941       // Insert dummy instruction so this BB is never empty (we need this for
942       // PredictiveStackPointerTracking to work, since it annotates instructions
943       // and not BBs).
944       if (NewBB->empty()) {
945         MCInst NewInst;
946         BC.MIB->createNoop(NewInst);
947         NewBB->addInstruction(std::move(NewInst));
948         scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
949       }
950 
951       // Update frontier
952       ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
953       if (DI == 0) {
954         // Update frontier inplace
955         Frontier[I] = NewFrontierPP;
956         LLVM_DEBUG(dbgs() << "   - Update frontier with " << NewBB->getName()
957                           << '\n');
958       } else {
959         // Append new frontier to the end of the list
960         Frontier.push_back(NewFrontierPP);
961         LLVM_DEBUG(dbgs() << "   - Append frontier " << NewBB->getName()
962                           << '\n');
963       }
964     }
965   }
966 }
967 
968 SmallVector<ProgramPoint, 4>
969 ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
970                                    uint64_t TotalEstimatedWin) {
971   SmallVector<ProgramPoint, 4> Frontier;
972   SmallVector<bool, 4> IsCritEdge;
973   bool CannotPlace = false;
974   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
975 
976   SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
977   SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
978   // In case of a critical edge, we need to create extra BBs to host restores
979   // into edges transitioning to the dominance frontier, otherwise we pull these
980   // restores to inside the dominated area.
981   Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
982   LLVM_DEBUG({
983     dbgs() << "Dumping dominance frontier for ";
984     BC.printInstruction(dbgs(), *BestPosSave);
985     for (ProgramPoint &PP : Frontier)
986       if (PP.isInst())
987         BC.printInstruction(dbgs(), *PP.getInst());
988       else
989         dbgs() << PP.getBB()->getName() << "\n";
990   });
991   for (ProgramPoint &PP : Frontier) {
992     bool HasCritEdges = false;
993     if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
994         doesInstUsesCSR(*PP.getInst(), CSR))
995       CannotPlace = true;
996     BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
997     CritEdgesFrom.emplace_back(FrontierBB);
998     CritEdgesTo.emplace_back(0);
999     SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
1000     // Check for invoke instructions at the dominance frontier, which indicates
1001     // the landing pad is not dominated.
1002     if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
1003       LLVM_DEBUG(
1004           dbgs() << "Bailing on restore placement to avoid LP splitting\n");
1005       Frontier.clear();
1006       return Frontier;
1007     }
1008     doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
1009       if (!DA.doesADominateB(*BestPosSave, P)) {
1010         Dests.emplace_back(Info.getParentBB(P));
1011         return;
1012       }
1013       HasCritEdges = true;
1014     });
1015     IsCritEdge.push_back(HasCritEdges);
1016   }
1017   // Restores cannot be placed in empty BBs because we have a dataflow
1018   // analysis that depends on insertions happening before real instructions
1019   // (PredictiveStackPointerTracking). Detect now for empty BBs and add a
1020   // dummy nop that is scheduled to be removed later.
1021   bool InvalidateRequired = false;
1022   for (BinaryBasicBlock *&BB : BF.layout()) {
1023     if (BB->size() != 0)
1024       continue;
1025     MCInst NewInst;
1026     BC.MIB->createNoop(NewInst);
1027     auto II = BB->addInstruction(std::move(NewInst));
1028     scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
1029     InvalidateRequired = true;
1030   }
1031   if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
1032     LLVM_DEBUG({
1033       dbgs() << "Now detected critical edges in the following frontier:\n";
1034       for (ProgramPoint &PP : Frontier) {
1035         if (PP.isBB()) {
1036           dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
1037         } else {
1038           dbgs() << "  Inst: ";
1039           PP.getInst()->dump();
1040         }
1041       }
1042     });
1043     splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
1044                            CritEdgesTo);
1045     InvalidateRequired = true;
1046   }
1047   if (InvalidateRequired) {
1048     // BitVectors that represent all insns of the function are invalid now
1049     // since we changed BBs/Insts. Re-run steps that depend on pointers being
1050     // valid
1051     Info.invalidateAll();
1052     classifyCSRUses();
1053   }
1054   if (CannotPlace) {
1055     LLVM_DEBUG({
1056       dbgs() << "Dropping opportunity because restore placement failed"
1057                 " -- total est. freq reduc: "
1058              << TotalEstimatedWin << "\n";
1059     });
1060     Frontier.clear();
1061     return Frontier;
1062   }
1063   return Frontier;
1064 }
1065 
1066 bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
1067                                           int64_t SaveOffset) {
1068   if (FA.requiresAlignment(BF)) {
1069     LLVM_DEBUG({
1070       dbgs() << "Reg " << CSR
1071              << " is not using push/pops due to function "
1072                 "alignment requirements.\n";
1073     });
1074     return false;
1075   }
1076   for (MCInst *Save : CSA.getSavesByReg(CSR)) {
1077     if (!SLM.canCollapseRegion(Save)) {
1078       LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
1079       return false;
1080     }
1081   }
1082   // Abort if one of the restores for this CSR is not a POP.
1083   for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
1084     if (!BC.MIB->isPop(*Load)) {
1085       LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
1086       return false;
1087     }
1088   }
1089 
1090   StackPointerTracking &SPT = Info.getStackPointerTracking();
1091   // Abort if we are inserting a push into an entry BB (offset -8) and this
1092   // func sets up a frame pointer.
1093   if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
1094       SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
1095     LLVM_DEBUG({
1096       dbgs() << "Reg " << CSR
1097              << " cannot insert region or we are "
1098                 "trying to insert a push into entry bb.\n";
1099     });
1100     return false;
1101   }
1102   return true;
1103 }
1104 
1105 SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
1106     const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
1107     unsigned CSR) {
1108   SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
1109   // Moving pop locations to the correct sp offset
1110   ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
1111   StackPointerTracking &SPT = Info.getStackPointerTracking();
1112   for (ProgramPoint &PP : FixedRestorePoints) {
1113     BinaryBasicBlock *BB = Info.getParentBB(PP);
1114     bool Found = false;
1115     if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
1116         SaveOffset) {
1117       BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
1118       BV &= UsesByReg[CSR];
1119       if (!BV.any()) {
1120         Found = true;
1121         PP = BB;
1122         continue;
1123       }
1124     }
1125     for (auto RIt = BB->rbegin(), End = BB->rend(); RIt != End; ++RIt) {
1126       if (SPT.getStateBefore(*RIt)->first == SaveOffset) {
1127         BitVector BV = *RI.getStateAt(*RIt);
1128         BV &= UsesByReg[CSR];
1129         if (!BV.any()) {
1130           Found = true;
1131           PP = &*RIt;
1132           break;
1133         }
1134       }
1135     }
1136     if (!Found) {
1137       LLVM_DEBUG({
1138         dbgs() << "Could not find restore insertion point for " << CSR
1139                << ", falling back to load/store mode\n";
1140       });
1141       FixedRestorePoints.clear();
1142       return FixedRestorePoints;
1143     }
1144   }
1145   return FixedRestorePoints;
1146 }
1147 
1148 void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
1149                                                     bool UsePushPops) {
1150 
1151   for (BinaryBasicBlock *&BB : BF.layout()) {
1152     std::vector<MCInst *> CFIs;
1153     for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
1154       MCInst &Inst = *I;
1155       if (BC.MIB->isCFI(Inst)) {
1156         // Delete all offset CFIs related to this CSR
1157         if (SLM.getOffsetCFIReg(Inst) == CSR) {
1158           HasDeletedOffsetCFIs[CSR] = true;
1159           scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
1160           continue;
1161         }
1162         CFIs.push_back(&Inst);
1163         continue;
1164       }
1165 
1166       uint16_t SavedReg = CSA.getSavedReg(Inst);
1167       uint16_t RestoredReg = CSA.getRestoredReg(Inst);
1168       if (SavedReg != CSR && RestoredReg != CSR) {
1169         CFIs.clear();
1170         continue;
1171       }
1172 
1173       scheduleChange(&Inst, WorklistItem(UsePushPops
1174                                              ? WorklistItem::Erase
1175                                              : WorklistItem::ChangeToAdjustment,
1176                                          CSR));
1177 
1178       // Delete associated CFIs
1179       const bool RecordDeletedPushCFIs =
1180           SavedReg == CSR && DeletedPushCFIs[CSR].empty();
1181       const bool RecordDeletedPopCFIs =
1182           RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
1183       for (MCInst *CFI : CFIs) {
1184         const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
1185         // Do not touch these...
1186         if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
1187             MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
1188           continue;
1189         scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
1190         if (RecordDeletedPushCFIs) {
1191           // Do not record this to be replayed later because we are going to
1192           // rebuild it.
1193           if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
1194             continue;
1195           DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
1196         }
1197         if (RecordDeletedPopCFIs) {
1198           if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
1199             continue;
1200           DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
1201         }
1202       }
1203       CFIs.clear();
1204     }
1205   }
1206 }
1207 
1208 bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
1209   if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
1210       CSA.getRestoredReg(Inst) == CSR)
1211     return false;
1212   BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
1213   BC.MIB->getTouchedRegs(Inst, BV);
1214   return BV[CSR];
1215 }
1216 
1217 void ShrinkWrapping::scheduleSaveRestoreInsertions(
1218     unsigned CSR, MCInst *BestPosSave,
1219     SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
1220   auto &InsnToBB = Info.getInsnToBBMap();
1221   const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
1222   const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
1223   assert(FIESave && FIELoad && "Invalid CSR");
1224 
1225   LLVM_DEBUG({
1226     dbgs() << "Scheduling save insertion at: ";
1227     BestPosSave->dump();
1228   });
1229 
1230   scheduleChange(BestPosSave,
1231                  UsePushPops ? WorklistItem::InsertPushOrPop
1232                              : WorklistItem::InsertLoadOrStore,
1233                  *FIESave, CSR);
1234 
1235   for (ProgramPoint &PP : RestorePoints) {
1236     BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
1237     LLVM_DEBUG({
1238       dbgs() << "Scheduling restore insertion at: ";
1239       if (PP.isInst())
1240         PP.getInst()->dump();
1241       else
1242         dbgs() << PP.getBB()->getName() << "\n";
1243     });
1244     MCInst *Term =
1245         FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
1246     if (Term)
1247       PP = Term;
1248     bool PrecededByPrefix = false;
1249     if (PP.isInst()) {
1250       auto Iter = FrontierBB->findInstruction(PP.getInst());
1251       if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
1252         --Iter;
1253         PrecededByPrefix = BC.MIB->isPrefix(*Iter);
1254       }
1255     }
1256     if (PP.isInst() &&
1257         (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
1258       assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
1259              "cannot move to end of bb");
1260       scheduleChange(InsnToBB[PP.getInst()],
1261                      UsePushPops ? WorklistItem::InsertPushOrPop
1262                                  : WorklistItem::InsertLoadOrStore,
1263                      *FIELoad, CSR);
1264       continue;
1265     }
1266     scheduleChange(PP,
1267                    UsePushPops ? WorklistItem::InsertPushOrPop
1268                                : WorklistItem::InsertLoadOrStore,
1269                    *FIELoad, CSR);
1270   }
1271 }
1272 
1273 void ShrinkWrapping::moveSaveRestores() {
1274   bool DisablePushPopMode = false;
1275   bool UsedPushPopMode = false;
1276   // Keeps info about successfully moved regs: reg index, save position and
1277   // save size
1278   std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;
1279 
1280   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
1281     MCInst *BestPosSave = nullptr;
1282     uint64_t TotalEstimatedWin = 0;
1283     if (!isBestSavePosCold(I, BestPosSave, TotalEstimatedWin))
1284       continue;
1285     SmallVector<ProgramPoint, 4> RestorePoints =
1286         doRestorePlacement(BestPosSave, I, TotalEstimatedWin);
1287     if (RestorePoints.empty())
1288       continue;
1289 
1290     const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
1291     const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
1292     (void)FIELoad;
1293     assert(FIESave && FIELoad);
1294     StackPointerTracking &SPT = Info.getStackPointerTracking();
1295     const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
1296     int SaveOffset = SPFP.first;
1297     uint8_t SaveSize = FIESave->Size;
1298 
1299     // If we don't know stack state at this point, bail
1300     if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
1301         (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
1302       continue;
1303 
1304     // Operation mode: if true, will insert push/pops instead of loads/restores
1305     bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);
1306 
1307     if (UsePushPops) {
1308       SmallVector<ProgramPoint, 4> FixedRestorePoints =
1309           fixPopsPlacements(RestorePoints, SaveOffset, I);
1310       if (FixedRestorePoints.empty())
1311         UsePushPops = false;
1312       else
1313         RestorePoints = FixedRestorePoints;
1314     }
1315 
1316     // Disable push-pop mode for all CSRs in this function
1317     if (!UsePushPops)
1318       DisablePushPopMode = true;
1319     else
1320       UsedPushPopMode = true;
1321 
1322     scheduleOldSaveRestoresRemoval(I, UsePushPops);
1323     scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
1324     MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
1325   }
1326 
1327   // Revert push-pop mode if it failed for a single CSR
1328   if (DisablePushPopMode && UsedPushPopMode) {
1329     UsedPushPopMode = false;
1330     for (BinaryBasicBlock &BB : BF) {
1331       auto WRI = Todo.find(&BB);
1332       if (WRI != Todo.end()) {
1333         std::vector<WorklistItem> &TodoList = WRI->second;
1334         for (WorklistItem &Item : TodoList)
1335           if (Item.Action == WorklistItem::InsertPushOrPop)
1336             Item.Action = WorklistItem::InsertLoadOrStore;
1337       }
1338       for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
1339         MCInst &Inst = *I;
1340         auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
1341             Inst, getAnnotationIndex());
1342         if (!TodoList)
1343           continue;
1344         bool isCFI = BC.MIB->isCFI(Inst);
1345         for (WorklistItem &Item : *TodoList) {
1346           if (Item.Action == WorklistItem::InsertPushOrPop)
1347             Item.Action = WorklistItem::InsertLoadOrStore;
1348           if (!isCFI && Item.Action == WorklistItem::Erase)
1349             Item.Action = WorklistItem::ChangeToAdjustment;
1350         }
1351       }
1352     }
1353   }
1354 
1355   // Update statistics
1356   if (!UsedPushPopMode) {
1357     SpillsMovedRegularMode += MovedRegs.size();
1358     return;
1359   }
1360 
1361   // Schedule modifications to stack-accessing instructions via
1362   // StackLayoutModifier.
1363   SpillsMovedPushPopMode += MovedRegs.size();
1364   for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
1365     unsigned RegNdx;
1366     MCInst *SavePos;
1367     size_t SaveSize;
1368     std::tie(RegNdx, SavePos, SaveSize) = I;
1369     for (MCInst *Save : CSA.getSavesByReg(RegNdx))
1370       SLM.collapseRegion(Save);
1371     SLM.insertRegion(SavePos, SaveSize);
1372   }
1373 }
1374 
1375 namespace {
1376 /// Helper function to identify whether two basic blocks created by splitting
1377 /// a critical edge have the same contents.
1378 bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
1379                             const BinaryBasicBlock &B) {
1380   if (A.succ_size() != B.succ_size())
1381     return false;
1382   if (A.succ_size() != 1)
1383     return false;
1384 
1385   if (*A.succ_begin() != *B.succ_begin())
1386     return false;
1387 
1388   if (A.size() != B.size())
1389     return false;
1390 
1391   // Compare instructions
1392   auto I = A.begin(), E = A.end();
1393   auto OtherI = B.begin(), OtherE = B.end();
1394   while (I != E && OtherI != OtherE) {
1395     if (I->getOpcode() != OtherI->getOpcode())
1396       return false;
1397     if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
1398           return true;
1399         }))
1400       return false;
1401     ++I;
1402     ++OtherI;
1403   }
1404   return true;
1405 }
1406 } // namespace
1407 
1408 bool ShrinkWrapping::foldIdenticalSplitEdges() {
1409   bool Changed = false;
1410   for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
1411     BinaryBasicBlock &BB = *Iter;
1412     if (!BB.getName().startswith(".LSplitEdge"))
1413       continue;
1414     for (auto RIter = BF.rbegin(); RIter != BF.rend(); ++RIter) {
1415       BinaryBasicBlock &RBB = *RIter;
1416       if (&RBB == &BB)
1417         break;
1418       if (!RBB.getName().startswith(".LSplitEdge") || !RBB.isValid() ||
1419           !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
1420         continue;
1421       assert(RBB.pred_size() == 1 && "Invalid split edge BB");
1422       BinaryBasicBlock *Pred = *RBB.pred_begin();
1423       uint64_t OrigCount = Pred->branch_info_begin()->Count;
1424       uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
1425       BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
1426       Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
1427       Changed = true;
1428       // Remove the block from CFG
1429       RBB.markValid(false);
1430     }
1431   }
1432 
1433   return Changed;
1434 }
1435 
1436 namespace {
1437 
1438 // A special StackPointerTracking that compensates for our future plans
1439 // in removing/adding insn.
1440 class PredictiveStackPointerTracking
1441     : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
1442   friend class DataflowAnalysis<PredictiveStackPointerTracking,
1443                                 std::pair<int, int>>;
1444   decltype(ShrinkWrapping::Todo) &TodoMap;
1445   DataflowInfoManager &Info;
1446 
1447   Optional<unsigned> AnnotationIndex;
1448 
1449 protected:
1450   void compNextAux(const MCInst &Point,
1451                    const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
1452                    std::pair<int, int> &Res) {
1453     for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
1454       if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
1455           BC.MIB->isPush(Point)) {
1456         Res.first += BC.MIB->getPushSize(Point);
1457         continue;
1458       }
1459       if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
1460           BC.MIB->isPop(Point)) {
1461         Res.first -= BC.MIB->getPopSize(Point);
1462         continue;
1463       }
1464       if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
1465           Item.FIEToInsert.IsStore) {
1466         Res.first -= Item.FIEToInsert.Size;
1467         continue;
1468       }
1469       if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
1470           Item.FIEToInsert.IsLoad) {
1471         Res.first += Item.FIEToInsert.Size;
1472         continue;
1473       }
1474     }
1475   }
1476 
1477   std::pair<int, int> computeNext(const MCInst &Point,
1478                                   const std::pair<int, int> &Cur) {
1479     std::pair<int, int> Res =
1480         StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
1481             Point, Cur);
1482     if (Res.first == StackPointerTracking::SUPERPOSITION ||
1483         Res.first == StackPointerTracking::EMPTY)
1484       return Res;
1485     auto TodoItems =
1486         BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
1487             Point, ShrinkWrapping::getAnnotationName());
1488     if (TodoItems)
1489       compNextAux(Point, *TodoItems, Res);
1490     auto &InsnToBBMap = Info.getInsnToBBMap();
1491     if (&*InsnToBBMap[&Point]->rbegin() != &Point)
1492       return Res;
1493     auto WRI = TodoMap.find(InsnToBBMap[&Point]);
1494     if (WRI == TodoMap.end())
1495       return Res;
1496     compNextAux(Point, WRI->second, Res);
1497     return Res;
1498   }
1499 
1500   StringRef getAnnotationName() const {
1501     return StringRef("PredictiveStackPointerTracking");
1502   }
1503 
1504 public:
1505   PredictiveStackPointerTracking(BinaryFunction &BF,
1506                                  decltype(ShrinkWrapping::Todo) &TodoMap,
1507                                  DataflowInfoManager &Info,
1508                                  MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
1509       : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
1510                                                                  AllocatorId),
1511         TodoMap(TodoMap), Info(Info) {}
1512 
1513   void run() {
1514     StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
1515   }
1516 };
1517 
1518 } // end anonymous namespace
1519 
1520 void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
1521                                       int SPValPop) {
1522   MCInst *SavePoint = nullptr;
1523   for (BinaryBasicBlock &BB : BF) {
1524     for (auto InstIter = BB.rbegin(), EndIter = BB.rend(); InstIter != EndIter;
1525          ++InstIter) {
1526       int32_t SrcImm = 0;
1527       MCPhysReg Reg = 0;
1528       MCPhysReg StackPtrReg = 0;
1529       int64_t StackOffset = 0;
1530       bool IsIndexed = false;
1531       bool IsLoad = false;
1532       bool IsStore = false;
1533       bool IsSimple = false;
1534       bool IsStoreFromReg = false;
1535       uint8_t Size = 0;
1536       if (!BC.MIB->isStackAccess(*InstIter, IsLoad, IsStore, IsStoreFromReg,
1537                                  Reg, SrcImm, StackPtrReg, StackOffset, Size,
1538                                  IsSimple, IsIndexed))
1539         continue;
1540       if (Reg != CSR || !IsStore || !IsSimple)
1541         continue;
1542       SavePoint = &*InstIter;
1543       break;
1544     }
1545     if (SavePoint)
1546       break;
1547   }
1548   assert(SavePoint);
1549   LLVM_DEBUG({
1550     dbgs() << "Now using as save point for reg " << CSR << " :";
1551     SavePoint->dump();
1552   });
1553   bool PrevAffectedZone = false;
1554   BinaryBasicBlock *PrevBB = nullptr;
1555   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
1556   for (BinaryBasicBlock *BB : BF.layout()) {
1557     if (BB->size() == 0)
1558       continue;
1559     const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
1560     const bool InAffectedZoneAtBegin =
1561         (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
1562     bool InAffectedZone = InAffectedZoneAtBegin;
1563     for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
1564       const bool CurZone = DA.count(*InstIter, *SavePoint);
1565       if (InAffectedZone != CurZone) {
1566         auto InsertionIter = InstIter;
1567         ++InsertionIter;
1568         InAffectedZone = CurZone;
1569         if (InAffectedZone)
1570           InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
1571                                             SPValPop);
1572         else
1573           InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
1574                                             SPValPush);
1575         --InstIter;
1576       }
1577     }
1578     // Are we at the first basic block or hot-cold split point?
1579     if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
1580       if (InAffectedZoneAtBegin)
1581         insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
1582     } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
1583       if (InAffectedZoneAtBegin)
1584         insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
1585       else
1586         insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
1587     }
1588     PrevAffectedZone = InAffectedZoneAtEnd;
1589     PrevBB = BB;
1590   }
1591 }
1592 
1593 void ShrinkWrapping::rebuildCFIForSP() {
1594   for (BinaryBasicBlock &BB : BF) {
1595     for (MCInst &Inst : BB) {
1596       if (!BC.MIB->isCFI(Inst))
1597         continue;
1598       const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
1599       if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
1600         BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
1601     }
1602   }
1603 
1604   int PrevSPVal = -8;
1605   BinaryBasicBlock *PrevBB = nullptr;
1606   StackPointerTracking &SPT = Info.getStackPointerTracking();
1607   for (BinaryBasicBlock *BB : BF.layout()) {
1608     if (BB->size() == 0)
1609       continue;
1610     const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
1611     const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
1612     int SPVal = SPValAtBegin;
1613     for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
1614       const int CurVal = SPT.getStateAt(*Iter)->first;
1615       if (SPVal != CurVal) {
1616         auto InsertionIter = Iter;
1617         ++InsertionIter;
1618         Iter = BF.addCFIInstruction(
1619             BB, InsertionIter,
1620             MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
1621         SPVal = CurVal;
1622       }
1623     }
1624     if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
1625       BF.addCFIInstruction(
1626           BB, BB->begin(),
1627           MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
1628     else if (SPValAtBegin != PrevSPVal)
1629       BF.addCFIInstruction(
1630           PrevBB, PrevBB->end(),
1631           MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
1632     PrevSPVal = SPValAtEnd;
1633     PrevBB = BB;
1634   }
1635 
1636   for (BinaryBasicBlock &BB : BF)
1637     for (auto I = BB.begin(); I != BB.end();)
1638       if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
1639         I = BB.eraseInstruction(I);
1640       else
1641         ++I;
1642 }
1643 
1644 MCInst ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
1645                                          const FrameIndexEntry &FIE,
1646                                          bool CreatePushOrPop) {
1647   MCInst NewInst;
1648   if (SPVal != StackPointerTracking::SUPERPOSITION &&
1649       SPVal != StackPointerTracking::EMPTY) {
1650     if (FIE.IsLoad) {
1651       if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
1652                                           FIE.StackOffset - SPVal, FIE.RegOrImm,
1653                                           FIE.Size)) {
1654         errs() << "createRestoreFromStack: not supported on this platform\n";
1655         abort();
1656       }
1657     } else {
1658       if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
1659                                      FIE.StackOffset - SPVal, FIE.RegOrImm,
1660                                      FIE.Size)) {
1661         errs() << "createSaveToStack: not supported on this platform\n";
1662         abort();
1663       }
1664     }
1665     if (CreatePushOrPop)
1666       BC.MIB->changeToPushOrPop(NewInst);
1667     return NewInst;
1668   }
1669   assert(FPVal != StackPointerTracking::SUPERPOSITION &&
1670          FPVal != StackPointerTracking::EMPTY);
1671 
1672   if (FIE.IsLoad) {
1673     if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
1674                                         FIE.StackOffset - FPVal, FIE.RegOrImm,
1675                                         FIE.Size)) {
1676       errs() << "createRestoreFromStack: not supported on this platform\n";
1677       abort();
1678     }
1679   } else {
1680     if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
1681                                    FIE.StackOffset - FPVal, FIE.RegOrImm,
1682                                    FIE.Size)) {
1683       errs() << "createSaveToStack: not supported on this platform\n";
1684       abort();
1685     }
1686   }
1687   return NewInst;
1688 }
1689 
1690 void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
1691   const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
1692   if (UpdatedCFIs.count(CFI))
1693     return;
1694 
1695   switch (CFI->getOperation()) {
1696   case MCCFIInstruction::OpDefCfa:
1697   case MCCFIInstruction::OpDefCfaRegister:
1698   case MCCFIInstruction::OpDefCfaOffset:
1699     CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
1700     break;
1701   case MCCFIInstruction::OpOffset:
1702   default:
1703     break;
1704   }
1705 
1706   UpdatedCFIs.insert(CFI);
1707 }
1708 
1709 BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
1710                                                 BBIterTy Pos, unsigned Reg,
1711                                                 bool isPush, int Sz,
1712                                                 int64_t NewOffset) {
1713   if (isPush) {
1714     for (uint32_t Idx : DeletedPushCFIs[Reg]) {
1715       Pos = BF.addCFIPseudo(&BB, Pos, Idx);
1716       updateCFIInstOffset(*Pos++, NewOffset);
1717     }
1718     if (HasDeletedOffsetCFIs[Reg]) {
1719       Pos = BF.addCFIInstruction(
1720           &BB, Pos,
1721           MCCFIInstruction::createOffset(
1722               nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset));
1723       ++Pos;
1724     }
1725   } else {
1726     for (uint32_t Idx : DeletedPopCFIs[Reg]) {
1727       Pos = BF.addCFIPseudo(&BB, Pos, Idx);
1728       updateCFIInstOffset(*Pos++, NewOffset);
1729     }
1730     if (HasDeletedOffsetCFIs[Reg]) {
1731       Pos = BF.addCFIInstruction(
1732           &BB, Pos,
1733           MCCFIInstruction::createSameValue(
1734               nullptr, BC.MRI->getDwarfRegNum(Reg, false)));
1735       ++Pos;
1736     }
1737   }
1738   return Pos;
1739 }
1740 
1741 BBIterTy ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
1742                                           BinaryBasicBlock *CurBB,
1743                                           const WorklistItem &Item,
1744                                           int64_t SPVal, int64_t FPVal) {
1745   // Trigger CFI reconstruction for this CSR if necessary - writing to
1746   // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
1747   if ((Item.FIEToInsert.IsStore &&
1748        !DeletedPushCFIs[Item.AffectedReg].empty()) ||
1749       (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
1750       HasDeletedOffsetCFIs[Item.AffectedReg]) {
1751     if (Item.Action == WorklistItem::InsertPushOrPop) {
1752       if (Item.FIEToInsert.IsStore)
1753         PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
1754       else
1755         PopOffsetByReg[Item.AffectedReg] = SPVal;
1756     } else {
1757       if (Item.FIEToInsert.IsStore)
1758         PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
1759       else
1760         PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
1761     }
1762   }
1763 
1764   LLVM_DEBUG({
1765     dbgs() << "Creating stack access with SPVal = " << SPVal
1766            << "; stack offset = " << Item.FIEToInsert.StackOffset
1767            << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
1768            << "\n";
1769   });
1770   MCInst NewInst =
1771       createStackAccess(SPVal, FPVal, Item.FIEToInsert,
1772                         Item.Action == WorklistItem::InsertPushOrPop);
1773   if (InsertionPoint != CurBB->end()) {
1774     LLVM_DEBUG({
1775       dbgs() << "Adding before Inst: ";
1776       InsertionPoint->dump();
1777       dbgs() << "the following inst: ";
1778       NewInst.dump();
1779     });
1780     BBIterTy Iter =
1781         CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
1782     return ++Iter;
1783   }
1784   CurBB->addInstruction(std::move(NewInst));
1785   LLVM_DEBUG(dbgs() << "Adding to BB!\n");
1786   return CurBB->end();
1787 }
1788 
1789 BBIterTy ShrinkWrapping::processInsertionsList(
1790     BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
1791     std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
1792   bool HasInsertions = false;
1793   for (WorklistItem &Item : TodoList) {
1794     if (Item.Action == WorklistItem::Erase ||
1795         Item.Action == WorklistItem::ChangeToAdjustment)
1796       continue;
1797     HasInsertions = true;
1798     break;
1799   }
1800 
1801   if (!HasInsertions)
1802     return InsertionPoint;
1803 
1804   assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
1805            SPVal != StackPointerTracking::EMPTY) ||
1806           (FPVal != StackPointerTracking::SUPERPOSITION &&
1807            FPVal != StackPointerTracking::EMPTY)) &&
1808          "Cannot insert if we have no idea of the stack state here");
1809 
1810   // Revert the effect of PSPT for this location, we want SP Value before
1811   // insertions
1812   if (InsertionPoint == CurBB->end()) {
1813     for (WorklistItem &Item : TodoList) {
1814       if (Item.Action != WorklistItem::InsertPushOrPop)
1815         continue;
1816       if (Item.FIEToInsert.IsStore)
1817         SPVal += Item.FIEToInsert.Size;
1818       if (Item.FIEToInsert.IsLoad)
1819         SPVal -= Item.FIEToInsert.Size;
1820     }
1821   }
1822 
1823   // Reorder POPs to obey the correct dominance relation between them
1824   std::stable_sort(TodoList.begin(), TodoList.end(),
1825                    [&](const WorklistItem &A, const WorklistItem &B) {
1826                      if ((A.Action != WorklistItem::InsertPushOrPop ||
1827                           !A.FIEToInsert.IsLoad) &&
1828                          (B.Action != WorklistItem::InsertPushOrPop ||
1829                           !B.FIEToInsert.IsLoad))
1830                        return false;
1831                      if ((A.Action != WorklistItem::InsertPushOrPop ||
1832                           !A.FIEToInsert.IsLoad))
1833                        return true;
1834                      if ((B.Action != WorklistItem::InsertPushOrPop ||
1835                           !B.FIEToInsert.IsLoad))
1836                        return false;
1837                      return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
1838                    });
1839 
1840   // Process insertions
1841   for (WorklistItem &Item : TodoList) {
1842     if (Item.Action == WorklistItem::Erase ||
1843         Item.Action == WorklistItem::ChangeToAdjustment)
1844       continue;
1845 
1846     InsertionPoint =
1847         processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
1848     if (Item.Action == WorklistItem::InsertPushOrPop &&
1849         Item.FIEToInsert.IsStore)
1850       SPVal -= Item.FIEToInsert.Size;
1851     if (Item.Action == WorklistItem::InsertPushOrPop &&
1852         Item.FIEToInsert.IsLoad)
1853       SPVal += Item.FIEToInsert.Size;
1854   }
1855   return InsertionPoint;
1856 }
1857 
1858 bool ShrinkWrapping::processInsertions() {
1859   PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
1860   PSPT.run();
1861 
1862   bool Changes = false;
1863   for (BinaryBasicBlock &BB : BF) {
1864     // Process insertions before some inst.
1865     for (auto I = BB.begin(); I != BB.end(); ++I) {
1866       MCInst &Inst = *I;
1867       auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
1868           Inst, getAnnotationIndex());
1869       if (!TodoList)
1870         continue;
1871       Changes = true;
1872       std::vector<WorklistItem> List = *TodoList;
1873       LLVM_DEBUG({
1874         dbgs() << "Now processing insertions in " << BB.getName()
1875                << " before inst: ";
1876         Inst.dump();
1877       });
1878       auto Iter = I;
1879       std::pair<int, int> SPTState =
1880           *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
1881       I = processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
1882     }
1883     // Process insertions at the end of bb
1884     auto WRI = Todo.find(&BB);
1885     if (WRI != Todo.end()) {
1886       std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
1887       processInsertionsList(BB.end(), &BB, WRI->second, SPTState.first,
1888                             SPTState.second);
1889       Changes = true;
1890     }
1891   }
1892   return Changes;
1893 }
1894 
1895 void ShrinkWrapping::processDeletions() {
1896   LivenessAnalysis &LA = Info.getLivenessAnalysis();
1897   for (BinaryBasicBlock &BB : BF) {
1898     for (auto II = BB.begin(); II != BB.end(); ++II) {
1899       MCInst &Inst = *II;
1900       auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
1901           Inst, getAnnotationIndex());
1902       if (!TodoList)
1903         continue;
1904       // Process all deletions
1905       for (WorklistItem &Item : *TodoList) {
1906         if (Item.Action != WorklistItem::Erase &&
1907             Item.Action != WorklistItem::ChangeToAdjustment)
1908           continue;
1909 
1910         if (Item.Action == WorklistItem::ChangeToAdjustment) {
1911           // Is flag reg alive across this func?
1912           bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
1913           if (int Sz = BC.MIB->getPushSize(Inst)) {
1914             BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
1915             continue;
1916           }
1917           if (int Sz = BC.MIB->getPopSize(Inst)) {
1918             BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
1919             continue;
1920           }
1921         }
1922 
1923         LLVM_DEBUG({
1924           dbgs() << "Erasing: ";
1925           BC.printInstruction(dbgs(), Inst);
1926         });
1927         II = std::prev(BB.eraseInstruction(II));
1928         break;
1929       }
1930     }
1931   }
1932 }
1933 
1934 void ShrinkWrapping::rebuildCFI() {
1935   const bool FP = Info.getStackPointerTracking().HasFramePointer;
1936   Info.invalidateAll();
1937   if (!FP) {
1938     rebuildCFIForSP();
1939     Info.invalidateAll();
1940   }
1941   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
1942     if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0)
1943       continue;
1944     const int64_t SPValPush = PushOffsetByReg[I];
1945     const int64_t SPValPop = PopOffsetByReg[I];
1946     insertUpdatedCFI(I, SPValPush, SPValPop);
1947     Info.invalidateAll();
1948   }
1949 }
1950 
1951 bool ShrinkWrapping::perform() {
1952   HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
1953   PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
1954   PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
1955   DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
1956 
1957   if (BF.checkForAmbiguousJumpTables()) {
1958     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
1959                       << ".\n");
1960     // We could call disambiguateJumpTables here, but it is probably not worth
1961     // the cost (of duplicating potentially large jump tables that could regress
1962     // dcache misses). Moreover, ambiguous JTs are rare and coming from code
1963     // written in assembly language. Just bail.
1964     return false;
1965   }
1966   SLM.initialize();
1967   CSA.compute();
1968   classifyCSRUses();
1969   pruneUnwantedCSRs();
1970   computeSaveLocations();
1971   computeDomOrder();
1972   moveSaveRestores();
1973   LLVM_DEBUG({
1974     dbgs() << "Func before shrink-wrapping: \n";
1975     BF.dump();
1976   });
1977   SLM.performChanges();
1978   // Early exit if processInsertions doesn't detect any todo items
1979   if (!processInsertions())
1980     return false;
1981   processDeletions();
1982   if (foldIdenticalSplitEdges()) {
1983     const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
1984     (void)Stats;
1985     LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
1986                       << " redundant split edge BBs (" << Stats.second
1987                       << " bytes) for " << BF.getPrintName() << "\n");
1988   }
1989   rebuildCFI();
1990   // We may have split edges, creating BBs that need correct branching
1991   BF.fixBranches();
1992   LLVM_DEBUG({
1993     dbgs() << "Func after shrink-wrapping: \n";
1994     BF.dump();
1995   });
1996   return true;
1997 }
1998 
1999 void ShrinkWrapping::printStats() {
2000   outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
2001          << " spills inserting load/stores and " << SpillsMovedPushPopMode
2002          << " spills inserting push/pops\n";
2003 }
2004 
2005 // Operators necessary as a result of using MCAnnotation
2006 raw_ostream &operator<<(raw_ostream &OS,
2007                         const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
2008   OS << "SWTodo[";
2009   const char *Sep = "";
2010   for (const ShrinkWrapping::WorklistItem &Item : Vec) {
2011     OS << Sep;
2012     switch (Item.Action) {
2013     case ShrinkWrapping::WorklistItem::Erase:
2014       OS << "Erase";
2015       break;
2016     case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
2017       OS << "ChangeToAdjustment";
2018       break;
2019     case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
2020       OS << "InsertLoadOrStore";
2021       break;
2022     case ShrinkWrapping::WorklistItem::InsertPushOrPop:
2023       OS << "InsertPushOrPop";
2024       break;
2025     }
2026     Sep = ", ";
2027   }
2028   OS << "]";
2029   return OS;
2030 }
2031 
2032 raw_ostream &
2033 operator<<(raw_ostream &OS,
2034            const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
2035   OS << "SLMTodo[";
2036   const char *Sep = "";
2037   for (const StackLayoutModifier::WorklistItem &Item : Vec) {
2038     OS << Sep;
2039     switch (Item.Action) {
2040     case StackLayoutModifier::WorklistItem::None:
2041       OS << "None";
2042       break;
2043     case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
2044       OS << "AdjustLoadStoreOffset";
2045       break;
2046     case StackLayoutModifier::WorklistItem::AdjustCFI:
2047       OS << "AdjustCFI";
2048       break;
2049     }
2050     Sep = ", ";
2051   }
2052   OS << "]";
2053   return OS;
2054 }
2055 
2056 bool operator==(const ShrinkWrapping::WorklistItem &A,
2057                 const ShrinkWrapping::WorklistItem &B) {
2058   return (A.Action == B.Action && A.AffectedReg == B.AffectedReg &&
2059           A.Adjustment == B.Adjustment &&
2060           A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad &&
2061           A.FIEToInsert.IsStore == B.FIEToInsert.IsStore &&
2062           A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm &&
2063           A.FIEToInsert.Size == B.FIEToInsert.Size &&
2064           A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple &&
2065           A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset);
2066 }
2067 
2068 bool operator==(const StackLayoutModifier::WorklistItem &A,
2069                 const StackLayoutModifier::WorklistItem &B) {
2070   return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate);
2071 }
2072 
2073 } // end namespace bolt
2074 } // end namespace llvm
2075