1 //===- bolt/Passes/ShrinkWrapping.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ShrinkWrapping class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/ShrinkWrapping.h"
14 #include "bolt/Core/MCPlus.h"
15 #include "bolt/Passes/DataflowInfoManager.h"
16 #include "bolt/Passes/MCF.h"
17 #include "bolt/Utils/CommandLineOpts.h"
18 #include <numeric>
19 #include <stack>
20 
21 #define DEBUG_TYPE "shrinkwrapping"
22 
23 using namespace llvm;
24 
namespace opts {

extern cl::opt<bool> TimeOpts;
extern cl::OptionCategory BoltOptCategory;

// Knob for how cold a basic block must be, relative to the prologue's
// execution count (as a percentage), before it is considered a profitable
// place to move eligible spills to.
static cl::opt<unsigned> ShrinkWrappingThreshold(
    "shrink-wrapping-threshold",
    cl::desc("Percentage of prologue execution count to use as threshold when"
             " evaluating whether a block is cold enough to be profitable to"
             " move eligible spills there"),
    cl::init(30), cl::ZeroOrMore, cl::cat(BoltOptCategory));
} // namespace opts
37 
38 namespace llvm {
39 namespace bolt {
40 
analyzeSaves()41 void CalleeSavedAnalysis::analyzeSaves() {
42   ReachingDefOrUse</*Def=*/true> &RD = Info.getReachingDefs();
43   StackReachingUses &SRU = Info.getStackReachingUses();
44   auto &InsnToBB = Info.getInsnToBBMap();
45   BitVector BlacklistedRegs(BC.MRI->getNumRegs(), false);
46 
47   LLVM_DEBUG(dbgs() << "Checking spill locations\n");
48   for (BinaryBasicBlock &BB : BF) {
49     LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");
50     const MCInst *Prev = nullptr;
51     for (MCInst &Inst : BB) {
52       if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
53         // Blacklist weird stores we don't understand
54         if ((!FIE->IsSimple || FIE->StackOffset >= 0) && FIE->IsStore &&
55             FIE->IsStoreFromReg) {
56           BlacklistedRegs.set(FIE->RegOrImm);
57           CalleeSaved.reset(FIE->RegOrImm);
58           Prev = &Inst;
59           continue;
60         }
61 
62         if (!FIE->IsStore || !FIE->IsStoreFromReg ||
63             BlacklistedRegs[FIE->RegOrImm]) {
64           Prev = &Inst;
65           continue;
66         }
67 
68         // If this reg is defined locally, it is not a callee-saved reg
69         if (RD.isReachedBy(FIE->RegOrImm,
70                            Prev ? RD.expr_begin(*Prev) : RD.expr_begin(BB))) {
71           BlacklistedRegs.set(FIE->RegOrImm);
72           CalleeSaved.reset(FIE->RegOrImm);
73           Prev = &Inst;
74           continue;
75         }
76 
77         // If this stack position is accessed in another function, we are
78         // probably dealing with a parameter passed in a stack -- do not mess
79         // with it
80         if (SRU.isStoreUsed(*FIE,
81                             Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB)),
82             /*IncludeLocalAccesses=*/false) {
83           BlacklistedRegs.set(FIE->RegOrImm);
84           CalleeSaved.reset(FIE->RegOrImm);
85           Prev = &Inst;
86           continue;
87         }
88 
89         // If this stack position is loaded elsewhere in another reg, we can't
90         // update it, so blacklist it.
91         if (SRU.isLoadedInDifferentReg(*FIE, Prev ? SRU.expr_begin(*Prev)
92                                                   : SRU.expr_begin(BB))) {
93           BlacklistedRegs.set(FIE->RegOrImm);
94           CalleeSaved.reset(FIE->RegOrImm);
95           Prev = &Inst;
96           continue;
97         }
98 
99         // Ignore regs with multiple saves
100         if (CalleeSaved[FIE->RegOrImm]) {
101           BlacklistedRegs.set(FIE->RegOrImm);
102           CalleeSaved.reset(FIE->RegOrImm);
103           Prev = &Inst;
104           continue;
105         }
106 
107         CalleeSaved.set(FIE->RegOrImm);
108         SaveFIEByReg[FIE->RegOrImm] = &*FIE;
109         SavingCost[FIE->RegOrImm] += InsnToBB[&Inst]->getKnownExecutionCount();
110         BC.MIB->addAnnotation(Inst, getSaveTag(), FIE->RegOrImm, AllocatorId);
111         OffsetsByReg[FIE->RegOrImm] = FIE->StackOffset;
112         LLVM_DEBUG(dbgs() << "Logging new candidate for Callee-Saved Reg: "
113                           << FIE->RegOrImm << "\n");
114       }
115       Prev = &Inst;
116     }
117   }
118 }
119 
/// Pair each callee-saved register found by analyzeSaves() with its restore
/// (a load from the same stack slot). Registers with suspicious or
/// mismatching restores are dismissed from CalleeSaved.
void CalleeSavedAnalysis::analyzeRestores() {
  ReachingDefOrUse</*Def=*/false> &RU = Info.getReachingUses();

  // Now compute all restores of these callee-saved regs
  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    // Walk backwards so Prev is the instruction that *follows* Inst in
    // program order, matching the backward reaching-uses analysis.
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      MCInst &Inst = *I;
      if (ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst)) {
        // Only loads of registers still considered callee-saved matter here.
        if (!FIE->IsLoad || !CalleeSaved[FIE->RegOrImm]) {
          Prev = &Inst;
          continue;
        }

        // If this reg is used locally after a restore, then we are probably
        // not dealing with a callee-saved reg. Except if this use is by
        // another store, but we don't cover this case yet.
        // Also not callee-saved if this load accesses caller stack or isn't
        // simple.
        if (!FIE->IsSimple || FIE->StackOffset >= 0 ||
            RU.isReachedBy(FIE->RegOrImm,
                           Prev ? RU.expr_begin(*Prev) : RU.expr_begin(BB))) {
          CalleeSaved.reset(FIE->RegOrImm);
          Prev = &Inst;
          continue;
        }
        // If stack offsets between saves/store don't agree with each other,
        // we don't completely understand what's happening here
        if (FIE->StackOffset != OffsetsByReg[FIE->RegOrImm]) {
          CalleeSaved.reset(FIE->RegOrImm);
          LLVM_DEBUG(dbgs() << "Dismissing Callee-Saved Reg because we found a "
                               "mismatching restore: "
                            << FIE->RegOrImm << "\n");
          Prev = &Inst;
          continue;
        }

        LLVM_DEBUG(dbgs() << "Adding matching restore for: " << FIE->RegOrImm
                          << "\n");
        // Keep the first restore FIE seen per register and tag the
        // instruction so getRestoredReg()/getRestoresByReg() can find it.
        if (LoadFIEByReg[FIE->RegOrImm] == nullptr)
          LoadFIEByReg[FIE->RegOrImm] = &*FIE;
        BC.MIB->addAnnotation(Inst, getRestoreTag(), FIE->RegOrImm,
                              AllocatorId);
        HasRestores.set(FIE->RegOrImm);
      }
      Prev = &Inst;
    }
  }
}
169 
getSavesByReg(uint16_t Reg)170 std::vector<MCInst *> CalleeSavedAnalysis::getSavesByReg(uint16_t Reg) {
171   std::vector<MCInst *> Results;
172   for (BinaryBasicBlock &BB : BF)
173     for (MCInst &Inst : BB)
174       if (getSavedReg(Inst) == Reg)
175         Results.push_back(&Inst);
176   return Results;
177 }
178 
getRestoresByReg(uint16_t Reg)179 std::vector<MCInst *> CalleeSavedAnalysis::getRestoresByReg(uint16_t Reg) {
180   std::vector<MCInst *> Results;
181   for (BinaryBasicBlock &BB : BF)
182     for (MCInst &Inst : BB)
183       if (getRestoredReg(Inst) == Reg)
184         Results.push_back(&Inst);
185   return Results;
186 }
187 
~CalleeSavedAnalysis()188 CalleeSavedAnalysis::~CalleeSavedAnalysis() {
189   for (BinaryBasicBlock &BB : BF) {
190     for (MCInst &Inst : BB) {
191       BC.MIB->removeAnnotation(Inst, getSaveTag());
192       BC.MIB->removeAnnotation(Inst, getRestoreTag());
193     }
194   }
195 }
196 
blacklistRegion(int64_t Offset,int64_t Size)197 void StackLayoutModifier::blacklistRegion(int64_t Offset, int64_t Size) {
198   if (BlacklistedRegions[Offset] < Size)
199     BlacklistedRegions[Offset] = Size;
200 }
201 
isRegionBlacklisted(int64_t Offset,int64_t Size)202 bool StackLayoutModifier::isRegionBlacklisted(int64_t Offset, int64_t Size) {
203   for (std::pair<const int64_t, int64_t> Elem : BlacklistedRegions)
204     if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second)
205       return true;
206   return false;
207 }
208 
blacklistAllInConflictWith(int64_t Offset,int64_t Size)209 bool StackLayoutModifier::blacklistAllInConflictWith(int64_t Offset,
210                                                      int64_t Size) {
211   bool HasConflict = false;
212   for (auto Iter = AvailableRegions.begin(); Iter != AvailableRegions.end();) {
213     std::pair<const int64_t, int64_t> &Elem = *Iter;
214     if (Offset + Size > Elem.first && Offset < Elem.first + Elem.second &&
215         (Offset != Elem.first || Size != Elem.second)) {
216       Iter = AvailableRegions.erase(Iter);
217       HasConflict = true;
218       continue;
219     }
220     ++Iter;
221   }
222   if (HasConflict) {
223     blacklistRegion(Offset, Size);
224     return true;
225   }
226   return false;
227 }
228 
/// If \p Point defines the frame pointer, verify it sets FP to the current SP
/// value. Any other kind of FP initialization is not understood, and the
/// whole frame is blacklisted (sentinel region 0/0).
void StackLayoutModifier::checkFramePointerInitialization(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // Only instructions that write the frame pointer are of interest.
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getFramePointer(), *BC.MRI))
    return;

  // Build (reg, value) pairs for the evaluator from the tracked SP/FP state
  // before this point; (0, 0) means the register's value is unknown.
  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  // If the expression cannot be evaluated, conservatively leave things as-is.
  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // Not your regular frame pointer initialization... bail
  if (Output != SPVal)
    blacklistRegion(0, 0);
}
258 
/// If \p Point redefines the stack pointer based on the frame pointer (e.g.
/// an epilogue SP restore), decide whether the restored value needs fixing
/// when the layout changes: annotate it with its slot, or blacklist the
/// frame if the pattern is not understood.
void StackLayoutModifier::checkStackPointerRestore(MCInst &Point) {
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  if (!BC.MII->get(Point.getOpcode())
           .hasDefOfPhysReg(Point, BC.MIB->getStackPointer(), *BC.MRI))
    return;
  // Check if the definition of SP comes from FP -- in this case, this
  // value may need to be updated depending on our stack layout changes
  const MCInstrDesc &InstInfo = BC.MII->get(Point.getOpcode());
  unsigned NumDefs = InstInfo.getNumDefs();
  // Scan the use operands (they follow the defs) for the frame pointer.
  bool UsesFP = false;
  for (unsigned I = NumDefs, E = MCPlus::getNumPrimeOperands(Point); I < E;
       ++I) {
    MCOperand &Operand = Point.getOperand(I);
    if (!Operand.isReg())
      continue;
    if (Operand.getReg() == BC.MIB->getFramePointer()) {
      UsesFP = true;
      break;
    }
  }
  if (!UsesFP)
    return;

  // Setting up evaluation: (reg, value) pairs from the tracked state before
  // this point; (0, 0) means the register's value is unknown.
  int SPVal, FPVal;
  std::tie(SPVal, FPVal) = *SPT.getStateBefore(Point);
  std::pair<MCPhysReg, int64_t> FP;

  if (FPVal != SPT.EMPTY && FPVal != SPT.SUPERPOSITION)
    FP = std::make_pair(BC.MIB->getFramePointer(), FPVal);
  else
    FP = std::make_pair(0, 0);
  std::pair<MCPhysReg, int64_t> SP;

  if (SPVal != SPT.EMPTY && SPVal != SPT.SUPERPOSITION)
    SP = std::make_pair(BC.MIB->getStackPointer(), SPVal);
  else
    SP = std::make_pair(0, 0);

  int64_t Output;
  if (!BC.MIB->evaluateStackOffsetExpr(Point, Output, SP, FP))
    return;

  // If the value is the same of FP, no need to adjust it
  if (Output == FPVal)
    return;

  // If an allocation happened through FP, bail
  if (Output <= SPVal) {
    blacklistRegion(0, 0);
    return;
  }

  // We are restoring SP to an old value based on FP. Mark it as a stack
  // access to be fixed later.
  BC.MIB->addAnnotation(Point, getSlotTag(), Output, AllocatorId);
}
316 
/// Partition stack slots into AvailableRegions (slots we know how to move)
/// and BlacklistedRegions (slots we must not touch), annotating each movable
/// access with its slot offset.
void StackLayoutModifier::classifyStackAccesses() {
  // Understand when stack slots are being used non-locally
  StackReachingUses &SRU = Info.getStackReachingUses();

  for (BinaryBasicBlock &BB : BF) {
    const MCInst *Prev = nullptr;
    // Walk backwards so Prev is the instruction that *follows* Inst in
    // program order, matching the backward stack-reaching-uses analysis.
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      MCInst &Inst = *I;
      checkFramePointerInitialization(Inst);
      checkStackPointerRestore(Inst);
      ErrorOr<const FrameIndexEntry &> FIEX = FA.getFIEFor(Inst);
      if (!FIEX) {
        Prev = &Inst;
        continue;
      }
      // Accesses we can't decode precisely make their slot untouchable.
      if (!FIEX->IsSimple || (FIEX->IsStore && !FIEX->IsStoreFromReg)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // If this stack position is accessed in another function, we are
      // probably dealing with a parameter passed in a stack -- do not mess
      // with it
      if (SRU.isStoreUsed(*FIEX,
                          Prev ? SRU.expr_begin(*Prev) : SRU.expr_begin(BB),
                          /*IncludeLocalAccesses=*/false)) {
        blacklistRegion(FIEX->StackOffset, FIEX->Size);
        Prev = &Inst;
        continue;
      }
      // Now we have a clear stack slot access. Check if its blacklisted or if
      // it conflicts with another chunk.
      if (isRegionBlacklisted(FIEX->StackOffset, FIEX->Size) ||
          blacklistAllInConflictWith(FIEX->StackOffset, FIEX->Size)) {
        Prev = &Inst;
        continue;
      }
      // We are free to go. Add it as available stack slot which we know how
      // to move it.
      AvailableRegions[FIEX->StackOffset] = FIEX->Size;
      BC.MIB->addAnnotation(Inst, getSlotTag(), FIEX->StackOffset, AllocatorId);
      RegionToRegMap[FIEX->StackOffset].insert(FIEX->RegOrImm);
      RegToRegionMap[FIEX->RegOrImm].insert(FIEX->StackOffset);
      LLVM_DEBUG(dbgs() << "Adding region " << FIEX->StackOffset << " size "
                        << (int)FIEX->Size << "\n");
    }
  }
}
365 
/// Walk CFIs in layout order, tracking the CFA register/offset state, and
/// annotate CFIs that reference stack slots so they can be updated when the
/// layout changes. CFIs whose CFA is not SP/FP-based mark the function as
/// not simple.
void StackLayoutModifier::classifyCFIs() {
  // Saved (offset, reg) state for OpRememberState/OpRestoreState pairs.
  std::stack<std::pair<int64_t, uint16_t>> CFIStack;
  // Initial CFA state: DWARF reg 7, offset -8 -- presumably the x86-64 entry
  // convention (CFA = RSP + 8); confirm before reusing on other targets.
  int64_t CfaOffset = -8;
  uint16_t CfaReg = 7;

  // Annotate a CFI with the slot it refers to, but only when the current CFA
  // register is SP or FP; otherwise give up on this function.
  auto recordAccess = [&](MCInst *Inst, int64_t Offset) {
    const uint16_t Reg = *BC.MRI->getLLVMRegNum(CfaReg, /*isEH=*/false);
    if (Reg == BC.MIB->getStackPointer() || Reg == BC.MIB->getFramePointer()) {
      BC.MIB->addAnnotation(*Inst, getSlotTag(), Offset, AllocatorId);
      LLVM_DEBUG(dbgs() << "Recording CFI " << Offset << "\n");
    } else {
      IsSimple = false;
      return;
    }
  };

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    for (MCInst &Inst : *BB) {
      if (!BC.MIB->isCFI(Inst))
        continue;
      const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
      switch (CFI->getOperation()) {
      case MCCFIInstruction::OpDefCfa:
        // DefCfa updates both offset and register; fall through for the reg.
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        LLVM_FALLTHROUGH;
      case MCCFIInstruction::OpDefCfaRegister:
        CfaReg = CFI->getRegister();
        break;
      case MCCFIInstruction::OpDefCfaOffset:
        CfaOffset = -CFI->getOffset();
        recordAccess(&Inst, CfaOffset);
        break;
      case MCCFIInstruction::OpOffset:
        // Register saved at CFA+offset; also remember which register so CFI
        // updates can be matched to it later.
        recordAccess(&Inst, CFI->getOffset());
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpSameValue:
        BC.MIB->addAnnotation(Inst, getOffsetCFIRegTag(),
                              BC.MRI->getLLVMRegNum(CFI->getRegister(),
                                                    /*isEH=*/false),
                              AllocatorId);
        break;
      case MCCFIInstruction::OpRememberState:
        CFIStack.push(std::make_pair(CfaOffset, CfaReg));
        break;
      case MCCFIInstruction::OpRestoreState: {
        assert(!CFIStack.empty() && "Corrupt CFI stack");
        std::pair<int64_t, uint16_t> &Elem = CFIStack.top();
        CFIStack.pop();
        CfaOffset = Elem.first;
        CfaReg = Elem.second;
        break;
      }
      case MCCFIInstruction::OpRelOffset:
      case MCCFIInstruction::OpAdjustCfaOffset:
        llvm_unreachable("Unhandled AdjustCfaOffset");
        break;
      default:
        break;
      }
    }
  }
}
433 
scheduleChange(MCInst & Inst,StackLayoutModifier::WorklistItem Item)434 void StackLayoutModifier::scheduleChange(
435     MCInst &Inst, StackLayoutModifier::WorklistItem Item) {
436   auto &WList = BC.MIB->getOrCreateAnnotationAs<std::vector<WorklistItem>>(
437       Inst, getTodoTag(), AllocatorId);
438   WList.push_back(Item);
439 }
440 
canCollapseRegion(MCInst * DeletedPush)441 bool StackLayoutModifier::canCollapseRegion(MCInst *DeletedPush) {
442   if (!IsSimple || !BC.MIB->isPush(*DeletedPush))
443     return false;
444 
445   ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
446   if (!FIE)
447     return false;
448 
449   return canCollapseRegion(FIE->StackOffset);
450 }
451 
canCollapseRegion(int64_t RegionAddr)452 bool StackLayoutModifier::canCollapseRegion(int64_t RegionAddr) {
453   if (!IsInitialized)
454     initialize();
455   if (!IsSimple)
456     return false;
457 
458   if (CollapsedRegions.count(RegionAddr))
459     return true;
460 
461   // Check if it is possible to readjust all accesses below RegionAddr
462   if (!BlacklistedRegions.empty())
463     return false;
464 
465   return true;
466 }
467 
collapseRegion(MCInst * DeletedPush)468 bool StackLayoutModifier::collapseRegion(MCInst *DeletedPush) {
469   ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(*DeletedPush);
470   if (!FIE)
471     return false;
472   int64_t RegionAddr = FIE->StackOffset;
473   int64_t RegionSz = FIE->Size;
474   return collapseRegion(DeletedPush, RegionAddr, RegionSz);
475 }
476 
/// Schedule the offset adjustments needed to remove the stack region
/// [RegionAddr, RegionAddr + RegionSz) allocated by \p Alloc. Only accesses
/// reached by the deleted allocation are touched. Returns false if the
/// region cannot be collapsed.
bool StackLayoutModifier::collapseRegion(MCInst *Alloc, int64_t RegionAddr,
                                         int64_t RegionSz) {
  if (!canCollapseRegion(RegionAddr))
    return false;

  assert(IsInitialized);
  StackAllocationAnalysis &SAA = Info.getStackAllocationAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;
      // We need to ensure this access is affected by the deleted push
      if (!(*SAA.getStateBefore(Inst))[SAA.ExprToIdx[Alloc]])
        continue;

      if (BC.MIB->isCFI(Inst)) {
        // CFIs referring to slots above the deleted region are unaffected.
        if (Slot > RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot > RegionAddr)
          continue;
        // SP update based on frame pointer
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
        continue;
      }

      // Accesses to the deleted slot itself are marked here and resolved
      // later by setOffsetForCollapsedAccesses().
      if (Slot == RegionAddr) {
        BC.MIB->addAnnotation(Inst, "AccessesDeletedPos", 0U, AllocatorId);
        continue;
      }
      // Pushes/pops get no explicit offset rewrite here.
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;

      // SP-based accesses below the deleted region and FP-based accesses
      // above it keep their encoding.
      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;

      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot > RegionAddr)
        continue;

      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, RegionSz));
    }
  }

  CollapsedRegions.insert(RegionAddr);
  return true;
}
535 
setOffsetForCollapsedAccesses(int64_t NewOffset)536 void StackLayoutModifier::setOffsetForCollapsedAccesses(int64_t NewOffset) {
537   for (BinaryBasicBlock &BB : BF) {
538     for (MCInst &Inst : BB) {
539       if (!BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos"))
540         continue;
541       BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
542       scheduleChange(
543           Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, NewOffset));
544     }
545   }
546 }
547 
canInsertRegion(ProgramPoint P)548 bool StackLayoutModifier::canInsertRegion(ProgramPoint P) {
549   if (!IsInitialized)
550     initialize();
551   if (!IsSimple)
552     return false;
553 
554   StackPointerTracking &SPT = Info.getStackPointerTracking();
555   int64_t RegionAddr = SPT.getStateBefore(P)->first;
556   if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
557     return false;
558 
559   if (InsertedRegions.count(RegionAddr))
560     return true;
561 
562   // Check if we are going to screw up stack accesses at call sites that
563   // pass parameters via stack
564   if (!BlacklistedRegions.empty())
565     return false;
566 
567   return true;
568 }
569 
/// Schedule the adjustments needed to insert a \p RegionSz -byte allocation
/// at program point \p P. Only accesses dominated by P are affected; those
/// referring to slots below the insertion address are shifted by -RegionSz.
/// Returns false if insertion is not safe.
bool StackLayoutModifier::insertRegion(ProgramPoint P, int64_t RegionSz) {
  if (!canInsertRegion(P))
    return false;

  assert(IsInitialized);
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  // This RegionAddr is slightly different from the one seen in collapseRegion
  // This is the value of SP before the allocation the user wants to make.
  int64_t RegionAddr = SPT.getStateBefore(P)->first;
  if (RegionAddr == SPT.SUPERPOSITION || RegionAddr == SPT.EMPTY)
    return false;

  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (!BC.MIB->hasAnnotation(Inst, getSlotTag()))
        continue;
      auto Slot =
          BC.MIB->getAnnotationAs<decltype(FrameIndexEntry::StackOffset)>(
              Inst, getSlotTag());
      if (!AvailableRegions.count(Slot))
        continue;

      // Only accesses dominated by the insertion point are affected.
      if (!(DA.doesADominateB(P, Inst)))
        continue;

      if (BC.MIB->isCFI(Inst)) {
        // CFIs for slots at or above the insertion address are unaffected.
        if (Slot >= RegionAddr)
          continue;
        scheduleChange(Inst, WorklistItem(WorklistItem::AdjustCFI, -RegionSz));
        continue;
      }
      ErrorOr<const FrameIndexEntry &> FIE = FA.getFIEFor(Inst);
      if (!FIE) {
        if (Slot >= RegionAddr)
          continue;
        // SP update based on frame pointer value.
        scheduleChange(
            Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
        continue;
      }

      // SP-based accesses below the insertion address and FP-based accesses
      // at or above it keep their encoding; pushes/pops get no explicit
      // rewrite here.
      if (FIE->StackPtrReg == BC.MIB->getStackPointer() && Slot < RegionAddr)
        continue;
      if (FIE->StackPtrReg == BC.MIB->getFramePointer() && Slot >= RegionAddr)
        continue;
      if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
        continue;
      scheduleChange(
          Inst, WorklistItem(WorklistItem::AdjustLoadStoreOffset, -RegionSz));
    }
  }

  InsertedRegions.insert(RegionAddr);
  return true;
}
626 
/// Apply everything queued via scheduleChange(): rewrite CFI offsets and
/// regenerate stack loads/stores with their adjusted offsets.
void StackLayoutModifier::performChanges() {
  std::set<uint32_t> ModifiedCFIIndices;
  for (BinaryBasicBlock &BB : BF) {
    for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
      MCInst &Inst = *I;
      // Clean up leftover markers from collapseRegion() that were never
      // resolved by setOffsetForCollapsedAccesses(); they are only expected
      // on push/pop instructions.
      if (BC.MIB->hasAnnotation(Inst, "AccessesDeletedPos")) {
        assert(BC.MIB->isPop(Inst) || BC.MIB->isPush(Inst));
        BC.MIB->removeAnnotation(Inst, "AccessesDeletedPos");
      }
      if (!BC.MIB->hasAnnotation(Inst, getTodoTag()))
        continue;
      auto &WList = BC.MIB->getAnnotationAs<std::vector<WorklistItem>>(
          Inst, getTodoTag());
      // Fold all scheduled items into one net adjustment; mixing CFI and
      // load/store adjustments on the same instruction is a logic error.
      int64_t Adjustment = 0;
      WorklistItem::ActionType AdjustmentType = WorklistItem::None;
      for (WorklistItem &WI : WList) {
        if (WI.Action == WorklistItem::None)
          continue;
        assert(WI.Action == WorklistItem::AdjustLoadStoreOffset ||
               WI.Action == WorklistItem::AdjustCFI);
        assert((AdjustmentType == WorklistItem::None ||
                AdjustmentType == WI.Action) &&
               "Conflicting actions requested at the same program point");
        AdjustmentType = WI.Action;
        Adjustment += WI.OffsetUpdate;
      }
      if (!Adjustment)
        continue;
      if (AdjustmentType != WorklistItem::AdjustLoadStoreOffset) {
        assert(BC.MIB->isCFI(Inst));
        uint32_t CFINum = Inst.getOperand(0).getImm();
        // Avoid mutating the same underlying CFI entry twice.
        if (ModifiedCFIIndices.count(CFINum))
          continue;
        ModifiedCFIIndices.insert(CFINum);
        const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
        const MCCFIInstruction::OpType Operation = CFI->getOperation();
        // DefCfa/DefCfaOffset use the opposite sign convention, so negate.
        if (Operation == MCCFIInstruction::OpDefCfa ||
            Operation == MCCFIInstruction::OpDefCfaOffset)
          Adjustment = 0 - Adjustment;
        LLVM_DEBUG(dbgs() << "Changing CFI offset from " << CFI->getOffset()
                          << " to " << (CFI->getOffset() + Adjustment) << "\n");
        BF.mutateCFIOffsetFor(Inst, CFI->getOffset() + Adjustment);
        continue;
      }
      // Load/store adjustment: decode the stack access, then re-emit it with
      // the updated offset.
      int32_t SrcImm = 0;
      MCPhysReg Reg = 0;
      MCPhysReg StackPtrReg = 0;
      int64_t StackOffset = 0;
      bool IsIndexed = false;
      bool IsLoad = false;
      bool IsStore = false;
      bool IsSimple = false;
      bool IsStoreFromReg = false;
      uint8_t Size = 0;
      bool Success = false;
      Success = BC.MIB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg,
                                      Reg, SrcImm, StackPtrReg, StackOffset,
                                      Size, IsSimple, IsIndexed);
      if (!Success) {
        // SP update based on FP value
        Success = BC.MIB->addToImm(Inst, Adjustment, &*BC.Ctx);
        assert(Success);
        continue;
      }
      assert(Success && IsSimple && !IsIndexed && (!IsStore || IsStoreFromReg));
      // SP-relative and FP-relative offsets are adjusted in opposite
      // directions.
      if (StackPtrReg != BC.MIB->getFramePointer())
        Adjustment = -Adjustment;
      if (IsLoad)
        Success = BC.MIB->createRestoreFromStack(
            Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
      else if (IsStore)
        Success = BC.MIB->createSaveToStack(
            Inst, StackPtrReg, StackOffset + Adjustment, Reg, Size);
      LLVM_DEBUG({
        dbgs() << "Adjusted instruction: ";
        Inst.dump();
      });
      assert(Success);
    }
  }
}
708 
/// Lazily populate Available/Blacklisted regions and annotate
/// stack-accessing instructions and CFIs with their slots before any
/// collapse/insert query is answered.
void StackLayoutModifier::initialize() {
  classifyStackAccesses();
  classifyCFIs();
  IsInitialized = true;
}
714 
// Pass-wide statistics; std::atomic presumably because functions may be
// processed concurrently -- confirm against the pass driver.
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedRegularMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedPushPopMode{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsMovedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::SpillsFailedDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::InstrDynamicCount{0};
std::atomic<std::uint64_t> ShrinkWrapping::StoreDynamicCount{0};

// Shorthand for iterators over a basic block's instruction list.
using BBIterTy = BinaryBasicBlock::iterator;
723 
/// For every callee-saved register candidate, record (as dataflow expression
/// indices in UsesByReg) the instructions that use it, excluding the
/// register's own save and restore. When the function uses a frame pointer,
/// calls are treated as uses of every FP-aliased CSR.
void ShrinkWrapping::classifyCSRUses() {
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  UsesByReg = std::vector<BitVector>(BC.MRI->getNumRegs(),
                                     BitVector(DA.NumInstrs, false));

  const BitVector &FPAliases = BC.MIB->getAliases(BC.MIB->getFramePointer());
  for (BinaryBasicBlock &BB : BF) {
    for (MCInst &Inst : BB) {
      if (BC.MIB->isCFI(Inst))
        continue;
      // Registers touched by this instruction, narrowed to CSR candidates.
      BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
      BC.MIB->getTouchedRegs(Inst, BV);
      BV &= CSA.CalleeSaved;
      for (int I : BV.set_bits()) {
        if (I == 0)
          continue;
        // A reg's own save/restore does not count as a use of it.
        if (CSA.getSavedReg(Inst) != I && CSA.getRestoredReg(Inst) != I)
          UsesByReg[I].set(DA.ExprToIdx[&Inst]);
      }
      // With a frame pointer in use, calls count as uses of FP-aliased CSRs.
      if (!SPT.HasFramePointer || !BC.MIB->isCall(Inst))
        continue;
      BV = CSA.CalleeSaved;
      BV &= FPAliases;
      for (int I : BV.set_bits())
        UsesByReg[I].set(DA.ExprToIdx[&Inst]);
    }
  }
}
753 
pruneUnwantedCSRs()754 void ShrinkWrapping::pruneUnwantedCSRs() {
755   BitVector ParamRegs = BC.MIB->getRegsUsedAsParams();
756   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
757     if (!CSA.CalleeSaved[I])
758       continue;
759     if (ParamRegs[I]) {
760       CSA.CalleeSaved.reset(I);
761       continue;
762     }
763     if (UsesByReg[I].empty()) {
764       LLVM_DEBUG(
765           dbgs()
766           << "Dismissing Callee-Saved Reg because we found no uses of it:" << I
767           << "\n");
768       CSA.CalleeSaved.reset(I);
769       continue;
770     }
771     if (!CSA.HasRestores[I]) {
772       LLVM_DEBUG(
773           dbgs() << "Dismissing Callee-Saved Reg because it does not have "
774                     "restores:"
775                  << I << "\n");
776       CSA.CalleeSaved.reset(I);
777     }
778   }
779 }
780 
/// For each CSR, collect candidate save positions (the first instruction of
/// each basic block that dominates all of the register's uses, is not inside
/// a loop, and has known stack state), then sort them hottest-first and
/// record the matching execution counts in BestSaveCount.
void ShrinkWrapping::computeSaveLocations() {
  BestSavePos = std::vector<std::vector<MCInst *>>(BC.MRI->getNumRegs());
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
  StackPointerTracking &SPT = Info.getStackPointerTracking();

  LLVM_DEBUG(dbgs() << "Checking save/restore possibilities\n");
  for (BinaryBasicBlock &BB : BF) {
    LLVM_DEBUG(dbgs() << "\tNow at BB " << BB.getName() << "\n");

    MCInst *First = BB.begin() != BB.end() ? &*BB.begin() : nullptr;
    if (!First)
      continue;

    // Use reaching instructions to detect if we are inside a loop - if we
    // are, do not consider this BB as valid placement for saves.
    if (RI.isInLoop(BB))
      continue;

    const std::pair<int, int> SPFP = *SPT.getStateBefore(*First);
    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY))
      continue;

    for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
      if (!CSA.CalleeSaved[I])
        continue;

      // A block is a valid save position for reg I only if its first
      // instruction dominates every recorded use of I.
      BitVector BBDominatedUses = BitVector(DA.NumInstrs, false);
      for (int J : UsesByReg[I].set_bits())
        if (DA.doesADominateB(*First, J))
          BBDominatedUses.set(J);
      LLVM_DEBUG(dbgs() << "\t\tBB " << BB.getName() << " dominates "
                        << BBDominatedUses.count() << " uses for reg " << I
                        << ". Total uses for reg is " << UsesByReg[I].count()
                        << "\n");
      BBDominatedUses &= UsesByReg[I];
      if (BBDominatedUses == UsesByReg[I]) {
        LLVM_DEBUG(dbgs() << "\t\t\tAdded " << BB.getName()
                          << " as a save pos for " << I << "\n");
        BestSavePos[I].push_back(First);
        LLVM_DEBUG({
          dbgs() << "Dominated uses are:\n";
          for (int J : UsesByReg[I].set_bits()) {
            dbgs() << "Idx " << J << ": ";
            BC.printInstruction(dbgs(), *DA.Expressions[J]);
            DA.Expressions[J]->dump();
          }
        });
      }
    }
  }

  BestSaveCount = std::vector<std::vector<uint64_t>>(BC.MRI->getNumRegs());

  auto &InsnToBB = Info.getInsnToBBMap();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    if (!CSA.CalleeSaved[I])
      continue;

    // Hottest candidate first; stable sort keeps discovery order for ties.
    std::stable_sort(BestSavePos[I].begin(), BestSavePos[I].end(),
                     [&](const MCInst *A, const MCInst *B) {
                       const BinaryBasicBlock *BBA = InsnToBB[A];
                       const BinaryBasicBlock *BBB = InsnToBB[B];
                       const uint64_t CountA = BBA->getKnownExecutionCount();
                       const uint64_t CountB = BBB->getKnownExecutionCount();
                       return CountB < CountA;
                     });

    // BestSaveCount[I][K] is the execution count of BestSavePos[I][K]'s
    // block, kept in the same order as the sorted positions.
    for (MCInst *Pos : BestSavePos[I]) {
      const BinaryBasicBlock *BB = InsnToBB[Pos];
      const uint64_t Count = BB->getKnownExecutionCount();
      BestSaveCount[I].push_back(Count);
    }
  }
}
858 
computeDomOrder()859 void ShrinkWrapping::computeDomOrder() {
860   DomOrder = std::vector<MCPhysReg>(BC.MRI->getNumRegs(), 0);
861   std::vector<MCPhysReg> Order;
862   for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
863     Order.push_back(I);
864   }
865 
866   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
867   auto &InsnToBB = Info.getInsnToBBMap();
868   llvm::sort(Order, [&](const MCPhysReg &A, const MCPhysReg &B) {
869     BinaryBasicBlock *BBA =
870         BestSavePos[A].size() ? InsnToBB[BestSavePos[A].back()] : nullptr;
871     BinaryBasicBlock *BBB =
872         BestSavePos[B].size() ? InsnToBB[BestSavePos[B].back()] : nullptr;
873     if (BBA == BBB)
874       return A < B;
875     if (!BBA && BBB)
876       return false;
877     if (BBA && !BBB)
878       return true;
879     if (DA.doesADominateB(*BestSavePos[A].back(), *BestSavePos[B].back()))
880       return true;
881     if (DA.doesADominateB(*BestSavePos[B].back(), *BestSavePos[A].back()))
882       return false;
883     return A < B;
884   });
885 
886   for (MCPhysReg I = 0, E = BC.MRI->getNumRegs(); I != E; ++I)
887     DomOrder[Order[I]] = I;
888 }
889 
isBestSavePosCold(unsigned CSR,MCInst * & BestPosSave,uint64_t & TotalEstimatedWin)890 bool ShrinkWrapping::isBestSavePosCold(unsigned CSR, MCInst *&BestPosSave,
891                                        uint64_t &TotalEstimatedWin) {
892   const uint64_t CurSavingCost = CSA.SavingCost[CSR];
893   if (!CSA.CalleeSaved[CSR])
894     return false;
895 
896   assert(BestSaveCount[CSR].size() == BestSavePos[CSR].size() &&
897          "save position vectors out of sync");
898   if (BestSaveCount[CSR].empty())
899     return false;
900 
901   const uint64_t BestCount = BestSaveCount[CSR].back();
902   BestPosSave = BestSavePos[CSR].back();
903   if (BestCount >= (opts::ShrinkWrappingThreshold / 100.0) * CurSavingCost)
904     return false;
905 
906   LLVM_DEBUG({
907     auto &InsnToBB = Info.getInsnToBBMap();
908     dbgs() << "Better position for saves found in func " << BF.getPrintName()
909            << " count << " << BF.getKnownExecutionCount() << "\n";
910     dbgs() << "Reg: " << CSR << "; New BB: " << InsnToBB[BestPosSave]->getName()
911            << " Freq reduction: " << (CurSavingCost - BestCount) << "\n";
912   });
913 
914   TotalEstimatedWin = CurSavingCost - BestCount;
915   return true;
916 }
917 
/// Auxiliary function used to create basic blocks for critical edges and
/// update the dominance frontier with these new locations. \p Frontier holds
/// the current frontier program points; \p IsCritEdge flags, per frontier
/// entry, whether splitting is needed; \p From and \p To give, per entry,
/// the source block and the destination blocks of the edges to split.
void ShrinkWrapping::splitFrontierCritEdges(
    BinaryFunction *Func, SmallVector<ProgramPoint, 4> &Frontier,
    const SmallVector<bool, 4> &IsCritEdge,
    const SmallVector<BinaryBasicBlock *, 4> &From,
    const SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> &To) {
  LLVM_DEBUG(dbgs() << "splitFrontierCritEdges: Now handling func "
                    << BF.getPrintName() << "\n");
  // For every FromBB, there might be one or more critical edges, with
  // To[I] containing destination BBs. It's important to memorize
  // the original size of the Frontier as we may append to it while splitting
  // critical edges originating with blocks with multiple destinations.
  for (size_t I = 0, IE = Frontier.size(); I < IE; ++I) {
    if (!IsCritEdge[I])
      continue;
    if (To[I].empty())
      continue;
    BinaryBasicBlock *FromBB = From[I];
    LLVM_DEBUG(dbgs() << " - Now handling FrontierBB " << FromBB->getName()
                      << "\n");
    // Split edge for every DestinationBBs
    for (size_t DI = 0, DIE = To[I].size(); DI < DIE; ++DI) {
      BinaryBasicBlock *DestinationBB = To[I][DI];
      LLVM_DEBUG(dbgs() << "   - Dest : " << DestinationBB->getName() << "\n");
      BinaryBasicBlock *NewBB = Func->splitEdge(FromBB, DestinationBB);
      // Insert dummy instruction so this BB is never empty (we need this for
      // PredictiveStackPointerTracking to work, since it annotates instructions
      // and not BBs).
      if (NewBB->empty()) {
        MCInst NewInst;
        BC.MIB->createNoop(NewInst);
        NewBB->addInstruction(std::move(NewInst));
        // The nop is only a placeholder; it is scheduled for deletion once
        // the analyses have run.
        scheduleChange(&*NewBB->begin(), WorklistItem(WorklistItem::Erase, 0));
      }

      // Update frontier
      ProgramPoint NewFrontierPP = ProgramPoint::getLastPointAt(*NewBB);
      if (DI == 0) {
        // Update frontier inplace
        Frontier[I] = NewFrontierPP;
        LLVM_DEBUG(dbgs() << "   - Update frontier with " << NewBB->getName()
                          << '\n');
      } else {
        // Append new frontier to the end of the list
        Frontier.push_back(NewFrontierPP);
        LLVM_DEBUG(dbgs() << "   - Append frontier " << NewBB->getName()
                          << '\n');
      }
    }
  }
}
970 
/// Compute the program points where restores must be placed for a save of
/// \p CSR hosted at \p BestPosSave: the dominance frontier of the save
/// point. Critical edges crossing the frontier are split so each restore
/// gets a dedicated block. Returns an empty vector when placement is not
/// viable (a frontier terminator uses the CSR, or an invoke sits at the
/// frontier). \p TotalEstimatedWin is not used by the placement logic here.
SmallVector<ProgramPoint, 4>
ShrinkWrapping::doRestorePlacement(MCInst *BestPosSave, unsigned CSR,
                                   uint64_t TotalEstimatedWin) {
  SmallVector<ProgramPoint, 4> Frontier;
  SmallVector<bool, 4> IsCritEdge;
  DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();

  SmallVector<BinaryBasicBlock *, 4> CritEdgesFrom;
  SmallVector<SmallVector<BinaryBasicBlock *, 4>, 4> CritEdgesTo;
  // In case of a critical edge, we need to create extra BBs to host restores
  // into edges transitioning to the dominance frontier, otherwise we pull these
  // restores to inside the dominated area.
  Frontier = DA.getDominanceFrontierFor(*BestPosSave).takeVector();
  LLVM_DEBUG({
    dbgs() << "Dumping dominance frontier for ";
    BC.printInstruction(dbgs(), *BestPosSave);
    for (ProgramPoint &PP : Frontier)
      if (PP.isInst())
        BC.printInstruction(dbgs(), *PP.getInst());
      else
        dbgs() << PP.getBB()->getName() << "\n";
  });
  for (ProgramPoint &PP : Frontier) {
    bool HasCritEdges = false;
    // A terminator at the frontier that itself uses the CSR leaves no room
    // to insert the restore before the register is needed: give up.
    if (PP.isInst() && BC.MIB->isTerminator(*PP.getInst()) &&
        doesInstUsesCSR(*PP.getInst(), CSR)) {
      Frontier.clear();
      return Frontier;
    }
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    CritEdgesFrom.emplace_back(FrontierBB);
    // Start an empty destination list for this frontier entry.
    CritEdgesTo.emplace_back(0);
    SmallVector<BinaryBasicBlock *, 4> &Dests = CritEdgesTo.back();
    // Check for invoke instructions at the dominance frontier, which indicates
    // the landing pad is not dominated.
    if (PP.isInst() && BC.MIB->isInvoke(*PP.getInst())) {
      Frontier.clear();
      return Frontier;
    }
    // Successors not dominated by the save are the destinations of edges
    // that would need splitting.
    doForAllSuccs(*FrontierBB, [&](ProgramPoint P) {
      if (!DA.doesADominateB(*BestPosSave, P)) {
        Dests.emplace_back(Info.getParentBB(P));
        return;
      }
      HasCritEdges = true;
    });
    IsCritEdge.push_back(HasCritEdges);
  }
  // Restores cannot be placed in empty BBs because we have a dataflow
  // analysis that depends on insertions happening before real instructions
  // (PredictiveStackPointerTracking). Detect now for empty BBs and add a
  // dummy nop that is scheduled to be removed later.
  bool InvalidateRequired = false;
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->size() != 0)
      continue;
    MCInst NewInst;
    BC.MIB->createNoop(NewInst);
    auto II = BB->addInstruction(std::move(NewInst));
    scheduleChange(&*II, WorklistItem(WorklistItem::Erase, 0));
    InvalidateRequired = true;
  }
  // If any frontier entry was flagged, split the corresponding edges.
  if (std::accumulate(IsCritEdge.begin(), IsCritEdge.end(), 0)) {
    LLVM_DEBUG({
      dbgs() << "Now detected critical edges in the following frontier:\n";
      for (ProgramPoint &PP : Frontier) {
        if (PP.isBB()) {
          dbgs() << "  BB: " << PP.getBB()->getName() << "\n";
        } else {
          dbgs() << "  Inst: ";
          PP.getInst()->dump();
        }
      }
    });
    splitFrontierCritEdges(&BF, Frontier, IsCritEdge, CritEdgesFrom,
                           CritEdgesTo);
    InvalidateRequired = true;
  }
  if (InvalidateRequired) {
    // BitVectors that represent all insns of the function are invalid now
    // since we changed BBs/Insts. Re-run steps that depend on pointers being
    // valid
    Info.invalidateAll();
    classifyCSRUses();
  }
  return Frontier;
}
1058 
validatePushPopsMode(unsigned CSR,MCInst * BestPosSave,int64_t SaveOffset)1059 bool ShrinkWrapping::validatePushPopsMode(unsigned CSR, MCInst *BestPosSave,
1060                                           int64_t SaveOffset) {
1061   if (FA.requiresAlignment(BF)) {
1062     LLVM_DEBUG({
1063       dbgs() << "Reg " << CSR
1064              << " is not using push/pops due to function "
1065                 "alignment requirements.\n";
1066     });
1067     return false;
1068   }
1069   if (FA.hasStackArithmetic(BF)) {
1070     LLVM_DEBUG({
1071       dbgs() << "Reg " << CSR
1072              << " is not using push/pops due to function "
1073                 "taking the address of a stack position.\n";
1074     });
1075     return false;
1076   }
1077   for (MCInst *Save : CSA.getSavesByReg(CSR)) {
1078     if (!SLM.canCollapseRegion(Save)) {
1079       LLVM_DEBUG(dbgs() << "Reg " << CSR << " cannot collapse region.\n");
1080       return false;
1081     }
1082   }
1083   // Abort if one of the restores for this CSR is not a POP.
1084   for (MCInst *Load : CSA.getRestoresByReg(CSR)) {
1085     if (!BC.MIB->isPop(*Load)) {
1086       LLVM_DEBUG(dbgs() << "Reg " << CSR << " has a mismatching restore.\n");
1087       return false;
1088     }
1089   }
1090 
1091   StackPointerTracking &SPT = Info.getStackPointerTracking();
1092   // Abort if we are inserting a push into an entry BB (offset -8) and this
1093   // func sets up a frame pointer.
1094   if (!SLM.canInsertRegion(BestPosSave) || SaveOffset == SPT.SUPERPOSITION ||
1095       SaveOffset == SPT.EMPTY || (SaveOffset == -8 && SPT.HasFramePointer)) {
1096     LLVM_DEBUG({
1097       dbgs() << "Reg " << CSR
1098              << " cannot insert region or we are "
1099                 "trying to insert a push into entry bb.\n";
1100     });
1101     return false;
1102   }
1103   return true;
1104 }
1105 
/// Adjust the restore points chosen for \p CSR so that each pop is placed
/// where the stack pointer offset equals \p SaveOffset (the offset at the
/// save) and no use of the register is still reachable past that point.
/// Returns the fixed points, or an empty vector when no valid spot exists
/// for some restore — the caller then falls back to load/store mode.
SmallVector<ProgramPoint, 4> ShrinkWrapping::fixPopsPlacements(
    const SmallVector<ProgramPoint, 4> &RestorePoints, int64_t SaveOffset,
    unsigned CSR) {
  SmallVector<ProgramPoint, 4> FixedRestorePoints = RestorePoints;
  // Moving pop locations to the correct sp offset
  ReachingInsns<true> &RI = Info.getReachingInsnsBackwards();
  StackPointerTracking &SPT = Info.getStackPointerTracking();
  for (ProgramPoint &PP : FixedRestorePoints) {
    BinaryBasicBlock *BB = Info.getParentBB(PP);
    bool Found = false;
    // First try the very end of the block: the SP offset there must match
    // and no use of the CSR may still be reachable from that point.
    if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
        SaveOffset) {
      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
      BV &= UsesByReg[CSR];
      if (!BV.any()) {
        Found = true;
        PP = BB;
        continue;
      }
    }
    // Otherwise scan the block backwards for the latest instruction with a
    // matching SP offset and no reachable use of the CSR.
    for (auto RIt = BB->rbegin(), End = BB->rend(); RIt != End; ++RIt) {
      if (SPT.getStateBefore(*RIt)->first == SaveOffset) {
        BitVector BV = *RI.getStateAt(*RIt);
        BV &= UsesByReg[CSR];
        if (!BV.any()) {
          Found = true;
          PP = &*RIt;
          break;
        }
      }
    }
    if (!Found) {
      LLVM_DEBUG({
        dbgs() << "Could not find restore insertion point for " << CSR
               << ", falling back to load/store mode\n";
      });
      FixedRestorePoints.clear();
      return FixedRestorePoints;
    }
  }
  return FixedRestorePoints;
}
1148 
/// Schedule removal (or conversion to plain stack adjustments) of all
/// existing saves and restores of \p CSR, together with their associated CFI
/// instructions. When \p UsePushPops is true the old instructions are erased
/// outright; otherwise they are rewritten as SP adjustments. Immediates of
/// deleted push/pop CFIs are recorded so they can be replayed later.
void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR,
                                                    bool UsePushPops) {

  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    // CFIs seen (walking backwards) since the last non-CFI instruction;
    // these are attributed to the next save/restore we encounter.
    std::vector<MCInst *> CFIs;
    for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) {
      MCInst &Inst = *I;
      if (BC.MIB->isCFI(Inst)) {
        // Delete all offset CFIs related to this CSR
        if (SLM.getOffsetCFIReg(Inst) == CSR) {
          HasDeletedOffsetCFIs[CSR] = true;
          scheduleChange(&Inst, WorklistItem(WorklistItem::Erase, CSR));
          continue;
        }
        CFIs.push_back(&Inst);
        continue;
      }

      uint16_t SavedReg = CSA.getSavedReg(Inst);
      uint16_t RestoredReg = CSA.getRestoredReg(Inst);
      // Not a save/restore of our CSR: the pending CFIs belong to this
      // other instruction, so drop them.
      if (SavedReg != CSR && RestoredReg != CSR) {
        CFIs.clear();
        continue;
      }

      scheduleChange(&Inst, WorklistItem(UsePushPops
                                             ? WorklistItem::Erase
                                             : WorklistItem::ChangeToAdjustment,
                                         CSR));

      // Delete associated CFIs
      const bool RecordDeletedPushCFIs =
          SavedReg == CSR && DeletedPushCFIs[CSR].empty();
      const bool RecordDeletedPopCFIs =
          RestoredReg == CSR && DeletedPopCFIs[CSR].empty();
      for (MCInst *CFI : CFIs) {
        const MCCFIInstruction *MCCFI = BF.getCFIFor(*CFI);
        // Do not touch these...
        if (MCCFI->getOperation() == MCCFIInstruction::OpRestoreState ||
            MCCFI->getOperation() == MCCFIInstruction::OpRememberState)
          continue;
        scheduleChange(CFI, WorklistItem(WorklistItem::Erase, CSR));
        if (RecordDeletedPushCFIs) {
          // Do not record this to be replayed later because we are going to
          // rebuild it.
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPushCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
        if (RecordDeletedPopCFIs) {
          if (MCCFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
            continue;
          DeletedPopCFIs[CSR].push_back(CFI->getOperand(0).getImm());
        }
      }
      CFIs.clear();
    }
  }
}
1208 
doesInstUsesCSR(const MCInst & Inst,uint16_t CSR)1209 bool ShrinkWrapping::doesInstUsesCSR(const MCInst &Inst, uint16_t CSR) {
1210   if (BC.MIB->isCFI(Inst) || CSA.getSavedReg(Inst) == CSR ||
1211       CSA.getRestoredReg(Inst) == CSR)
1212     return false;
1213   BitVector BV = BitVector(BC.MRI->getNumRegs(), false);
1214   BC.MIB->getTouchedRegs(Inst, BV);
1215   return BV[CSR];
1216 }
1217 
/// Schedule insertion of the new save at \p BestPosSave and of restores at
/// each point in \p RestorePoints for register \p CSR. \p UsePushPops
/// selects between push/pop and load/store insertion. Restore points that
/// land on a terminator, on an instruction using the CSR, or right after a
/// prefix are re-anchored to a safe position first.
void ShrinkWrapping::scheduleSaveRestoreInsertions(
    unsigned CSR, MCInst *BestPosSave,
    SmallVector<ProgramPoint, 4> &RestorePoints, bool UsePushPops) {
  auto &InsnToBB = Info.getInsnToBBMap();
  const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[CSR];
  const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[CSR];
  assert(FIESave && FIELoad && "Invalid CSR");

  LLVM_DEBUG({
    dbgs() << "Scheduling save insertion at: ";
    BestPosSave->dump();
  });

  scheduleChange(BestPosSave,
                 UsePushPops ? WorklistItem::InsertPushOrPop
                             : WorklistItem::InsertLoadOrStore,
                 *FIESave, CSR);

  for (ProgramPoint &PP : RestorePoints) {
    BinaryBasicBlock *FrontierBB = Info.getParentBB(PP);
    LLVM_DEBUG({
      dbgs() << "Scheduling restore insertion at: ";
      if (PP.isInst())
        PP.getInst()->dump();
      else
        dbgs() << PP.getBB()->getName() << "\n";
    });
    // If a terminator precedes this point in the block, anchor the restore
    // at that terminator instead so it is not placed after control leaves.
    MCInst *Term =
        FrontierBB->getTerminatorBefore(PP.isInst() ? PP.getInst() : nullptr);
    if (Term)
      PP = Term;
    // A restore must not separate an instruction from its prefix.
    bool PrecededByPrefix = false;
    if (PP.isInst()) {
      auto Iter = FrontierBB->findInstruction(PP.getInst());
      if (Iter != FrontierBB->end() && Iter != FrontierBB->begin()) {
        --Iter;
        PrecededByPrefix = BC.MIB->isPrefix(*Iter);
      }
    }
    if (PP.isInst() &&
        (doesInstUsesCSR(*PP.getInst(), CSR) || PrecededByPrefix)) {
      assert(!InsnToBB[PP.getInst()]->hasTerminatorAfter(PP.getInst()) &&
             "cannot move to end of bb");
      // Retarget the insertion to the containing basic block.
      scheduleChange(InsnToBB[PP.getInst()],
                     UsePushPops ? WorklistItem::InsertPushOrPop
                                 : WorklistItem::InsertLoadOrStore,
                     *FIELoad, CSR);
      continue;
    }
    scheduleChange(PP,
                   UsePushPops ? WorklistItem::InsertPushOrPop
                               : WorklistItem::InsertLoadOrStore,
                   *FIELoad, CSR);
  }
}
1273 
/// Top-level driver for relocating the saves/restores of every callee-saved
/// register: for each CSR, pick the best (coldest) candidate save position,
/// compute restore placement, choose between push/pop and load/store modes,
/// and schedule all insertions/removals. Also maintains the dynamic-count
/// statistics (SpillsMoved*/SpillsFailed*).
void ShrinkWrapping::moveSaveRestores() {
  bool DisablePushPopMode = false;
  bool UsedPushPopMode = false;
  // Keeps info about successfully moved regs: reg index, save position and
  // save size
  std::vector<std::tuple<unsigned, MCInst *, size_t>> MovedRegs;
  uint64_t TotalEstimatedWin = 0;

  computeDomOrder();
  for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
    MCInst *BestPosSave = nullptr;
    uint64_t EstimatedWin = 0;
    SmallVector<ProgramPoint, 4> RestorePoints;
    // Try candidate save positions from coldest to hottest until restore
    // placement succeeds or we run out of candidates.
    while (RestorePoints.empty() &&
           isBestSavePosCold(I, BestPosSave, EstimatedWin)) {
      RestorePoints = doRestorePlacement(BestPosSave, I, EstimatedWin);
      if (RestorePoints.empty()) {
        LLVM_DEBUG({
          dbgs() << "Dropping opportunity because restore placement failed"
                    " -- total est. freq reduc: "
                 << EstimatedWin << ". Will try "
                 << (BestSaveCount[I].size() - 1) << " more times.\n";
        });
        // Discard this candidate and recompute the ordering before retrying.
        BestSaveCount[I].pop_back();
        BestSavePos[I].pop_back();
        computeDomOrder();
      }
    }
    if (RestorePoints.empty()) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    const FrameIndexEntry *FIESave = CSA.SaveFIEByReg[I];
    const FrameIndexEntry *FIELoad = CSA.LoadFIEByReg[I];
    (void)FIELoad;
    assert(FIESave && FIELoad);
    StackPointerTracking &SPT = Info.getStackPointerTracking();
    const std::pair<int, int> SPFP = *SPT.getStateBefore(*BestPosSave);
    int SaveOffset = SPFP.first;
    uint8_t SaveSize = FIESave->Size;

    // If we don't know stack state at this point, bail
    if ((SPFP.first == SPT.SUPERPOSITION || SPFP.first == SPT.EMPTY) &&
        (SPFP.second == SPT.SUPERPOSITION || SPFP.second == SPT.EMPTY)) {
      SpillsFailedDynamicCount += EstimatedWin;
      continue;
    }

    // Operation mode: if true, will insert push/pops instead of loads/restores
    bool UsePushPops = validatePushPopsMode(I, BestPosSave, SaveOffset);

    if (UsePushPops) {
      SmallVector<ProgramPoint, 4> FixedRestorePoints =
          fixPopsPlacements(RestorePoints, SaveOffset, I);
      if (FixedRestorePoints.empty())
        UsePushPops = false;
      else
        RestorePoints = FixedRestorePoints;
    }

    // Disable push-pop mode for all CSRs in this function
    if (!UsePushPops)
      DisablePushPopMode = true;
    else
      UsedPushPopMode = true;

    scheduleOldSaveRestoresRemoval(I, UsePushPops);
    scheduleSaveRestoreInsertions(I, BestPosSave, RestorePoints, UsePushPops);
    MovedRegs.emplace_back(std::make_tuple(I, BestPosSave, SaveSize));
    TotalEstimatedWin += EstimatedWin;
  }

  // Revert push-pop mode if it failed for a single CSR
  if (DisablePushPopMode && UsedPushPopMode) {
    UsedPushPopMode = false;
    for (BinaryBasicBlock &BB : BF) {
      // Downgrade BB-level scheduled insertions to load/store...
      auto WRI = Todo.find(&BB);
      if (WRI != Todo.end()) {
        std::vector<WorklistItem> &TodoList = WRI->second;
        for (WorklistItem &Item : TodoList)
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
      }
      // ...and do the same for instruction-level scheduled items.
      for (auto I = BB.rbegin(), E = BB.rend(); I != E; ++I) {
        MCInst &Inst = *I;
        auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
            Inst, getAnnotationIndex());
        if (!TodoList)
          continue;
        bool isCFI = BC.MIB->isCFI(Inst);
        for (WorklistItem &Item : *TodoList) {
          if (Item.Action == WorklistItem::InsertPushOrPop)
            Item.Action = WorklistItem::InsertLoadOrStore;
          // Erasure of old saves/restores was only valid in push-pop mode;
          // convert them to SP adjustments (scheduled CFI erases stay).
          if (!isCFI && Item.Action == WorklistItem::Erase)
            Item.Action = WorklistItem::ChangeToAdjustment;
        }
      }
    }
  }
  SpillsMovedDynamicCount += TotalEstimatedWin;

  // Update statistics
  if (!UsedPushPopMode) {
    SpillsMovedRegularMode += MovedRegs.size();
    return;
  }

  // Schedule modifications to stack-accessing instructions via
  // StackLayoutModifier.
  SpillsMovedPushPopMode += MovedRegs.size();
  for (std::tuple<unsigned, MCInst *, size_t> &I : MovedRegs) {
    unsigned RegNdx;
    MCInst *SavePos;
    size_t SaveSize;
    std::tie(RegNdx, SavePos, SaveSize) = I;
    for (MCInst *Save : CSA.getSavesByReg(RegNdx))
      SLM.collapseRegion(Save);
    SLM.insertRegion(SavePos, SaveSize);
  }
}
1395 
1396 namespace {
1397 /// Helper function to identify whether two basic blocks created by splitting
1398 /// a critical edge have the same contents.
isIdenticalSplitEdgeBB(const BinaryContext & BC,const BinaryBasicBlock & A,const BinaryBasicBlock & B)1399 bool isIdenticalSplitEdgeBB(const BinaryContext &BC, const BinaryBasicBlock &A,
1400                             const BinaryBasicBlock &B) {
1401   if (A.succ_size() != B.succ_size())
1402     return false;
1403   if (A.succ_size() != 1)
1404     return false;
1405 
1406   if (*A.succ_begin() != *B.succ_begin())
1407     return false;
1408 
1409   if (A.size() != B.size())
1410     return false;
1411 
1412   // Compare instructions
1413   auto I = A.begin(), E = A.end();
1414   auto OtherI = B.begin(), OtherE = B.end();
1415   while (I != E && OtherI != OtherE) {
1416     if (I->getOpcode() != OtherI->getOpcode())
1417       return false;
1418     if (!BC.MIB->equals(*I, *OtherI, [](const MCSymbol *A, const MCSymbol *B) {
1419           return true;
1420         }))
1421       return false;
1422     ++I;
1423     ++OtherI;
1424   }
1425   return true;
1426 }
1427 } // namespace
1428 
/// Fold duplicate split-edge blocks that have identical contents and branch
/// to the same successor: predecessors of the duplicate are redirected to
/// the surviving block and the duplicate is invalidated. Returns true when
/// any block was folded.
bool ShrinkWrapping::foldIdenticalSplitEdges() {
  bool Changed = false;
  for (auto Iter = BF.begin(); Iter != BF.end(); ++Iter) {
    BinaryBasicBlock &BB = *Iter;
    // Only blocks created by splitEdge are candidates.
    if (!BB.getName().startswith(".LSplitEdge"))
      continue;
    // Scan the remaining blocks from the end, stopping once we reach BB.
    for (auto RIter = BF.rbegin(); RIter != BF.rend(); ++RIter) {
      BinaryBasicBlock &RBB = *RIter;
      if (&RBB == &BB)
        break;
      if (!RBB.getName().startswith(".LSplitEdge") || !RBB.isValid() ||
          !isIdenticalSplitEdgeBB(BC, *Iter, RBB))
        continue;
      assert(RBB.pred_size() == 1 && "Invalid split edge BB");
      BinaryBasicBlock *Pred = *RBB.pred_begin();
      uint64_t OrigCount = Pred->branch_info_begin()->Count;
      uint64_t OrigMispreds = Pred->branch_info_begin()->MispredictedCount;
      // Redirect the predecessor (including jump table entries) to BB,
      // preserving the original profile counts of the edge.
      BF.replaceJumpTableEntryIn(Pred, &RBB, &BB);
      Pred->replaceSuccessor(&RBB, &BB, OrigCount, OrigMispreds);
      Changed = true;
      // Remove the block from CFG
      RBB.markValid(false);
    }
  }

  return Changed;
}
1456 
1457 namespace {
1458 
1459 // A special StackPointerTracking that compensates for our future plans
1460 // in removing/adding insn.
class PredictiveStackPointerTracking
    : public StackPointerTrackingBase<PredictiveStackPointerTracking> {
  friend class DataflowAnalysis<PredictiveStackPointerTracking,
                                std::pair<int, int>>;
  // Pending ShrinkWrapping edits, keyed by basic block, that have not been
  // materialized yet but must already be accounted for by this analysis.
  decltype(ShrinkWrapping::Todo) &TodoMap;
  DataflowInfoManager &Info;

  Optional<unsigned> AnnotationIndex;

protected:
  // Fold the SP-offset effect of each pending worklist item at \p Point into
  // the in-flight state \p Res, as if the edit had already been applied.
  void compNextAux(const MCInst &Point,
                   const std::vector<ShrinkWrapping::WorklistItem> &TodoItems,
                   std::pair<int, int> &Res) {
    for (const ShrinkWrapping::WorklistItem &Item : TodoItems) {
      // Erasing an existing push/pop cancels its SP displacement.
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPush(Point)) {
        Res.first += BC.MIB->getPushSize(Point);
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::Erase &&
          BC.MIB->isPop(Point)) {
        Res.first -= BC.MIB->getPopSize(Point);
        continue;
      }
      // An inserted store (push) moves the SP offset down by its size; an
      // inserted load (pop) moves it back up.
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsStore) {
        Res.first -= Item.FIEToInsert.Size;
        continue;
      }
      if (Item.Action == ShrinkWrapping::WorklistItem::InsertPushOrPop &&
          Item.FIEToInsert.IsLoad) {
        Res.first += Item.FIEToInsert.Size;
        continue;
      }
    }
  }

  // Run the standard SPT transfer function, then compensate for edits
  // scheduled as annotations on this instruction and, when \p Point is the
  // last instruction of its block, for BB-level scheduled edits as well.
  std::pair<int, int> computeNext(const MCInst &Point,
                                  const std::pair<int, int> &Cur) {
    std::pair<int, int> Res =
        StackPointerTrackingBase<PredictiveStackPointerTracking>::computeNext(
            Point, Cur);
    // Unknown stack state: nothing to compensate.
    if (Res.first == StackPointerTracking::SUPERPOSITION ||
        Res.first == StackPointerTracking::EMPTY)
      return Res;
    auto TodoItems =
        BC.MIB->tryGetAnnotationAs<std::vector<ShrinkWrapping::WorklistItem>>(
            Point, ShrinkWrapping::getAnnotationName());
    if (TodoItems)
      compNextAux(Point, *TodoItems, Res);
    auto &InsnToBBMap = Info.getInsnToBBMap();
    if (&*InsnToBBMap[&Point]->rbegin() != &Point)
      return Res;
    auto WRI = TodoMap.find(InsnToBBMap[&Point]);
    if (WRI == TodoMap.end())
      return Res;
    compNextAux(Point, WRI->second, Res);
    return Res;
  }

  StringRef getAnnotationName() const {
    return StringRef("PredictiveStackPointerTracking");
  }

public:
  PredictiveStackPointerTracking(BinaryFunction &BF,
                                 decltype(ShrinkWrapping::Todo) &TodoMap,
                                 DataflowInfoManager &Info,
                                 MCPlusBuilder::AllocatorIdTy AllocatorId = 0)
      : StackPointerTrackingBase<PredictiveStackPointerTracking>(BF,
                                                                 AllocatorId),
        TodoMap(TodoMap), Info(Info) {}

  void run() {
    StackPointerTrackingBase<PredictiveStackPointerTracking>::run();
  }
};
1538 
1539 } // end anonymous namespace
1540 
insertUpdatedCFI(unsigned CSR,int SPValPush,int SPValPop)1541 void ShrinkWrapping::insertUpdatedCFI(unsigned CSR, int SPValPush,
1542                                       int SPValPop) {
1543   MCInst *SavePoint = nullptr;
1544   for (BinaryBasicBlock &BB : BF) {
1545     for (auto InstIter = BB.rbegin(), EndIter = BB.rend(); InstIter != EndIter;
1546          ++InstIter) {
1547       int32_t SrcImm = 0;
1548       MCPhysReg Reg = 0;
1549       MCPhysReg StackPtrReg = 0;
1550       int64_t StackOffset = 0;
1551       bool IsIndexed = false;
1552       bool IsLoad = false;
1553       bool IsStore = false;
1554       bool IsSimple = false;
1555       bool IsStoreFromReg = false;
1556       uint8_t Size = 0;
1557       if (!BC.MIB->isStackAccess(*InstIter, IsLoad, IsStore, IsStoreFromReg,
1558                                  Reg, SrcImm, StackPtrReg, StackOffset, Size,
1559                                  IsSimple, IsIndexed))
1560         continue;
1561       if (Reg != CSR || !IsStore || !IsSimple)
1562         continue;
1563       SavePoint = &*InstIter;
1564       break;
1565     }
1566     if (SavePoint)
1567       break;
1568   }
1569   assert(SavePoint);
1570   LLVM_DEBUG({
1571     dbgs() << "Now using as save point for reg " << CSR << " :";
1572     SavePoint->dump();
1573   });
1574   bool PrevAffectedZone = false;
1575   BinaryBasicBlock *PrevBB = nullptr;
1576   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
1577   for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
1578     if (BB->size() == 0)
1579       continue;
1580     const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint);
1581     const bool InAffectedZoneAtBegin =
1582         (*DA.getStateBefore(*BB->begin()))[DA.ExprToIdx[SavePoint]];
1583     bool InAffectedZone = InAffectedZoneAtBegin;
1584     for (auto InstIter = BB->begin(); InstIter != BB->end(); ++InstIter) {
1585       const bool CurZone = DA.count(*InstIter, *SavePoint);
1586       if (InAffectedZone != CurZone) {
1587         auto InsertionIter = InstIter;
1588         ++InsertionIter;
1589         InAffectedZone = CurZone;
1590         if (InAffectedZone)
1591           InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, true, 0,
1592                                             SPValPop);
1593         else
1594           InstIter = insertCFIsForPushOrPop(*BB, InsertionIter, CSR, false, 0,
1595                                             SPValPush);
1596         --InstIter;
1597       }
1598     }
1599     // Are we at the first basic block or hot-cold split point?
1600     if (!PrevBB || (BF.isSplit() && BB->isCold() != PrevBB->isCold())) {
1601       if (InAffectedZoneAtBegin)
1602         insertCFIsForPushOrPop(*BB, BB->begin(), CSR, true, 0, SPValPush);
1603     } else if (InAffectedZoneAtBegin != PrevAffectedZone) {
1604       if (InAffectedZoneAtBegin)
1605         insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, true, 0, SPValPush);
1606       else
1607         insertCFIsForPushOrPop(*PrevBB, PrevBB->end(), CSR, false, 0, SPValPop);
1608     }
1609     PrevAffectedZone = InAffectedZoneAtEnd;
1610     PrevBB = BB;
1611   }
1612 }
1613 
rebuildCFIForSP()1614 void ShrinkWrapping::rebuildCFIForSP() {
1615   for (BinaryBasicBlock &BB : BF) {
1616     for (MCInst &Inst : BB) {
1617       if (!BC.MIB->isCFI(Inst))
1618         continue;
1619       const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
1620       if (CFI->getOperation() == MCCFIInstruction::OpDefCfaOffset)
1621         BC.MIB->addAnnotation(Inst, "DeleteMe", 0U, AllocatorId);
1622     }
1623   }
1624 
1625   int PrevSPVal = -8;
1626   BinaryBasicBlock *PrevBB = nullptr;
1627   StackPointerTracking &SPT = Info.getStackPointerTracking();
1628   for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
1629     if (BB->size() == 0)
1630       continue;
1631     const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first;
1632     const int SPValAtBegin = SPT.getStateBefore(*BB->begin())->first;
1633     int SPVal = SPValAtBegin;
1634     for (auto Iter = BB->begin(); Iter != BB->end(); ++Iter) {
1635       const int CurVal = SPT.getStateAt(*Iter)->first;
1636       if (SPVal != CurVal) {
1637         auto InsertionIter = Iter;
1638         ++InsertionIter;
1639         Iter = BF.addCFIInstruction(
1640             BB, InsertionIter,
1641             MCCFIInstruction::cfiDefCfaOffset(nullptr, -CurVal));
1642         SPVal = CurVal;
1643       }
1644     }
1645     if (BF.isSplit() && PrevBB && BB->isCold() != PrevBB->isCold())
1646       BF.addCFIInstruction(
1647           BB, BB->begin(),
1648           MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
1649     else if (SPValAtBegin != PrevSPVal)
1650       BF.addCFIInstruction(
1651           PrevBB, PrevBB->end(),
1652           MCCFIInstruction::cfiDefCfaOffset(nullptr, -SPValAtBegin));
1653     PrevSPVal = SPValAtEnd;
1654     PrevBB = BB;
1655   }
1656 
1657   for (BinaryBasicBlock &BB : BF)
1658     for (auto I = BB.begin(); I != BB.end();)
1659       if (BC.MIB->hasAnnotation(*I, "DeleteMe"))
1660         I = BB.eraseInstruction(I);
1661       else
1662         ++I;
1663 }
1664 
createStackAccess(int SPVal,int FPVal,const FrameIndexEntry & FIE,bool CreatePushOrPop)1665 MCInst ShrinkWrapping::createStackAccess(int SPVal, int FPVal,
1666                                          const FrameIndexEntry &FIE,
1667                                          bool CreatePushOrPop) {
1668   MCInst NewInst;
1669   if (SPVal != StackPointerTracking::SUPERPOSITION &&
1670       SPVal != StackPointerTracking::EMPTY) {
1671     if (FIE.IsLoad) {
1672       if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getStackPointer(),
1673                                           FIE.StackOffset - SPVal, FIE.RegOrImm,
1674                                           FIE.Size)) {
1675         errs() << "createRestoreFromStack: not supported on this platform\n";
1676         abort();
1677       }
1678     } else {
1679       if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getStackPointer(),
1680                                      FIE.StackOffset - SPVal, FIE.RegOrImm,
1681                                      FIE.Size)) {
1682         errs() << "createSaveToStack: not supported on this platform\n";
1683         abort();
1684       }
1685     }
1686     if (CreatePushOrPop)
1687       BC.MIB->changeToPushOrPop(NewInst);
1688     return NewInst;
1689   }
1690   assert(FPVal != StackPointerTracking::SUPERPOSITION &&
1691          FPVal != StackPointerTracking::EMPTY);
1692 
1693   if (FIE.IsLoad) {
1694     if (!BC.MIB->createRestoreFromStack(NewInst, BC.MIB->getFramePointer(),
1695                                         FIE.StackOffset - FPVal, FIE.RegOrImm,
1696                                         FIE.Size)) {
1697       errs() << "createRestoreFromStack: not supported on this platform\n";
1698       abort();
1699     }
1700   } else {
1701     if (!BC.MIB->createSaveToStack(NewInst, BC.MIB->getFramePointer(),
1702                                    FIE.StackOffset - FPVal, FIE.RegOrImm,
1703                                    FIE.Size)) {
1704       errs() << "createSaveToStack: not supported on this platform\n";
1705       abort();
1706     }
1707   }
1708   return NewInst;
1709 }
1710 
updateCFIInstOffset(MCInst & Inst,int64_t NewOffset)1711 void ShrinkWrapping::updateCFIInstOffset(MCInst &Inst, int64_t NewOffset) {
1712   const MCCFIInstruction *CFI = BF.getCFIFor(Inst);
1713   if (UpdatedCFIs.count(CFI))
1714     return;
1715 
1716   switch (CFI->getOperation()) {
1717   case MCCFIInstruction::OpDefCfa:
1718   case MCCFIInstruction::OpDefCfaRegister:
1719   case MCCFIInstruction::OpDefCfaOffset:
1720     CFI = BF.mutateCFIOffsetFor(Inst, -NewOffset);
1721     break;
1722   case MCCFIInstruction::OpOffset:
1723   default:
1724     break;
1725   }
1726 
1727   UpdatedCFIs.insert(CFI);
1728 }
1729 
insertCFIsForPushOrPop(BinaryBasicBlock & BB,BBIterTy Pos,unsigned Reg,bool isPush,int Sz,int64_t NewOffset)1730 BBIterTy ShrinkWrapping::insertCFIsForPushOrPop(BinaryBasicBlock &BB,
1731                                                 BBIterTy Pos, unsigned Reg,
1732                                                 bool isPush, int Sz,
1733                                                 int64_t NewOffset) {
1734   if (isPush) {
1735     for (uint32_t Idx : DeletedPushCFIs[Reg]) {
1736       Pos = BF.addCFIPseudo(&BB, Pos, Idx);
1737       updateCFIInstOffset(*Pos++, NewOffset);
1738     }
1739     if (HasDeletedOffsetCFIs[Reg]) {
1740       Pos = BF.addCFIInstruction(
1741           &BB, Pos,
1742           MCCFIInstruction::createOffset(
1743               nullptr, BC.MRI->getDwarfRegNum(Reg, false), NewOffset));
1744       ++Pos;
1745     }
1746   } else {
1747     for (uint32_t Idx : DeletedPopCFIs[Reg]) {
1748       Pos = BF.addCFIPseudo(&BB, Pos, Idx);
1749       updateCFIInstOffset(*Pos++, NewOffset);
1750     }
1751     if (HasDeletedOffsetCFIs[Reg]) {
1752       Pos = BF.addCFIInstruction(
1753           &BB, Pos,
1754           MCCFIInstruction::createSameValue(
1755               nullptr, BC.MRI->getDwarfRegNum(Reg, false)));
1756       ++Pos;
1757     }
1758   }
1759   return Pos;
1760 }
1761 
processInsertion(BBIterTy InsertionPoint,BinaryBasicBlock * CurBB,const WorklistItem & Item,int64_t SPVal,int64_t FPVal)1762 BBIterTy ShrinkWrapping::processInsertion(BBIterTy InsertionPoint,
1763                                           BinaryBasicBlock *CurBB,
1764                                           const WorklistItem &Item,
1765                                           int64_t SPVal, int64_t FPVal) {
1766   // Trigger CFI reconstruction for this CSR if necessary - writing to
1767   // PushOffsetByReg/PopOffsetByReg *will* trigger CFI update
1768   if ((Item.FIEToInsert.IsStore &&
1769        !DeletedPushCFIs[Item.AffectedReg].empty()) ||
1770       (Item.FIEToInsert.IsLoad && !DeletedPopCFIs[Item.AffectedReg].empty()) ||
1771       HasDeletedOffsetCFIs[Item.AffectedReg]) {
1772     if (Item.Action == WorklistItem::InsertPushOrPop) {
1773       if (Item.FIEToInsert.IsStore)
1774         PushOffsetByReg[Item.AffectedReg] = SPVal - Item.FIEToInsert.Size;
1775       else
1776         PopOffsetByReg[Item.AffectedReg] = SPVal;
1777     } else {
1778       if (Item.FIEToInsert.IsStore)
1779         PushOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
1780       else
1781         PopOffsetByReg[Item.AffectedReg] = Item.FIEToInsert.StackOffset;
1782     }
1783   }
1784 
1785   LLVM_DEBUG({
1786     dbgs() << "Creating stack access with SPVal = " << SPVal
1787            << "; stack offset = " << Item.FIEToInsert.StackOffset
1788            << " Is push = " << (Item.Action == WorklistItem::InsertPushOrPop)
1789            << "\n";
1790   });
1791   MCInst NewInst =
1792       createStackAccess(SPVal, FPVal, Item.FIEToInsert,
1793                         Item.Action == WorklistItem::InsertPushOrPop);
1794   if (InsertionPoint != CurBB->end()) {
1795     LLVM_DEBUG({
1796       dbgs() << "Adding before Inst: ";
1797       InsertionPoint->dump();
1798       dbgs() << "the following inst: ";
1799       NewInst.dump();
1800     });
1801     BBIterTy Iter =
1802         CurBB->insertInstruction(InsertionPoint, std::move(NewInst));
1803     return ++Iter;
1804   }
1805   CurBB->addInstruction(std::move(NewInst));
1806   LLVM_DEBUG(dbgs() << "Adding to BB!\n");
1807   return CurBB->end();
1808 }
1809 
processInsertionsList(BBIterTy InsertionPoint,BinaryBasicBlock * CurBB,std::vector<WorklistItem> & TodoList,int64_t SPVal,int64_t FPVal)1810 BBIterTy ShrinkWrapping::processInsertionsList(
1811     BBIterTy InsertionPoint, BinaryBasicBlock *CurBB,
1812     std::vector<WorklistItem> &TodoList, int64_t SPVal, int64_t FPVal) {
1813   bool HasInsertions = false;
1814   for (WorklistItem &Item : TodoList) {
1815     if (Item.Action == WorklistItem::Erase ||
1816         Item.Action == WorklistItem::ChangeToAdjustment)
1817       continue;
1818     HasInsertions = true;
1819     break;
1820   }
1821 
1822   if (!HasInsertions)
1823     return InsertionPoint;
1824 
1825   assert(((SPVal != StackPointerTracking::SUPERPOSITION &&
1826            SPVal != StackPointerTracking::EMPTY) ||
1827           (FPVal != StackPointerTracking::SUPERPOSITION &&
1828            FPVal != StackPointerTracking::EMPTY)) &&
1829          "Cannot insert if we have no idea of the stack state here");
1830 
1831   // Revert the effect of PSPT for this location, we want SP Value before
1832   // insertions
1833   if (InsertionPoint == CurBB->end()) {
1834     for (WorklistItem &Item : TodoList) {
1835       if (Item.Action != WorklistItem::InsertPushOrPop)
1836         continue;
1837       if (Item.FIEToInsert.IsStore)
1838         SPVal += Item.FIEToInsert.Size;
1839       if (Item.FIEToInsert.IsLoad)
1840         SPVal -= Item.FIEToInsert.Size;
1841     }
1842   }
1843 
1844   // Reorder POPs to obey the correct dominance relation between them
1845   llvm::stable_sort(TodoList, [&](const WorklistItem &A,
1846                                   const WorklistItem &B) {
1847     if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad) &&
1848         (B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
1849       return false;
1850     if ((A.Action != WorklistItem::InsertPushOrPop || !A.FIEToInsert.IsLoad))
1851       return true;
1852     if ((B.Action != WorklistItem::InsertPushOrPop || !B.FIEToInsert.IsLoad))
1853       return false;
1854     return DomOrder[B.AffectedReg] < DomOrder[A.AffectedReg];
1855   });
1856 
1857   // Process insertions
1858   for (WorklistItem &Item : TodoList) {
1859     if (Item.Action == WorklistItem::Erase ||
1860         Item.Action == WorklistItem::ChangeToAdjustment)
1861       continue;
1862 
1863     InsertionPoint =
1864         processInsertion(InsertionPoint, CurBB, Item, SPVal, FPVal);
1865     if (Item.Action == WorklistItem::InsertPushOrPop &&
1866         Item.FIEToInsert.IsStore)
1867       SPVal -= Item.FIEToInsert.Size;
1868     if (Item.Action == WorklistItem::InsertPushOrPop &&
1869         Item.FIEToInsert.IsLoad)
1870       SPVal += Item.FIEToInsert.Size;
1871   }
1872   return InsertionPoint;
1873 }
1874 
processInsertions()1875 bool ShrinkWrapping::processInsertions() {
1876   PredictiveStackPointerTracking PSPT(BF, Todo, Info, AllocatorId);
1877   PSPT.run();
1878 
1879   bool Changes = false;
1880   for (BinaryBasicBlock &BB : BF) {
1881     // Process insertions before some inst.
1882     for (auto I = BB.begin(); I != BB.end(); ++I) {
1883       MCInst &Inst = *I;
1884       auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
1885           Inst, getAnnotationIndex());
1886       if (!TodoList)
1887         continue;
1888       Changes = true;
1889       std::vector<WorklistItem> List = *TodoList;
1890       LLVM_DEBUG({
1891         dbgs() << "Now processing insertions in " << BB.getName()
1892                << " before inst: ";
1893         Inst.dump();
1894       });
1895       auto Iter = I;
1896       std::pair<int, int> SPTState =
1897           *PSPT.getStateAt(Iter == BB.begin() ? (ProgramPoint)&BB : &*(--Iter));
1898       I = processInsertionsList(I, &BB, List, SPTState.first, SPTState.second);
1899     }
1900     // Process insertions at the end of bb
1901     auto WRI = Todo.find(&BB);
1902     if (WRI != Todo.end()) {
1903       std::pair<int, int> SPTState = *PSPT.getStateAt(*BB.rbegin());
1904       processInsertionsList(BB.end(), &BB, WRI->second, SPTState.first,
1905                             SPTState.second);
1906       Changes = true;
1907     }
1908   }
1909   return Changes;
1910 }
1911 
processDeletions()1912 void ShrinkWrapping::processDeletions() {
1913   LivenessAnalysis &LA = Info.getLivenessAnalysis();
1914   for (BinaryBasicBlock &BB : BF) {
1915     for (auto II = BB.begin(); II != BB.end(); ++II) {
1916       MCInst &Inst = *II;
1917       auto TodoList = BC.MIB->tryGetAnnotationAs<std::vector<WorklistItem>>(
1918           Inst, getAnnotationIndex());
1919       if (!TodoList)
1920         continue;
1921       // Process all deletions
1922       for (WorklistItem &Item : *TodoList) {
1923         if (Item.Action != WorklistItem::Erase &&
1924             Item.Action != WorklistItem::ChangeToAdjustment)
1925           continue;
1926 
1927         if (Item.Action == WorklistItem::ChangeToAdjustment) {
1928           // Is flag reg alive across this func?
1929           bool DontClobberFlags = LA.isAlive(&Inst, BC.MIB->getFlagsReg());
1930           if (int Sz = BC.MIB->getPushSize(Inst)) {
1931             BC.MIB->createStackPointerIncrement(Inst, Sz, DontClobberFlags);
1932             continue;
1933           }
1934           if (int Sz = BC.MIB->getPopSize(Inst)) {
1935             BC.MIB->createStackPointerDecrement(Inst, Sz, DontClobberFlags);
1936             continue;
1937           }
1938         }
1939 
1940         LLVM_DEBUG({
1941           dbgs() << "Erasing: ";
1942           BC.printInstruction(dbgs(), Inst);
1943         });
1944         II = std::prev(BB.eraseInstruction(II));
1945         break;
1946       }
1947     }
1948   }
1949 }
1950 
rebuildCFI()1951 void ShrinkWrapping::rebuildCFI() {
1952   const bool FP = Info.getStackPointerTracking().HasFramePointer;
1953   Info.invalidateAll();
1954   if (!FP) {
1955     rebuildCFIForSP();
1956     Info.invalidateAll();
1957   }
1958   for (unsigned I = 0, E = BC.MRI->getNumRegs(); I != E; ++I) {
1959     if (PushOffsetByReg[I] == 0 || PopOffsetByReg[I] == 0)
1960       continue;
1961     const int64_t SPValPush = PushOffsetByReg[I];
1962     const int64_t SPValPop = PopOffsetByReg[I];
1963     insertUpdatedCFI(I, SPValPush, SPValPop);
1964     Info.invalidateAll();
1965   }
1966 }
1967 
perform(bool HotOnly)1968 bool ShrinkWrapping::perform(bool HotOnly) {
1969   HasDeletedOffsetCFIs = BitVector(BC.MRI->getNumRegs(), false);
1970   PushOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
1971   PopOffsetByReg = std::vector<int64_t>(BC.MRI->getNumRegs(), 0LL);
1972 
1973   // Update pass statistics
1974   uint64_t TotalInstrs = 0ULL;
1975   uint64_t TotalStoreInstrs = 0ULL;
1976   for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
1977     uint64_t BBExecCount = BB->getExecutionCount();
1978     if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE)
1979       continue;
1980     for (const auto &Instr : *BB) {
1981       if (BC.MIB->isPseudo(Instr))
1982         continue;
1983       if (BC.MIB->isStore(Instr))
1984         TotalStoreInstrs += BBExecCount;
1985       TotalInstrs += BBExecCount;
1986     }
1987   }
1988   InstrDynamicCount += TotalInstrs;
1989   StoreDynamicCount += TotalStoreInstrs;
1990 
1991   if (!FA.hasFrameInfo(BF))
1992     return false;
1993 
1994   if (HotOnly && (BF.getKnownExecutionCount() < BC.getHotThreshold()))
1995     return false;
1996 
1997   if (opts::EqualizeBBCounts)
1998     equalizeBBCounts(Info, BF);
1999 
2000   if (BF.checkForAmbiguousJumpTables()) {
2001     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ambiguous JTs in " << BF.getPrintName()
2002                       << ".\n");
2003     // We could call disambiguateJumpTables here, but it is probably not worth
2004     // the cost (of duplicating potentially large jump tables that could regress
2005     // dcache misses). Moreover, ambiguous JTs are rare and coming from code
2006     // written in assembly language. Just bail.
2007     return false;
2008   }
2009   SLM.initialize();
2010   CSA.compute();
2011   classifyCSRUses();
2012   pruneUnwantedCSRs();
2013   computeSaveLocations();
2014   moveSaveRestores();
2015   LLVM_DEBUG({
2016     dbgs() << "Func before shrink-wrapping: \n";
2017     BF.dump();
2018   });
2019   SLM.performChanges();
2020   // Early exit if processInsertions doesn't detect any todo items
2021   if (!processInsertions())
2022     return false;
2023   processDeletions();
2024   if (foldIdenticalSplitEdges()) {
2025     const std::pair<unsigned, uint64_t> Stats = BF.eraseInvalidBBs();
2026     (void)Stats;
2027     LLVM_DEBUG(dbgs() << "Deleted " << Stats.first
2028                       << " redundant split edge BBs (" << Stats.second
2029                       << " bytes) for " << BF.getPrintName() << "\n");
2030   }
2031   rebuildCFI();
2032   // We may have split edges, creating BBs that need correct branching
2033   BF.fixBranches();
2034   LLVM_DEBUG({
2035     dbgs() << "Func after shrink-wrapping: \n";
2036     BF.dump();
2037   });
2038   return true;
2039 }
2040 
printStats()2041 void ShrinkWrapping::printStats() {
2042   outs() << "BOLT-INFO: Shrink wrapping moved " << SpillsMovedRegularMode
2043          << " spills inserting load/stores and " << SpillsMovedPushPopMode
2044          << " spills inserting push/pops\n";
2045   if (!InstrDynamicCount || !StoreDynamicCount)
2046     return;
2047   outs() << "BOLT-INFO: Shrink wrapping reduced " << SpillsMovedDynamicCount
2048          << " store executions ("
2049          << format("%.1lf%%",
2050                    (100.0 * SpillsMovedDynamicCount / InstrDynamicCount))
2051          << " total instructions executed, "
2052          << format("%.1lf%%",
2053                    (100.0 * SpillsMovedDynamicCount / StoreDynamicCount))
2054          << " store instructions)\n";
2055   outs() << "BOLT-INFO: Shrink wrapping failed at reducing "
2056          << SpillsFailedDynamicCount << " store executions ("
2057          << format("%.1lf%%",
2058                    (100.0 * SpillsFailedDynamicCount / InstrDynamicCount))
2059          << " total instructions executed, "
2060          << format("%.1lf%%",
2061                    (100.0 * SpillsFailedDynamicCount / StoreDynamicCount))
2062          << " store instructions)\n";
2063 }
2064 
2065 // Operators necessary as a result of using MCAnnotation
operator <<(raw_ostream & OS,const std::vector<ShrinkWrapping::WorklistItem> & Vec)2066 raw_ostream &operator<<(raw_ostream &OS,
2067                         const std::vector<ShrinkWrapping::WorklistItem> &Vec) {
2068   OS << "SWTodo[";
2069   const char *Sep = "";
2070   for (const ShrinkWrapping::WorklistItem &Item : Vec) {
2071     OS << Sep;
2072     switch (Item.Action) {
2073     case ShrinkWrapping::WorklistItem::Erase:
2074       OS << "Erase";
2075       break;
2076     case ShrinkWrapping::WorklistItem::ChangeToAdjustment:
2077       OS << "ChangeToAdjustment";
2078       break;
2079     case ShrinkWrapping::WorklistItem::InsertLoadOrStore:
2080       OS << "InsertLoadOrStore";
2081       break;
2082     case ShrinkWrapping::WorklistItem::InsertPushOrPop:
2083       OS << "InsertPushOrPop";
2084       break;
2085     }
2086     Sep = ", ";
2087   }
2088   OS << "]";
2089   return OS;
2090 }
2091 
2092 raw_ostream &
operator <<(raw_ostream & OS,const std::vector<StackLayoutModifier::WorklistItem> & Vec)2093 operator<<(raw_ostream &OS,
2094            const std::vector<StackLayoutModifier::WorklistItem> &Vec) {
2095   OS << "SLMTodo[";
2096   const char *Sep = "";
2097   for (const StackLayoutModifier::WorklistItem &Item : Vec) {
2098     OS << Sep;
2099     switch (Item.Action) {
2100     case StackLayoutModifier::WorklistItem::None:
2101       OS << "None";
2102       break;
2103     case StackLayoutModifier::WorklistItem::AdjustLoadStoreOffset:
2104       OS << "AdjustLoadStoreOffset";
2105       break;
2106     case StackLayoutModifier::WorklistItem::AdjustCFI:
2107       OS << "AdjustCFI";
2108       break;
2109     }
2110     Sep = ", ";
2111   }
2112   OS << "]";
2113   return OS;
2114 }
2115 
operator ==(const ShrinkWrapping::WorklistItem & A,const ShrinkWrapping::WorklistItem & B)2116 bool operator==(const ShrinkWrapping::WorklistItem &A,
2117                 const ShrinkWrapping::WorklistItem &B) {
2118   return (A.Action == B.Action && A.AffectedReg == B.AffectedReg &&
2119           A.Adjustment == B.Adjustment &&
2120           A.FIEToInsert.IsLoad == B.FIEToInsert.IsLoad &&
2121           A.FIEToInsert.IsStore == B.FIEToInsert.IsStore &&
2122           A.FIEToInsert.RegOrImm == B.FIEToInsert.RegOrImm &&
2123           A.FIEToInsert.Size == B.FIEToInsert.Size &&
2124           A.FIEToInsert.IsSimple == B.FIEToInsert.IsSimple &&
2125           A.FIEToInsert.StackOffset == B.FIEToInsert.StackOffset);
2126 }
2127 
operator ==(const StackLayoutModifier::WorklistItem & A,const StackLayoutModifier::WorklistItem & B)2128 bool operator==(const StackLayoutModifier::WorklistItem &A,
2129                 const StackLayoutModifier::WorklistItem &B) {
2130   return (A.Action == B.Action && A.OffsetUpdate == B.OffsetUpdate);
2131 }
2132 
2133 } // end namespace bolt
2134 } // end namespace llvm
2135