//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

#define DEBUG_TYPE "gi-combine"

using namespace llvm;

CombinerHelper::CombinerHelper(CombinerChangeObserver &Observer,
                               MachineIRBuilder &B)
    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {}

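// Report an instruction as erased to the observer. Note that this does not
// remove MI from its parent; the caller is responsible for that.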
void CombinerHelper::eraseInstr(MachineInstr &MI) {
  Observer.erasedInstr(MI);
}
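// Report a newly created instruction to the observer so that it gets
// scheduled for a combine visit.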
void CombinerHelper::scheduleForVisit(MachineInstr &MI) {
  Observer.createdInstr(MI);
}

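// Fold a trivial COPY by forwarding the source register to all users of the
// destination. Only fires when both registers have the same valid LLT.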
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  // Simple Copy Propagation.
  // a(sx) = COPY b(sx) -> Replace all uses of a with b.
  if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) {
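    // Erase the COPY before rewriting uses; otherwise replaceRegWith would
    // also rewrite the COPY's own def and turn it into a self-copy.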
    MI.eraseFromParent();
    MRI.replaceRegWith(DstReg, SrcReg);
    return true;
  }
  return false;
}

namespace {
struct PreferredTuple {
  LLT Ty;                // The result type of the extend.
  unsigned ExtendOpcode; // G_ANYEXT/G_SEXT/G_ZEXT
  MachineInstr *MI;
};

/// Select a preference between two uses. CurrentUse is the current preference
/// while the *ForCandidate arguments describe the candidate under
/// consideration.
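/// For example, given an s8 load %0 with these two extending uses:
///   %1:_(s16) = G_ZEXT %0(s8)
///   %2:_(s32) = G_SEXT %0(s8)
/// the s32 G_SEXT is preferred, since larger result types win and, at equal
/// sizes, sign extends beat zero extends.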
PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
                                  const LLT &TyForCandidate,
                                  unsigned OpcodeForCandidate,
                                  MachineInstr *MIForCandidate) {
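  // An invalid type indicates that no preference has been recorded yet. The
  // initial ExtendOpcode encodes the extension the load itself performs, so
  // only seed the preference with a compatible candidate (an any-extending
  // load is compatible with everything).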
  if (!CurrentUse.Ty.isValid()) {
    if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
        CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
    return CurrentUse;
  }

  // We permit the extend to be hoisted through basic blocks, but this is only
  // sensible if the target has extending loads. If the result is lowered back
  // into a load and extend during legalization, the net effect is simply to
  // hoist the extend up to the load.

  // Prefer defined extensions to undefined extensions as these are more
  // likely to reduce the number of instructions.
  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
      CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
    return CurrentUse;
  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
           OpcodeForCandidate != TargetOpcode::G_ANYEXT)
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};

  // Prefer sign extensions to zero extensions, as sign extensions tend to be
  // the more expensive of the two; folding the costlier extension into the
  // load is therefore the bigger win.
  if (CurrentUse.Ty == TyForCandidate) {
    if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
        OpcodeForCandidate == TargetOpcode::G_ZEXT)
      return CurrentUse;
    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
             OpcodeForCandidate == TargetOpcode::G_SEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }

  // This is potentially target specific. We've chosen the largest type
  // because G_TRUNC is usually free. One potential catch is that some targets
  // have fewer of the larger registers than of the smaller ones, so this
  // choice potentially lengthens the live range of the larger value.
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
  return CurrentUse;
}

/// Find a suitable place to insert some instructions and insert them. This
/// function accounts for special cases like inserting before a PHI node.
/// The current strategy for inserting before PHIs is to duplicate the
/// instructions for each predecessor. However, while that's ok for G_TRUNC
/// on most targets since it generally requires no code, other targets/cases
/// may want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator)>
        Inserter) {
  MachineInstr &UseMI = *UseMO.getParent();

  MachineBasicBlock *InsertBB = UseMI.getParent();

  // If the use is a PHI then we want the predecessor block instead.
  if (UseMI.isPHI()) {
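    // PHI operands come in (value, predecessor block) pairs, so the block
    // operand immediately follows the use operand we were given.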
    MachineOperand *PredBB = std::next(&UseMO);
    InsertBB = PredBB->getMBB();
  }

  // If the block is the same block as the def then we want to insert just
  // after the def instead of at the start of the block.
  if (InsertBB == DefMI.getParent()) {
    MachineBasicBlock::iterator InsertPt = &DefMI;
    Inserter(InsertBB, std::next(InsertPt));
    return;
  }

  // Otherwise we want the start of the BB.
  Inserter(InsertBB, InsertBB->getFirstNonPHI());
}
} // end anonymous namespace

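// Fold the extension of a G_LOAD's result into the load itself, turning it
// into the corresponding extending load and rewriting the remaining uses of
// the loaded value.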
bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
  struct InsertionPoint {
    MachineOperand *UseMO;
    MachineBasicBlock *InsertIntoBB;
    MachineBasicBlock::iterator InsertBefore;
    InsertionPoint(MachineOperand *UseMO, MachineBasicBlock *InsertIntoBB,
                   MachineBasicBlock::iterator InsertBefore)
        : UseMO(UseMO), InsertIntoBB(InsertIntoBB),
          InsertBefore(InsertBefore) {}
  };

  // We match the loads and follow the uses to the extend instead of matching
  // the extends and following the def to the load. This is because the load
  // must remain in the same position for correctness (unless we also add code
  // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load, which would be incorrect
  // for volatile loads and wasteful in general.

  if (MI.getOpcode() != TargetOpcode::G_LOAD &&
      MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
      MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
    return false;

  auto &LoadValue = MI.getOperand(0);
  assert(LoadValue.isReg() && "Result wasn't a register?");

  LLT LoadValueTy = MRI.getType(LoadValue.getReg());
  if (!LoadValueTy.isScalar())
    return false;

  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
  unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
                                 ? TargetOpcode::G_ANYEXT
                                 : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
                                       ? TargetOpcode::G_SEXT
                                       : TargetOpcode::G_ZEXT;
  PreferredTuple Preferred = {LLT(), PreferredOpcode, nullptr};
  for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
      Preferred = ChoosePreferredUse(Preferred,
                                     MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
    }
  }

  // There were no extends.
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");

  // Rewrite the load to the chosen extending load.
  unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg();
  MI.setDesc(
      Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
                               ? TargetOpcode::G_SEXTLOAD
                               : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
                                     ? TargetOpcode::G_ZEXTLOAD
                                     : TargetOpcode::G_LOAD));

  // Rewrite all the uses to fix up the types.
  SmallVector<MachineInstr *, 1> ScheduleForErase;
  SmallVector<InsertionPoint, 4> ScheduleForInsert;
  for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) {
    MachineInstr *UseMI = UseMO.getParent();

    // If the extend is compatible with the preferred extend then we should fix
    // up the type and extend so that it uses the preferred use.
    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
      unsigned UseDstReg = UseMI->getOperand(0).getReg();
      unsigned UseSrcReg = UseMI->getOperand(1).getReg();
      const LLT &UseDstTy = MRI.getType(UseDstReg);
      if (UseDstReg != ChosenDstReg) {
        if (Preferred.Ty == UseDstTy) {
          // If the use has the same type as the preferred use, then merge
          // the vregs and erase the extend. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ANYEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    ... = ... %2(s32)
          MRI.replaceRegWith(UseDstReg, ChosenDstReg);
          ScheduleForErase.push_back(UseMO.getParent());
        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
          // If the preferred size is smaller, then keep the extend but extend
          // from the result of the extending load. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s64) = G_ANYEXT %1(s8)
          //    ... = ... %3(s64)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    %3:_(s64) = G_ANYEXT %2:_(s32)
          //    ... = ... %3(s64)
          MRI.replaceRegWith(UseSrcReg, ChosenDstReg);
        } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          //    ... = ... %3(s32)
          InsertInsnsWithoutSideEffectsBeforeUse(
              Builder, MI, UseMO,
              [&](MachineBasicBlock *InsertIntoBB,
                  MachineBasicBlock::iterator InsertBefore) {
                ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB,
                                               InsertBefore);
              });
        }
        continue;
      }
      // The use is (one of) the uses of the preferred use we chose earlier.
      // We're going to update the load to def this value later so just erase
      // the old extend.
      ScheduleForErase.push_back(UseMO.getParent());
      continue;
    }

    // The use isn't an extend. Truncate back to the type we originally loaded.
    // This is free on many targets.
    InsertInsnsWithoutSideEffectsBeforeUse(
        Builder, MI, UseMO,
        [&](MachineBasicBlock *InsertIntoBB,
            MachineBasicBlock::iterator InsertBefore) {
          ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
        });
  }

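  // Materialize the scheduled truncates, reusing a single G_TRUNC for all the
  // uses that land in the same basic block.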
  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
  for (auto &InsertionInfo : ScheduleForInsert) {
    MachineOperand *UseMO = InsertionInfo.UseMO;
    MachineBasicBlock *InsertIntoBB = InsertionInfo.InsertIntoBB;
    MachineBasicBlock::iterator InsertBefore = InsertionInfo.InsertBefore;

    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
    if (PreviouslyEmitted) {
      UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg());
      continue;
    }

    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
    unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
    EmittedInsns[InsertIntoBB] = NewMI;
    UseMO->setReg(NewDstReg);
    Observer.createdInstr(*NewMI);
  }
  for (auto &EraseMI : ScheduleForErase) {
    // Notify the observer before erasing: eraseFromParent() deletes the
    // instruction, so touching it afterwards would be a use-after-free.
    Observer.erasedInstr(*EraseMI);
    EraseMI->eraseFromParent();
  }
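  // Finally, retarget the load's def to the register the chosen extend used
  // to define, so the rewritten extending load produces the value its
  // remaining users expect.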
  MI.getOperand(0).setReg(ChosenDstReg);

  return true;
}

bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
  return tryCombineExtendingLoads(MI);
}