1 //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the pass that finds instructions that can be
11 // re-written as LEA instructions in order to reduce pipeline delays.
12 // When optimizing for size it replaces suitable LEAs with INC or DEC.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #include "X86.h"
17 #include "X86InstrInfo.h"
18 #include "X86Subtarget.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFunctionPass.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/Passes.h"
23 #include "llvm/CodeGen/TargetSchedule.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/raw_ostream.h"
26 using namespace llvm;
27
28 #define FIXUPLEA_DESC "X86 LEA Fixup"
29 #define FIXUPLEA_NAME "x86-fixup-LEAs"
30
31 #define DEBUG_TYPE FIXUPLEA_NAME
32
33 STATISTIC(NumLEAs, "Number of LEA instructions created");
34
35 namespace {
36 class FixupLEAPass : public MachineFunctionPass {
37 enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
38
39 /// Loop over all of the instructions in the basic block
40 /// replacing applicable instructions with LEA instructions,
41 /// where appropriate.
42 bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI,
43 bool IsSlowLEA, bool IsSlow3OpsLEA);
44
45 /// Given a machine register, look for the instruction
46 /// which writes it in the current basic block. If found,
47 /// try to replace it with an equivalent LEA instruction.
48 /// If replacement succeeds, then also process the newly created
49 /// instruction.
50 void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
51 MachineFunction::iterator MFI);
52
53 /// Given a memory access or LEA instruction
54 /// whose address mode uses a base and/or index register, look for
55 /// an opportunity to replace the instruction which sets the base or index
56 /// register with an equivalent LEA instruction.
57 void processInstruction(MachineBasicBlock::iterator &I,
58 MachineFunction::iterator MFI);
59
60 /// Given a LEA instruction which is unprofitable
61 /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
62 void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
63 MachineFunction::iterator MFI);
64
65 /// Given a LEA instruction which is unprofitable
66 /// on SNB+ try to replace it with other instructions.
67 /// According to Intel's Optimization Reference Manual:
68 /// " For LEA instructions with three source operands and some specific
69 /// situations, instruction latency has increased to 3 cycles, and must
70 /// dispatch via port 1:
71 /// - LEA that has all three source operands: base, index, and offset
72 /// - LEA that uses base and index registers where the base is EBP, RBP,
73 /// or R13
74 /// - LEA that uses RIP relative addressing mode
75 /// - LEA that uses 16-bit addressing mode "
76 /// This function currently handles the first 2 cases only.
77 MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI,
78 MachineFunction::iterator MFI);
79
80 /// Look for LEAs that add 1 to reg or subtract 1 from reg
81 /// and convert them to INC or DEC respectively.
82 bool fixupIncDec(MachineBasicBlock::iterator &I,
83 MachineFunction::iterator MFI) const;
84
85 /// Determine if an instruction references a machine register
86 /// and, if so, whether it reads or writes the register.
87 RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
88
89 /// Step backwards through a basic block, looking
90 /// for an instruction which writes a register within
91 /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
92 MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
93 MachineBasicBlock::iterator &I,
94 MachineFunction::iterator MFI);
95
96 /// if an instruction can be converted to an
97 /// equivalent LEA, insert the new instruction into the basic block
98 /// and return a pointer to it. Otherwise, return zero.
99 MachineInstr *postRAConvertToLEA(MachineFunction::iterator &MFI,
100 MachineBasicBlock::iterator &MBBI) const;
101
102 public:
103 static char ID;
104
getPassName() const105 StringRef getPassName() const override { return FIXUPLEA_DESC; }
106
FixupLEAPass()107 FixupLEAPass() : MachineFunctionPass(ID) {
108 initializeFixupLEAPassPass(*PassRegistry::getPassRegistry());
109 }
110
111 /// Loop over all of the basic blocks,
112 /// replacing instructions by equivalent LEA instructions
113 /// if needed and when possible.
114 bool runOnMachineFunction(MachineFunction &MF) override;
115
116 // This pass runs after regalloc and doesn't support VReg operands.
getRequiredProperties() const117 MachineFunctionProperties getRequiredProperties() const override {
118 return MachineFunctionProperties().set(
119 MachineFunctionProperties::Property::NoVRegs);
120 }
121
122 private:
123 TargetSchedModel TSM;
124 MachineFunction *MF;
125 const X86InstrInfo *TII; // Machine instruction info.
126 bool OptIncDec;
127 bool OptLEA;
128 };
129 }
130
// Definition of the static pass identifier declared in FixupLEAPass; the
// constructor hands it to the MachineFunctionPass base class.
char FixupLEAPass::ID = 0;

// Registers the pass using FIXUPLEA_NAME and FIXUPLEA_DESC; this macro is
// expected to provide the initializeFixupLEAPassPass() function called from
// the constructor.
INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
134
135 MachineInstr *
136 FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
137 MachineBasicBlock::iterator &MBBI) const {
138 MachineInstr &MI = *MBBI;
139 switch (MI.getOpcode()) {
140 case X86::MOV32rr:
141 case X86::MOV64rr: {
142 const MachineOperand &Src = MI.getOperand(1);
143 const MachineOperand &Dest = MI.getOperand(0);
144 MachineInstr *NewMI =
145 BuildMI(*MF, MI.getDebugLoc(),
146 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
147 : X86::LEA64r))
148 .add(Dest)
149 .add(Src)
150 .addImm(1)
151 .addReg(0)
152 .addImm(0)
153 .addReg(0);
154 MFI->insert(MBBI, NewMI); // Insert the new inst
155 return NewMI;
156 }
157 }
158
159 if (!MI.isConvertibleTo3Addr())
160 return nullptr;
161
162 switch (MI.getOpcode()) {
163 default:
164 // Only convert instructions that we've verified are safe.
165 return nullptr;
166 case X86::ADD64ri32:
167 case X86::ADD64ri8:
168 case X86::ADD64ri32_DB:
169 case X86::ADD64ri8_DB:
170 case X86::ADD32ri:
171 case X86::ADD32ri8:
172 case X86::ADD32ri_DB:
173 case X86::ADD32ri8_DB:
174 if (!MI.getOperand(2).isImm()) {
175 // convertToThreeAddress will call getImm()
176 // which requires isImm() to be true
177 return nullptr;
178 }
179 break;
180 case X86::SHL64ri:
181 case X86::SHL32ri:
182 case X86::INC64r:
183 case X86::INC32r:
184 case X86::DEC64r:
185 case X86::DEC32r:
186 case X86::ADD64rr:
187 case X86::ADD64rr_DB:
188 case X86::ADD32rr:
189 case X86::ADD32rr_DB:
190 // These instructions are all fine to convert.
191 break;
192 }
193 return TII->convertToThreeAddress(MFI, MI, nullptr);
194 }
195
createX86FixupLEAs()196 FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
197
runOnMachineFunction(MachineFunction & Func)198 bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
199 if (skipFunction(Func.getFunction()))
200 return false;
201
202 MF = &Func;
203 const X86Subtarget &ST = Func.getSubtarget<X86Subtarget>();
204 bool IsSlowLEA = ST.slowLEA();
205 bool IsSlow3OpsLEA = ST.slow3OpsLEA();
206
207 OptIncDec = !ST.slowIncDec() || Func.getFunction().optForMinSize();
208 OptLEA = ST.LEAusesAG() || IsSlowLEA || IsSlow3OpsLEA;
209
210 if (!OptLEA && !OptIncDec)
211 return false;
212
213 TSM.init(&Func.getSubtarget());
214 TII = ST.getInstrInfo();
215
216 LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
217 // Process all basic blocks.
218 for (MachineFunction::iterator I = Func.begin(), E = Func.end(); I != E; ++I)
219 processBasicBlock(Func, I, IsSlowLEA, IsSlow3OpsLEA);
220 LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
221
222 return true;
223 }
224
225 FixupLEAPass::RegUsageState
usesRegister(MachineOperand & p,MachineBasicBlock::iterator I)226 FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
227 RegUsageState RegUsage = RU_NotUsed;
228 MachineInstr &MI = *I;
229
230 for (unsigned int i = 0; i < MI.getNumOperands(); ++i) {
231 MachineOperand &opnd = MI.getOperand(i);
232 if (opnd.isReg() && opnd.getReg() == p.getReg()) {
233 if (opnd.isDef())
234 return RU_Write;
235 RegUsage = RU_Read;
236 }
237 }
238 return RegUsage;
239 }
240
241 /// getPreviousInstr - Given a reference to an instruction in a basic
242 /// block, return a reference to the previous instruction in the block,
243 /// wrapping around to the last instruction of the block if the block
244 /// branches to itself.
getPreviousInstr(MachineBasicBlock::iterator & I,MachineFunction::iterator MFI)245 static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
246 MachineFunction::iterator MFI) {
247 if (I == MFI->begin()) {
248 if (MFI->isPredecessor(&*MFI)) {
249 I = --MFI->end();
250 return true;
251 } else
252 return false;
253 }
254 --I;
255 return true;
256 }
257
258 MachineBasicBlock::iterator
searchBackwards(MachineOperand & p,MachineBasicBlock::iterator & I,MachineFunction::iterator MFI)259 FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
260 MachineFunction::iterator MFI) {
261 int InstrDistance = 1;
262 MachineBasicBlock::iterator CurInst;
263 static const int INSTR_DISTANCE_THRESHOLD = 5;
264
265 CurInst = I;
266 bool Found;
267 Found = getPreviousInstr(CurInst, MFI);
268 while (Found && I != CurInst) {
269 if (CurInst->isCall() || CurInst->isInlineAsm())
270 break;
271 if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
272 break; // too far back to make a difference
273 if (usesRegister(p, CurInst) == RU_Write) {
274 return CurInst;
275 }
276 InstrDistance += TSM.computeInstrLatency(&*CurInst);
277 Found = getPreviousInstr(CurInst, MFI);
278 }
279 return MachineBasicBlock::iterator();
280 }
281
isLEA(const int Opcode)282 static inline bool isLEA(const int Opcode) {
283 return Opcode == X86::LEA16r || Opcode == X86::LEA32r ||
284 Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
285 }
286
isInefficientLEAReg(unsigned int Reg)287 static inline bool isInefficientLEAReg(unsigned int Reg) {
288 return Reg == X86::EBP || Reg == X86::RBP ||
289 Reg == X86::R13D || Reg == X86::R13;
290 }
291
isRegOperand(const MachineOperand & Op)292 static inline bool isRegOperand(const MachineOperand &Op) {
293 return Op.isReg() && Op.getReg() != X86::NoRegister;
294 }
295
296 /// Returns true if this LEA uses base an index registers, and the base register
297 /// is known to be inefficient for the subtarget.
298 // TODO: use a variant scheduling class to model the latency profile
299 // of LEA instructions, and implement this logic as a scheduling predicate.
hasInefficientLEABaseReg(const MachineOperand & Base,const MachineOperand & Index)300 static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
301 const MachineOperand &Index) {
302 return Base.isReg() && isInefficientLEAReg(Base.getReg()) &&
303 isRegOperand(Index);
304 }
305
hasLEAOffset(const MachineOperand & Offset)306 static inline bool hasLEAOffset(const MachineOperand &Offset) {
307 return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
308 }
309
getADDrrFromLEA(int LEAOpcode)310 static inline int getADDrrFromLEA(int LEAOpcode) {
311 switch (LEAOpcode) {
312 default:
313 llvm_unreachable("Unexpected LEA instruction");
314 case X86::LEA16r:
315 return X86::ADD16rr;
316 case X86::LEA32r:
317 return X86::ADD32rr;
318 case X86::LEA64_32r:
319 case X86::LEA64r:
320 return X86::ADD64rr;
321 }
322 }
323
getADDriFromLEA(int LEAOpcode,const MachineOperand & Offset)324 static inline int getADDriFromLEA(int LEAOpcode, const MachineOperand &Offset) {
325 bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
326 switch (LEAOpcode) {
327 default:
328 llvm_unreachable("Unexpected LEA instruction");
329 case X86::LEA16r:
330 return IsInt8 ? X86::ADD16ri8 : X86::ADD16ri;
331 case X86::LEA32r:
332 case X86::LEA64_32r:
333 return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
334 case X86::LEA64r:
335 return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
336 }
337 }
338
339 /// isLEASimpleIncOrDec - Does this LEA have one these forms:
340 /// lea %reg, 1(%reg)
341 /// lea %reg, -1(%reg)
isLEASimpleIncOrDec(MachineInstr & LEA)342 static inline bool isLEASimpleIncOrDec(MachineInstr &LEA) {
343 unsigned SrcReg = LEA.getOperand(1 + X86::AddrBaseReg).getReg();
344 unsigned DstReg = LEA.getOperand(0).getReg();
345 const MachineOperand &AddrDisp = LEA.getOperand(1 + X86::AddrDisp);
346 return SrcReg == DstReg &&
347 LEA.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
348 LEA.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
349 AddrDisp.isImm() &&
350 (AddrDisp.getImm() == 1 || AddrDisp.getImm() == -1);
351 }
352
fixupIncDec(MachineBasicBlock::iterator & I,MachineFunction::iterator MFI) const353 bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
354 MachineFunction::iterator MFI) const {
355 MachineInstr &MI = *I;
356 int Opcode = MI.getOpcode();
357 if (!isLEA(Opcode))
358 return false;
359
360 if (isLEASimpleIncOrDec(MI) && TII->isSafeToClobberEFLAGS(*MFI, I)) {
361 int NewOpcode;
362 bool isINC = MI.getOperand(1 + X86::AddrDisp).getImm() == 1;
363 switch (Opcode) {
364 case X86::LEA16r:
365 NewOpcode = isINC ? X86::INC16r : X86::DEC16r;
366 break;
367 case X86::LEA32r:
368 case X86::LEA64_32r:
369 NewOpcode = isINC ? X86::INC32r : X86::DEC32r;
370 break;
371 case X86::LEA64r:
372 NewOpcode = isINC ? X86::INC64r : X86::DEC64r;
373 break;
374 }
375
376 MachineInstr *NewMI =
377 BuildMI(*MFI, I, MI.getDebugLoc(), TII->get(NewOpcode))
378 .add(MI.getOperand(0))
379 .add(MI.getOperand(1 + X86::AddrBaseReg));
380 MFI->erase(I);
381 I = static_cast<MachineBasicBlock::iterator>(NewMI);
382 return true;
383 }
384 return false;
385 }
386
processInstruction(MachineBasicBlock::iterator & I,MachineFunction::iterator MFI)387 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
388 MachineFunction::iterator MFI) {
389 // Process a load, store, or LEA instruction.
390 MachineInstr &MI = *I;
391 const MCInstrDesc &Desc = MI.getDesc();
392 int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
393 if (AddrOffset >= 0) {
394 AddrOffset += X86II::getOperandBias(Desc);
395 MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
396 if (p.isReg() && p.getReg() != X86::ESP) {
397 seekLEAFixup(p, I, MFI);
398 }
399 MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
400 if (q.isReg() && q.getReg() != X86::ESP) {
401 seekLEAFixup(q, I, MFI);
402 }
403 }
404 }
405
seekLEAFixup(MachineOperand & p,MachineBasicBlock::iterator & I,MachineFunction::iterator MFI)406 void FixupLEAPass::seekLEAFixup(MachineOperand &p,
407 MachineBasicBlock::iterator &I,
408 MachineFunction::iterator MFI) {
409 MachineBasicBlock::iterator MBI = searchBackwards(p, I, MFI);
410 if (MBI != MachineBasicBlock::iterator()) {
411 MachineInstr *NewMI = postRAConvertToLEA(MFI, MBI);
412 if (NewMI) {
413 ++NumLEAs;
414 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
415 // now to replace with an equivalent LEA...
416 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
417 MFI->erase(MBI);
418 MachineBasicBlock::iterator J =
419 static_cast<MachineBasicBlock::iterator>(NewMI);
420 processInstruction(J, MFI);
421 }
422 }
423 }
424
processInstructionForSlowLEA(MachineBasicBlock::iterator & I,MachineFunction::iterator MFI)425 void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
426 MachineFunction::iterator MFI) {
427 MachineInstr &MI = *I;
428 const int Opcode = MI.getOpcode();
429 if (!isLEA(Opcode))
430 return;
431
432 const MachineOperand &Dst = MI.getOperand(0);
433 const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
434 const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
435 const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
436 const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
437 const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
438
439 if (Segment.getReg() != 0 || !Offset.isImm() ||
440 !TII->isSafeToClobberEFLAGS(*MFI, I))
441 return;
442 const unsigned DstR = Dst.getReg();
443 const unsigned SrcR1 = Base.getReg();
444 const unsigned SrcR2 = Index.getReg();
445 if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
446 return;
447 if (Scale.getImm() > 1)
448 return;
449 LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
450 LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
451 MachineInstr *NewMI = nullptr;
452 // Make ADD instruction for two registers writing to LEA's destination
453 if (SrcR1 != 0 && SrcR2 != 0) {
454 const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
455 const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
456 NewMI =
457 BuildMI(*MFI, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
458 LLVM_DEBUG(NewMI->dump(););
459 }
460 // Make ADD instruction for immediate
461 if (Offset.getImm() != 0) {
462 const MCInstrDesc &ADDri =
463 TII->get(getADDriFromLEA(Opcode, Offset));
464 const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
465 NewMI = BuildMI(*MFI, I, MI.getDebugLoc(), ADDri, DstR)
466 .add(SrcR)
467 .addImm(Offset.getImm());
468 LLVM_DEBUG(NewMI->dump(););
469 }
470 if (NewMI) {
471 MFI->erase(I);
472 I = NewMI;
473 }
474 }
475
/// Try to replace a slow LEA (three-operand LEA, or LEA with an inefficient
/// base register plus an index register) with a cheaper instruction sequence.
///
/// New instructions are inserted in front of \p MI; \p MI itself is not
/// erased here (processBasicBlock erases it when a non-null result is
/// returned).
///
/// \param MI  Candidate instruction.
/// \param MFI Basic block containing \p MI.
/// \returns the last instruction of the replacement sequence, or nullptr when
/// \p MI is left untouched.
MachineInstr *
FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
                                        MachineFunction::iterator MFI) {

  const int LEAOpcode = MI.getOpcode();
  if (!isLEA(LEAOpcode))
    return nullptr;

  const MachineOperand &Dst = MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
  const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
  const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
  const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
  const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);

  // Bail out unless this is one of the slow forms, EFLAGS can be clobbered
  // (the replacement sequences use ADD), and no segment register is involved.
  if (!(TII->isThreeOperandsLEA(MI) ||
        hasInefficientLEABaseReg(Base, Index)) ||
      !TII->isSafeToClobberEFLAGS(*MFI, MI) ||
      Segment.getReg() != X86::NoRegister)
    return nullptr;

  unsigned int DstR = Dst.getReg();
  unsigned int BaseR = Base.getReg();
  unsigned int IndexR = Index.getReg();
  // For LEA64_32r compare against the 64-bit register corresponding to the
  // destination.
  // NOTE(review): presumably because LEA64_32r's address registers are
  // 64-bit while its destination is 32-bit - confirm.
  unsigned SSDstR =
      (LEAOpcode == X86::LEA64_32r) ? getX86SubSuperRegister(DstR, 64) : DstR;
  bool IsScale1 = Scale.getImm() == 1;
  bool IsInefficientBase = isInefficientLEAReg(BaseR);
  bool IsInefficientIndex = isInefficientLEAReg(IndexR);

  // Skip these cases since it takes more than 2 instructions
  // to replace the LEA instruction.
  if (IsInefficientBase && SSDstR == BaseR && !IsScale1)
    return nullptr;
  if (LEAOpcode == X86::LEA64_32r && IsInefficientBase &&
      (IsInefficientIndex || !IsScale1))
    return nullptr;

  const DebugLoc DL = MI.getDebugLoc();
  const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode));
  const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset));

  LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
  LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);

  // First try to replace LEA with one or two (for the 3-op LEA case)
  // add instructions:
  // 1.lea (%base,%index,1), %base => add %index,%base
  // 2.lea (%base,%index,1), %index => add %base,%index
  if (IsScale1 && (DstR == BaseR || DstR == IndexR)) {
    const MachineOperand &Src = DstR == BaseR ? Index : Base;
    MachineInstr *NewMI =
        BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Src);
    LLVM_DEBUG(NewMI->dump(););
    // Create ADD instruction for the Offset in case of 3-Ops LEA.
    if (hasLEAOffset(Offset)) {
      NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
      LLVM_DEBUG(NewMI->dump(););
    }
    return NewMI;
  }
  // If the base is inefficient try switching the index and base operands,
  // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
  // lea offset(%base,%index,scale),%dst =>
  // lea (%base,%index,scale); add offset,%dst
  if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
    // The swap is only taken when the scale is 1 (see the condition above).
    MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode))
                              .add(Dst)
                              .add(IsInefficientBase ? Index : Base)
                              .add(Scale)
                              .add(IsInefficientBase ? Base : Index)
                              .addImm(0)
                              .add(Segment);
    LLVM_DEBUG(NewMI->dump(););
    // Create ADD instruction for the Offset in case of 3-Ops LEA.
    if (hasLEAOffset(Offset)) {
      NewMI = BuildMI(*MFI, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
      LLVM_DEBUG(NewMI->dump(););
    }
    return NewMI;
  }
  // Handle the rest of the cases with inefficient base register:
  assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!");
  assert(IsInefficientBase && "efficient base should be handled already!");

  // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
  if (IsScale1 && !hasLEAOffset(Offset)) {
    // Only forward the base's kill flag to the copy when the base is not also
    // the index, since the index is still read by the ADD emitted below.
    bool BIK = Base.isKill() && BaseR != IndexR;
    TII->copyPhysReg(*MFI, MI, DL, DstR, BaseR, BIK);
    LLVM_DEBUG(MI.getPrevNode()->dump(););

    MachineInstr *NewMI =
        BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Index);
    LLVM_DEBUG(NewMI->dump(););
    return NewMI;
  }
  // lea offset(%base,%index,scale), %dst =>
  // lea offset( ,%index,scale), %dst; add %base,%dst
  MachineInstr *NewMI = BuildMI(*MFI, MI, DL, TII->get(LEAOpcode))
                            .add(Dst)
                            .addReg(0)
                            .add(Scale)
                            .add(Index)
                            .add(Offset)
                            .add(Segment);
  LLVM_DEBUG(NewMI->dump(););

  NewMI = BuildMI(*MFI, MI, DL, ADDrr, DstR).addReg(DstR).add(Base);
  LLVM_DEBUG(NewMI->dump(););
  return NewMI;
}
587
processBasicBlock(MachineFunction & MF,MachineFunction::iterator MFI,bool IsSlowLEA,bool IsSlow3OpsLEA)588 bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
589 MachineFunction::iterator MFI,
590 bool IsSlowLEA, bool IsSlow3OpsLEA) {
591 for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
592 if (OptIncDec)
593 if (fixupIncDec(I, MFI))
594 continue;
595
596 if (OptLEA) {
597 if (IsSlowLEA) {
598 processInstructionForSlowLEA(I, MFI);
599 continue;
600 }
601
602 if (IsSlow3OpsLEA) {
603 if (auto *NewMI = processInstrForSlow3OpLEA(*I, MFI)) {
604 MFI->erase(I);
605 I = NewMI;
606 }
607 continue;
608 }
609
610 processInstruction(I, MFI);
611 }
612 }
613 return false;
614 }
615