1 //===-- X86OptimizeLEAs.cpp - optimize usage of LEA instructions ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the pass that performs some optimizations with LEA 11 // instructions in order to improve code size. 12 // Currently, it does two things: 13 // 1) If there are two LEA instructions calculating addresses which only differ 14 // by displacement inside a basic block, one of them is removed. 15 // 2) Address calculations in load and store instructions are replaced by 16 // existing LEA def registers where possible. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "X86.h" 21 #include "X86InstrInfo.h" 22 #include "X86Subtarget.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/CodeGen/LiveVariables.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegisterInfo.h" 28 #include "llvm/CodeGen/Passes.h" 29 #include "llvm/IR/Function.h" 30 #include "llvm/Support/Debug.h" 31 #include "llvm/Support/raw_ostream.h" 32 #include "llvm/Target/TargetInstrInfo.h" 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "x86-optimize-LEAs" 37 38 static cl::opt<bool> EnableX86LEAOpt("enable-x86-lea-opt", cl::Hidden, 39 cl::desc("X86: Enable LEA optimizations."), 40 cl::init(false)); 41 42 STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions"); 43 STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed"); 44 45 namespace { 46 class OptimizeLEAPass : public MachineFunctionPass { 47 public: 48 OptimizeLEAPass() : MachineFunctionPass(ID) {} 49 50 const char *getPassName() const override { return "X86 LEA Optimize"; } 51 52 /// \brief Loop over all of the basic blocks, replacing address 53 /// calculations in load and store instructions, if it's already 54 /// been calculated by LEA. Also, remove redundant LEAs. 55 bool runOnMachineFunction(MachineFunction &MF) override; 56 57 private: 58 /// \brief Returns a distance between two instructions inside one basic block. 59 /// Negative result means, that instructions occur in reverse order. 60 int calcInstrDist(const MachineInstr &First, const MachineInstr &Last); 61 62 /// \brief Choose the best \p LEA instruction from the \p List to replace 63 /// address calculation in \p MI instruction. Return the address displacement 64 /// and the distance between \p MI and the choosen \p LEA in \p AddrDispShift 65 /// and \p Dist. 66 bool chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List, 67 const MachineInstr &MI, MachineInstr *&LEA, 68 int64_t &AddrDispShift, int &Dist); 69 70 /// \brief Returns true if two machine operand are identical and they are not 71 /// physical registers. 72 bool isIdenticalOp(const MachineOperand &MO1, const MachineOperand &MO2); 73 74 /// \brief Returns true if the instruction is LEA. 75 bool isLEA(const MachineInstr &MI); 76 77 /// \brief Returns true if the \p Last LEA instruction can be replaced by the 78 /// \p First. The difference between displacements of the addresses calculated 79 /// by these LEAs is returned in \p AddrDispShift. It'll be used for proper 80 /// replacement of the \p Last LEA's uses with the \p First's def register. 81 bool isReplaceable(const MachineInstr &First, const MachineInstr &Last, 82 int64_t &AddrDispShift); 83 84 /// \brief Returns true if two instructions have memory operands that only 85 /// differ by displacement. The numbers of the first memory operands for both 86 /// instructions are specified through \p N1 and \p N2. The address 87 /// displacement is returned through AddrDispShift. 88 bool isSimilarMemOp(const MachineInstr &MI1, unsigned N1, 89 const MachineInstr &MI2, unsigned N2, 90 int64_t &AddrDispShift); 91 92 /// \brief Find all LEA instructions in the basic block. Also, assign position 93 /// numbers to all instructions in the basic block to speed up calculation of 94 /// distance between them. 95 void findLEAs(const MachineBasicBlock &MBB, 96 SmallVectorImpl<MachineInstr *> &List); 97 98 /// \brief Removes redundant address calculations. 99 bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List); 100 101 /// \brief Removes LEAs which calculate similar addresses. 102 bool removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List); 103 104 DenseMap<const MachineInstr *, unsigned> InstrPos; 105 106 MachineRegisterInfo *MRI; 107 const X86InstrInfo *TII; 108 const X86RegisterInfo *TRI; 109 110 static char ID; 111 }; 112 char OptimizeLEAPass::ID = 0; 113 } 114 115 FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); } 116 117 int OptimizeLEAPass::calcInstrDist(const MachineInstr &First, 118 const MachineInstr &Last) { 119 // Both instructions must be in the same basic block and they must be 120 // presented in InstrPos. 121 assert(Last.getParent() == First.getParent() && 122 "Instructions are in different basic blocks"); 123 assert(InstrPos.find(&First) != InstrPos.end() && 124 InstrPos.find(&Last) != InstrPos.end() && 125 "Instructions' positions are undefined"); 126 127 return InstrPos[&Last] - InstrPos[&First]; 128 } 129 130 // Find the best LEA instruction in the List to replace address recalculation in 131 // MI. Such LEA must meet these requirements: 132 // 1) The address calculated by the LEA differs only by the displacement from 133 // the address used in MI. 134 // 2) The register class of the definition of the LEA is compatible with the 135 // register class of the address base register of MI. 136 // 3) Displacement of the new memory operand should fit in 1 byte if possible. 137 // 4) The LEA should be as close to MI as possible, and prior to it if 138 // possible. 139 bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List, 140 const MachineInstr &MI, MachineInstr *&LEA, 141 int64_t &AddrDispShift, int &Dist) { 142 const MachineFunction *MF = MI.getParent()->getParent(); 143 const MCInstrDesc &Desc = MI.getDesc(); 144 int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) + 145 X86II::getOperandBias(Desc); 146 147 LEA = nullptr; 148 149 // Loop over all LEA instructions. 150 for (auto DefMI : List) { 151 int64_t AddrDispShiftTemp = 0; 152 153 // Compare instructions memory operands. 154 if (!isSimilarMemOp(MI, MemOpNo, *DefMI, 1, AddrDispShiftTemp)) 155 continue; 156 157 // Make sure address displacement fits 4 bytes. 158 if (!isInt<32>(AddrDispShiftTemp)) 159 continue; 160 161 // Check that LEA def register can be used as MI address base. Some 162 // instructions can use a limited set of registers as address base, for 163 // example MOV8mr_NOREX. We could constrain the register class of the LEA 164 // def to suit MI, however since this case is very rare and hard to 165 // reproduce in a test it's just more reliable to skip the LEA. 166 if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI, *MF) != 167 MRI->getRegClass(DefMI->getOperand(0).getReg())) 168 continue; 169 170 // Choose the closest LEA instruction from the list, prior to MI if 171 // possible. Note that we took into account resulting address displacement 172 // as well. Also note that the list is sorted by the order in which the LEAs 173 // occur, so the break condition is pretty simple. 174 int DistTemp = calcInstrDist(*DefMI, MI); 175 assert(DistTemp != 0 && 176 "The distance between two different instructions cannot be zero"); 177 if (DistTemp > 0 || LEA == nullptr) { 178 // Do not update return LEA, if the current one provides a displacement 179 // which fits in 1 byte, while the new candidate does not. 180 if (LEA != nullptr && !isInt<8>(AddrDispShiftTemp) && 181 isInt<8>(AddrDispShift)) 182 continue; 183 184 LEA = DefMI; 185 AddrDispShift = AddrDispShiftTemp; 186 Dist = DistTemp; 187 } 188 189 // FIXME: Maybe we should not always stop at the first LEA after MI. 190 if (DistTemp < 0) 191 break; 192 } 193 194 return LEA != nullptr; 195 } 196 197 bool OptimizeLEAPass::isIdenticalOp(const MachineOperand &MO1, 198 const MachineOperand &MO2) { 199 return MO1.isIdenticalTo(MO2) && 200 (!MO1.isReg() || 201 !TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); 202 } 203 204 bool OptimizeLEAPass::isLEA(const MachineInstr &MI) { 205 unsigned Opcode = MI.getOpcode(); 206 return Opcode == X86::LEA16r || Opcode == X86::LEA32r || 207 Opcode == X86::LEA64r || Opcode == X86::LEA64_32r; 208 } 209 210 // Check that the Last LEA can be replaced by the First LEA. To be so, 211 // these requirements must be met: 212 // 1) Addresses calculated by LEAs differ only by displacement. 213 // 2) Def registers of LEAs belong to the same class. 214 // 3) All uses of the Last LEA def register are replaceable, thus the 215 // register is used only as address base. 216 bool OptimizeLEAPass::isReplaceable(const MachineInstr &First, 217 const MachineInstr &Last, 218 int64_t &AddrDispShift) { 219 assert(isLEA(First) && isLEA(Last) && 220 "The function works only with LEA instructions"); 221 222 // Compare instructions' memory operands. 223 if (!isSimilarMemOp(Last, 1, First, 1, AddrDispShift)) 224 return false; 225 226 // Make sure that LEA def registers belong to the same class. There may be 227 // instructions (like MOV8mr_NOREX) which allow a limited set of registers to 228 // be used as their operands, so we must be sure that replacing one LEA 229 // with another won't lead to putting a wrong register in the instruction. 230 if (MRI->getRegClass(First.getOperand(0).getReg()) != 231 MRI->getRegClass(Last.getOperand(0).getReg())) 232 return false; 233 234 // Loop over all uses of the Last LEA to check that its def register is 235 // used only as address base for memory accesses. If so, it can be 236 // replaced, otherwise - no. 237 for (auto &MO : MRI->use_operands(Last.getOperand(0).getReg())) { 238 MachineInstr &MI = *MO.getParent(); 239 240 // Get the number of the first memory operand. 241 const MCInstrDesc &Desc = MI.getDesc(); 242 int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()); 243 244 // If the use instruction has no memory operand - the LEA is not 245 // replaceable. 246 if (MemOpNo < 0) 247 return false; 248 249 MemOpNo += X86II::getOperandBias(Desc); 250 251 // If the address base of the use instruction is not the LEA def register - 252 // the LEA is not replaceable. 253 if (!isIdenticalOp(MI.getOperand(MemOpNo + X86::AddrBaseReg), MO)) 254 return false; 255 256 // If the LEA def register is used as any other operand of the use 257 // instruction - the LEA is not replaceable. 258 for (unsigned i = 0; i < MI.getNumOperands(); i++) 259 if (i != (unsigned)(MemOpNo + X86::AddrBaseReg) && 260 isIdenticalOp(MI.getOperand(i), MO)) 261 return false; 262 263 // Check that the new address displacement will fit 4 bytes. 264 if (MI.getOperand(MemOpNo + X86::AddrDisp).isImm() && 265 !isInt<32>(MI.getOperand(MemOpNo + X86::AddrDisp).getImm() + 266 AddrDispShift)) 267 return false; 268 } 269 270 return true; 271 } 272 273 // Check if MI1 and MI2 have memory operands which represent addresses that 274 // differ only by displacement. 275 bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1, 276 const MachineInstr &MI2, unsigned N2, 277 int64_t &AddrDispShift) { 278 // Address base, scale, index and segment operands must be identical. 279 static const int IdenticalOpNums[] = {X86::AddrBaseReg, X86::AddrScaleAmt, 280 X86::AddrIndexReg, X86::AddrSegmentReg}; 281 for (auto &N : IdenticalOpNums) 282 if (!isIdenticalOp(MI1.getOperand(N1 + N), MI2.getOperand(N2 + N))) 283 return false; 284 285 // Address displacement operands may differ by a constant. 286 const MachineOperand *Op1 = &MI1.getOperand(N1 + X86::AddrDisp); 287 const MachineOperand *Op2 = &MI2.getOperand(N2 + X86::AddrDisp); 288 if (!isIdenticalOp(*Op1, *Op2)) { 289 if (Op1->isImm() && Op2->isImm()) 290 AddrDispShift = Op1->getImm() - Op2->getImm(); 291 else if (Op1->isGlobal() && Op2->isGlobal() && 292 Op1->getGlobal() == Op2->getGlobal()) 293 AddrDispShift = Op1->getOffset() - Op2->getOffset(); 294 else 295 return false; 296 } 297 298 return true; 299 } 300 301 void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, 302 SmallVectorImpl<MachineInstr *> &List) { 303 unsigned Pos = 0; 304 for (auto &MI : MBB) { 305 // Assign the position number to the instruction. Note that we are going to 306 // move some instructions during the optimization however there will never 307 // be a need to move two instructions before any selected instruction. So to 308 // avoid multiple positions' updates during moves we just increase position 309 // counter by two leaving a free space for instructions which will be moved. 310 InstrPos[&MI] = Pos += 2; 311 312 if (isLEA(MI)) 313 List.push_back(const_cast<MachineInstr *>(&MI)); 314 } 315 } 316 317 // Try to find load and store instructions which recalculate addresses already 318 // calculated by some LEA and replace their memory operands with its def 319 // register. 320 bool OptimizeLEAPass::removeRedundantAddrCalc( 321 const SmallVectorImpl<MachineInstr *> &List) { 322 bool Changed = false; 323 324 assert(List.size() > 0); 325 MachineBasicBlock *MBB = List[0]->getParent(); 326 327 // Process all instructions in basic block. 328 for (auto I = MBB->begin(), E = MBB->end(); I != E;) { 329 MachineInstr &MI = *I++; 330 unsigned Opcode = MI.getOpcode(); 331 332 // Instruction must be load or store. 333 if (!MI.mayLoadOrStore()) 334 continue; 335 336 // Get the number of the first memory operand. 337 const MCInstrDesc &Desc = MI.getDesc(); 338 int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, Opcode); 339 340 // If instruction has no memory operand - skip it. 341 if (MemOpNo < 0) 342 continue; 343 344 MemOpNo += X86II::getOperandBias(Desc); 345 346 // Get the best LEA instruction to replace address calculation. 347 MachineInstr *DefMI; 348 int64_t AddrDispShift; 349 int Dist; 350 if (!chooseBestLEA(List, MI, DefMI, AddrDispShift, Dist)) 351 continue; 352 353 // If LEA occurs before current instruction, we can freely replace 354 // the instruction. If LEA occurs after, we can lift LEA above the 355 // instruction and this way to be able to replace it. Since LEA and the 356 // instruction have similar memory operands (thus, the same def 357 // instructions for these operands), we can always do that, without 358 // worries of using registers before their defs. 359 if (Dist < 0) { 360 DefMI->removeFromParent(); 361 MBB->insert(MachineBasicBlock::iterator(&MI), DefMI); 362 InstrPos[DefMI] = InstrPos[&MI] - 1; 363 364 // Make sure the instructions' position numbers are sane. 365 assert(((InstrPos[DefMI] == 1 && DefMI == MBB->begin()) || 366 InstrPos[DefMI] > 367 InstrPos[std::prev(MachineBasicBlock::iterator(DefMI))]) && 368 "Instruction positioning is broken"); 369 } 370 371 // Since we can possibly extend register lifetime, clear kill flags. 372 MRI->clearKillFlags(DefMI->getOperand(0).getReg()); 373 374 ++NumSubstLEAs; 375 DEBUG(dbgs() << "OptimizeLEAs: Candidate to replace: "; MI.dump();); 376 377 // Change instruction operands. 378 MI.getOperand(MemOpNo + X86::AddrBaseReg) 379 .ChangeToRegister(DefMI->getOperand(0).getReg(), false); 380 MI.getOperand(MemOpNo + X86::AddrScaleAmt).ChangeToImmediate(1); 381 MI.getOperand(MemOpNo + X86::AddrIndexReg) 382 .ChangeToRegister(X86::NoRegister, false); 383 MI.getOperand(MemOpNo + X86::AddrDisp).ChangeToImmediate(AddrDispShift); 384 MI.getOperand(MemOpNo + X86::AddrSegmentReg) 385 .ChangeToRegister(X86::NoRegister, false); 386 387 DEBUG(dbgs() << "OptimizeLEAs: Replaced by: "; MI.dump();); 388 389 Changed = true; 390 } 391 392 return Changed; 393 } 394 395 // Try to find similar LEAs in the list and replace one with another. 396 bool 397 OptimizeLEAPass::removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List) { 398 bool Changed = false; 399 400 // Loop over all LEA pairs. 401 auto I1 = List.begin(); 402 while (I1 != List.end()) { 403 MachineInstr &First = **I1; 404 auto I2 = std::next(I1); 405 while (I2 != List.end()) { 406 MachineInstr &Last = **I2; 407 int64_t AddrDispShift; 408 409 // LEAs should be in occurence order in the list, so we can freely 410 // replace later LEAs with earlier ones. 411 assert(calcInstrDist(First, Last) > 0 && 412 "LEAs must be in occurence order in the list"); 413 414 // Check that the Last LEA instruction can be replaced by the First. 415 if (!isReplaceable(First, Last, AddrDispShift)) { 416 ++I2; 417 continue; 418 } 419 420 // Loop over all uses of the Last LEA and update their operands. Note that 421 // the correctness of this has already been checked in the isReplaceable 422 // function. 423 for (auto UI = MRI->use_begin(Last.getOperand(0).getReg()), 424 UE = MRI->use_end(); 425 UI != UE;) { 426 MachineOperand &MO = *UI++; 427 MachineInstr &MI = *MO.getParent(); 428 429 // Get the number of the first memory operand. 430 const MCInstrDesc &Desc = MI.getDesc(); 431 int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) + 432 X86II::getOperandBias(Desc); 433 434 // Update address base. 435 MO.setReg(First.getOperand(0).getReg()); 436 437 // Update address disp. 438 MachineOperand *Op = &MI.getOperand(MemOpNo + X86::AddrDisp); 439 if (Op->isImm()) 440 Op->setImm(Op->getImm() + AddrDispShift); 441 else if (Op->isGlobal()) 442 Op->setOffset(Op->getOffset() + AddrDispShift); 443 else 444 llvm_unreachable("Invalid address displacement operand"); 445 } 446 447 // Since we can possibly extend register lifetime, clear kill flags. 448 MRI->clearKillFlags(First.getOperand(0).getReg()); 449 450 ++NumRedundantLEAs; 451 DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: "; Last.dump();); 452 453 // By this moment, all of the Last LEA's uses must be replaced. So we can 454 // freely remove it. 455 assert(MRI->use_empty(Last.getOperand(0).getReg()) && 456 "The LEA's def register must have no uses"); 457 Last.eraseFromParent(); 458 459 // Erase removed LEA from the list. 460 I2 = List.erase(I2); 461 462 Changed = true; 463 } 464 ++I1; 465 } 466 467 return Changed; 468 } 469 470 bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { 471 bool Changed = false; 472 473 // Perform this optimization only if we care about code size. 474 if (!EnableX86LEAOpt || !MF.getFunction()->optForSize()) 475 return false; 476 477 MRI = &MF.getRegInfo(); 478 TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); 479 TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo(); 480 481 // Process all basic blocks. 482 for (auto &MBB : MF) { 483 SmallVector<MachineInstr *, 16> LEAs; 484 InstrPos.clear(); 485 486 // Find all LEA instructions in basic block. 487 findLEAs(MBB, LEAs); 488 489 // If current basic block has no LEAs, move on to the next one. 490 if (LEAs.empty()) 491 continue; 492 493 // Remove redundant LEA instructions. The optimization may have a negative 494 // effect on performance, so do it only for -Oz. 495 if (MF.getFunction()->optForMinSize()) 496 Changed |= removeRedundantLEAs(LEAs); 497 498 // Remove redundant address calculations. 499 Changed |= removeRedundantAddrCalc(LEAs); 500 } 501 502 return Changed; 503 } 504