1 //===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// This file contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions 11 /// into a conditional branch (B.cond), when the NZCV flags can be set for 12 /// "free". This is preferred on targets that have more flexibility when 13 /// scheduling B.cond instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming 14 /// all other variables are equal). This can also reduce register pressure. 15 /// 16 /// A few examples: 17 /// 18 /// 1) add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS. 19 /// cbz w8, .LBB_2 -> b.eq .LBB0_2 20 /// 21 /// 2) add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses. 22 /// cbz w8, .LBB1_2 -> b.eq .LBB1_2 23 /// 24 /// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses. 25 /// tbz w8, #31, .LBB6_2 -> b.ge .LBB6_2 26 /// 27 //===----------------------------------------------------------------------===// 28 29 #include "AArch64.h" 30 #include "AArch64Subtarget.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineFunctionPass.h" 33 #include "llvm/CodeGen/MachineInstrBuilder.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/MachineTraceMetrics.h" 36 #include "llvm/CodeGen/Passes.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "llvm/Target/TargetInstrInfo.h" 40 #include "llvm/Target/TargetRegisterInfo.h" 41 #include "llvm/Target/TargetSubtargetInfo.h" 42 43 using namespace llvm; 44 45 #define DEBUG_TYPE "aarch64-cond-br-tuning" 46 #define AARCH64_CONDBR_TUNING_NAME "AArch64 Conditional Branch Tuning" 47 48 namespace { 49 class AArch64CondBrTuning : public MachineFunctionPass { 50 const AArch64InstrInfo *TII; 51 const TargetRegisterInfo *TRI; 52 53 MachineRegisterInfo *MRI; 54 55 public: 56 static char ID; 57 AArch64CondBrTuning() : MachineFunctionPass(ID) { 58 initializeAArch64CondBrTuningPass(*PassRegistry::getPassRegistry()); 59 } 60 void getAnalysisUsage(AnalysisUsage &AU) const override; 61 bool runOnMachineFunction(MachineFunction &MF) override; 62 StringRef getPassName() const override { return AARCH64_CONDBR_TUNING_NAME; } 63 64 private: 65 MachineInstr *getOperandDef(const MachineOperand &MO); 66 MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting); 67 MachineInstr *convertToCondBr(MachineInstr &MI); 68 bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI); 69 }; 70 } // end anonymous namespace 71 72 char AArch64CondBrTuning::ID = 0; 73 74 INITIALIZE_PASS(AArch64CondBrTuning, "aarch64-cond-br-tuning", 75 AARCH64_CONDBR_TUNING_NAME, false, false) 76 77 void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const { 78 AU.setPreservesCFG(); 79 MachineFunctionPass::getAnalysisUsage(AU); 80 } 81 82 MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) { 83 if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) 84 return nullptr; 85 return MRI->getUniqueVRegDef(MO.getReg()); 86 } 87 88 MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI, 89 bool IsFlagSetting) { 90 // If this is already the flag setting version of the instruction (e.g., SUBS) 91 // just make sure the implicit-def of NZCV isn't marked dead. 92 if (IsFlagSetting) { 93 for (unsigned I = MI.getNumExplicitOperands(), E = MI.getNumOperands(); 94 I != E; ++I) { 95 MachineOperand &MO = MI.getOperand(I); 96 if (MO.isReg() && MO.isDead() && MO.getReg() == AArch64::NZCV) 97 MO.setIsDead(false); 98 } 99 return &MI; 100 } 101 bool Is64Bit; 102 unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit); 103 unsigned NewDestReg = MI.getOperand(0).getReg(); 104 if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg())) 105 NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 106 107 MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), 108 TII->get(NewOpc), NewDestReg); 109 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) 110 MIB.add(MI.getOperand(I)); 111 112 return MIB; 113 } 114 115 MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) { 116 AArch64CC::CondCode CC; 117 MachineBasicBlock *TargetMBB = TII->getBranchDestBlock(MI); 118 switch (MI.getOpcode()) { 119 default: 120 llvm_unreachable("Unexpected opcode!"); 121 122 case AArch64::CBZW: 123 case AArch64::CBZX: 124 CC = AArch64CC::EQ; 125 break; 126 case AArch64::CBNZW: 127 case AArch64::CBNZX: 128 CC = AArch64CC::NE; 129 break; 130 case AArch64::TBZW: 131 case AArch64::TBZX: 132 CC = AArch64CC::GE; 133 break; 134 case AArch64::TBNZW: 135 case AArch64::TBNZX: 136 CC = AArch64CC::LT; 137 break; 138 } 139 return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc)) 140 .addImm(CC) 141 .addMBB(TargetMBB); 142 } 143 144 bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI, 145 MachineInstr &DefMI) { 146 // We don't want NZCV bits live across blocks. 147 if (MI.getParent() != DefMI.getParent()) 148 return false; 149 150 bool IsFlagSetting = true; 151 unsigned MIOpc = MI.getOpcode(); 152 MachineInstr *NewCmp = nullptr, *NewBr = nullptr; 153 switch (DefMI.getOpcode()) { 154 default: 155 return false; 156 case AArch64::ADDWri: 157 case AArch64::ADDWrr: 158 case AArch64::ADDWrs: 159 case AArch64::ADDWrx: 160 case AArch64::ANDWri: 161 case AArch64::ANDWrr: 162 case AArch64::ANDWrs: 163 case AArch64::BICWrr: 164 case AArch64::BICWrs: 165 case AArch64::SUBWri: 166 case AArch64::SUBWrr: 167 case AArch64::SUBWrs: 168 case AArch64::SUBWrx: 169 IsFlagSetting = false; 170 case AArch64::ADDSWri: 171 case AArch64::ADDSWrr: 172 case AArch64::ADDSWrs: 173 case AArch64::ADDSWrx: 174 case AArch64::ANDSWri: 175 case AArch64::ANDSWrr: 176 case AArch64::ANDSWrs: 177 case AArch64::BICSWrr: 178 case AArch64::BICSWrs: 179 case AArch64::SUBSWri: 180 case AArch64::SUBSWrr: 181 case AArch64::SUBSWrs: 182 case AArch64::SUBSWrx: 183 switch (MIOpc) { 184 default: 185 llvm_unreachable("Unexpected opcode!"); 186 187 case AArch64::CBZW: 188 case AArch64::CBNZW: 189 case AArch64::TBZW: 190 case AArch64::TBNZW: 191 // Check to see if the TBZ/TBNZ is checking the sign bit. 192 if ((MIOpc == AArch64::TBZW || MIOpc == AArch64::TBNZW) && 193 MI.getOperand(1).getImm() != 31) 194 return false; 195 196 // There must not be any instruction between DefMI and MI that clobbers or 197 // reads NZCV. 198 MachineBasicBlock::iterator I(DefMI), E(MI); 199 for (I = std::next(I); I != E; ++I) { 200 if (I->modifiesRegister(AArch64::NZCV, TRI) || 201 I->readsRegister(AArch64::NZCV, TRI)) 202 return false; 203 } 204 DEBUG(dbgs() << " Replacing instructions:\n "); 205 DEBUG(DefMI.print(dbgs())); 206 DEBUG(dbgs() << " "); 207 DEBUG(MI.print(dbgs())); 208 209 NewCmp = convertToFlagSetting(DefMI, IsFlagSetting); 210 NewBr = convertToCondBr(MI); 211 break; 212 } 213 break; 214 215 case AArch64::ADDXri: 216 case AArch64::ADDXrr: 217 case AArch64::ADDXrs: 218 case AArch64::ADDXrx: 219 case AArch64::ANDXri: 220 case AArch64::ANDXrr: 221 case AArch64::ANDXrs: 222 case AArch64::BICXrr: 223 case AArch64::BICXrs: 224 case AArch64::SUBXri: 225 case AArch64::SUBXrr: 226 case AArch64::SUBXrs: 227 case AArch64::SUBXrx: 228 IsFlagSetting = false; 229 case AArch64::ADDSXri: 230 case AArch64::ADDSXrr: 231 case AArch64::ADDSXrs: 232 case AArch64::ADDSXrx: 233 case AArch64::ANDSXri: 234 case AArch64::ANDSXrr: 235 case AArch64::ANDSXrs: 236 case AArch64::BICSXrr: 237 case AArch64::BICSXrs: 238 case AArch64::SUBSXri: 239 case AArch64::SUBSXrr: 240 case AArch64::SUBSXrs: 241 case AArch64::SUBSXrx: 242 switch (MIOpc) { 243 default: 244 llvm_unreachable("Unexpected opcode!"); 245 246 case AArch64::CBZX: 247 case AArch64::CBNZX: 248 case AArch64::TBZX: 249 case AArch64::TBNZX: { 250 // Check to see if the TBZ/TBNZ is checking the sign bit. 251 if ((MIOpc == AArch64::TBZX || MIOpc == AArch64::TBNZX) && 252 MI.getOperand(1).getImm() != 63) 253 return false; 254 // There must not be any instruction between DefMI and MI that clobbers or 255 // reads NZCV. 256 MachineBasicBlock::iterator I(DefMI), E(MI); 257 for (I = std::next(I); I != E; ++I) { 258 if (I->modifiesRegister(AArch64::NZCV, TRI) || 259 I->readsRegister(AArch64::NZCV, TRI)) 260 return false; 261 } 262 DEBUG(dbgs() << " Replacing instructions:\n "); 263 DEBUG(DefMI.print(dbgs())); 264 DEBUG(dbgs() << " "); 265 DEBUG(MI.print(dbgs())); 266 267 NewCmp = convertToFlagSetting(DefMI, IsFlagSetting); 268 NewBr = convertToCondBr(MI); 269 break; 270 } 271 } 272 break; 273 } 274 assert(NewCmp && NewBr && "Expected new instructions."); 275 276 DEBUG(dbgs() << " with instruction:\n "); 277 DEBUG(NewCmp->print(dbgs())); 278 DEBUG(dbgs() << " "); 279 DEBUG(NewBr->print(dbgs())); 280 281 // If this was a flag setting version of the instruction, we use the original 282 // instruction by just clearing the dead marked on the implicit-def of NCZV. 283 // Therefore, we should not erase this instruction. 284 if (!IsFlagSetting) 285 DefMI.eraseFromParent(); 286 MI.eraseFromParent(); 287 return true; 288 } 289 290 bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) { 291 if (skipFunction(*MF.getFunction())) 292 return false; 293 294 DEBUG(dbgs() << "********** AArch64 Conditional Branch Tuning **********\n" 295 << "********** Function: " << MF.getName() << '\n'); 296 297 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 298 TRI = MF.getSubtarget().getRegisterInfo(); 299 MRI = &MF.getRegInfo(); 300 301 bool Changed = false; 302 for (MachineBasicBlock &MBB : MF) { 303 bool LocalChange = false; 304 for (MachineBasicBlock::iterator I = MBB.getFirstTerminator(), 305 E = MBB.end(); 306 I != E; ++I) { 307 MachineInstr &MI = *I; 308 switch (MI.getOpcode()) { 309 default: 310 break; 311 case AArch64::CBZW: 312 case AArch64::CBZX: 313 case AArch64::CBNZW: 314 case AArch64::CBNZX: 315 case AArch64::TBZW: 316 case AArch64::TBZX: 317 case AArch64::TBNZW: 318 case AArch64::TBNZX: 319 MachineInstr *DefMI = getOperandDef(MI.getOperand(0)); 320 LocalChange = (DefMI && tryToTuneBranch(MI, *DefMI)); 321 break; 322 } 323 // If the optimization was successful, we can't optimize any other 324 // branches because doing so would clobber the NZCV flags. 325 if (LocalChange) { 326 Changed = true; 327 break; 328 } 329 } 330 } 331 return Changed; 332 } 333 334 FunctionPass *llvm::createAArch64CondBrTuning() { 335 return new AArch64CondBrTuning(); 336 } 337