1 //===-- AArch64CondBrTuning.cpp --- Conditional branch tuning for AArch64 -===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// This file contains a pass that transforms CBZ/CBNZ/TBZ/TBNZ instructions 11 /// into a conditional branch (B.cond), when the NZCV flags can be set for 12 /// "free". This is preferred on targets that have more flexibility when 13 /// scheduling B.cond instructions as compared to CBZ/CBNZ/TBZ/TBNZ (assuming 14 /// all other variables are equal). This can also reduce register pressure. 15 /// 16 /// A few examples: 17 /// 18 /// 1) add w8, w0, w1 -> cmn w0, w1 ; CMN is an alias of ADDS. 19 /// cbz w8, .LBB_2 -> b.eq .LBB0_2 20 /// 21 /// 2) add w8, w0, w1 -> adds w8, w0, w1 ; w8 has multiple uses. 22 /// cbz w8, .LBB1_2 -> b.eq .LBB1_2 23 /// 24 /// 3) sub w8, w0, w1 -> subs w8, w0, w1 ; w8 has multiple uses. 25 /// tbz w8, #31, .LBB6_2 -> b.pl .LBB6_2 26 /// 27 //===----------------------------------------------------------------------===// 28 29 #include "AArch64.h" 30 #include "AArch64Subtarget.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineFunctionPass.h" 33 #include "llvm/CodeGen/MachineInstrBuilder.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/Passes.h" 36 #include "llvm/CodeGen/TargetInstrInfo.h" 37 #include "llvm/CodeGen/TargetRegisterInfo.h" 38 #include "llvm/CodeGen/TargetSubtargetInfo.h" 39 #include "llvm/Support/Debug.h" 40 #include "llvm/Support/raw_ostream.h" 41 42 using namespace llvm; 43 44 #define DEBUG_TYPE "aarch64-cond-br-tuning" 45 #define AARCH64_CONDBR_TUNING_NAME "AArch64 Conditional Branch Tuning" 46 47 namespace { 48 class AArch64CondBrTuning : public MachineFunctionPass { 49 const AArch64InstrInfo *TII; 50 const TargetRegisterInfo *TRI; 51 52 MachineRegisterInfo *MRI; 53 54 public: 55 static char ID; 56 AArch64CondBrTuning() : MachineFunctionPass(ID) { 57 initializeAArch64CondBrTuningPass(*PassRegistry::getPassRegistry()); 58 } 59 void getAnalysisUsage(AnalysisUsage &AU) const override; 60 bool runOnMachineFunction(MachineFunction &MF) override; 61 StringRef getPassName() const override { return AARCH64_CONDBR_TUNING_NAME; } 62 63 private: 64 MachineInstr *getOperandDef(const MachineOperand &MO); 65 MachineInstr *convertToFlagSetting(MachineInstr &MI, bool IsFlagSetting); 66 MachineInstr *convertToCondBr(MachineInstr &MI); 67 bool tryToTuneBranch(MachineInstr &MI, MachineInstr &DefMI); 68 }; 69 } // end anonymous namespace 70 71 char AArch64CondBrTuning::ID = 0; 72 73 INITIALIZE_PASS(AArch64CondBrTuning, "aarch64-cond-br-tuning", 74 AARCH64_CONDBR_TUNING_NAME, false, false) 75 76 void AArch64CondBrTuning::getAnalysisUsage(AnalysisUsage &AU) const { 77 AU.setPreservesCFG(); 78 MachineFunctionPass::getAnalysisUsage(AU); 79 } 80 81 MachineInstr *AArch64CondBrTuning::getOperandDef(const MachineOperand &MO) { 82 if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) 83 return nullptr; 84 return MRI->getUniqueVRegDef(MO.getReg()); 85 } 86 87 MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI, 88 bool IsFlagSetting) { 89 // If this is already the flag setting version of the instruction (e.g., SUBS) 90 // just make sure the implicit-def of NZCV isn't marked dead. 91 if (IsFlagSetting) { 92 for (unsigned I = MI.getNumExplicitOperands(), E = MI.getNumOperands(); 93 I != E; ++I) { 94 MachineOperand &MO = MI.getOperand(I); 95 if (MO.isReg() && MO.isDead() && MO.getReg() == AArch64::NZCV) 96 MO.setIsDead(false); 97 } 98 return &MI; 99 } 100 bool Is64Bit; 101 unsigned NewOpc = TII->convertToFlagSettingOpc(MI.getOpcode(), Is64Bit); 102 unsigned NewDestReg = MI.getOperand(0).getReg(); 103 if (MRI->hasOneNonDBGUse(MI.getOperand(0).getReg())) 104 NewDestReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 105 106 MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), 107 TII->get(NewOpc), NewDestReg); 108 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) 109 MIB.add(MI.getOperand(I)); 110 111 return MIB; 112 } 113 114 MachineInstr *AArch64CondBrTuning::convertToCondBr(MachineInstr &MI) { 115 AArch64CC::CondCode CC; 116 MachineBasicBlock *TargetMBB = TII->getBranchDestBlock(MI); 117 switch (MI.getOpcode()) { 118 default: 119 llvm_unreachable("Unexpected opcode!"); 120 121 case AArch64::CBZW: 122 case AArch64::CBZX: 123 CC = AArch64CC::EQ; 124 break; 125 case AArch64::CBNZW: 126 case AArch64::CBNZX: 127 CC = AArch64CC::NE; 128 break; 129 case AArch64::TBZW: 130 case AArch64::TBZX: 131 CC = AArch64CC::PL; 132 break; 133 case AArch64::TBNZW: 134 case AArch64::TBNZX: 135 CC = AArch64CC::MI; 136 break; 137 } 138 return BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::Bcc)) 139 .addImm(CC) 140 .addMBB(TargetMBB); 141 } 142 143 bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI, 144 MachineInstr &DefMI) { 145 // We don't want NZCV bits live across blocks. 146 if (MI.getParent() != DefMI.getParent()) 147 return false; 148 149 bool IsFlagSetting = true; 150 unsigned MIOpc = MI.getOpcode(); 151 MachineInstr *NewCmp = nullptr, *NewBr = nullptr; 152 switch (DefMI.getOpcode()) { 153 default: 154 return false; 155 case AArch64::ADDWri: 156 case AArch64::ADDWrr: 157 case AArch64::ADDWrs: 158 case AArch64::ADDWrx: 159 case AArch64::ANDWri: 160 case AArch64::ANDWrr: 161 case AArch64::ANDWrs: 162 case AArch64::BICWrr: 163 case AArch64::BICWrs: 164 case AArch64::SUBWri: 165 case AArch64::SUBWrr: 166 case AArch64::SUBWrs: 167 case AArch64::SUBWrx: 168 IsFlagSetting = false; 169 LLVM_FALLTHROUGH; 170 case AArch64::ADDSWri: 171 case AArch64::ADDSWrr: 172 case AArch64::ADDSWrs: 173 case AArch64::ADDSWrx: 174 case AArch64::ANDSWri: 175 case AArch64::ANDSWrr: 176 case AArch64::ANDSWrs: 177 case AArch64::BICSWrr: 178 case AArch64::BICSWrs: 179 case AArch64::SUBSWri: 180 case AArch64::SUBSWrr: 181 case AArch64::SUBSWrs: 182 case AArch64::SUBSWrx: 183 switch (MIOpc) { 184 default: 185 llvm_unreachable("Unexpected opcode!"); 186 187 case AArch64::CBZW: 188 case AArch64::CBNZW: 189 case AArch64::TBZW: 190 case AArch64::TBNZW: 191 // Check to see if the TBZ/TBNZ is checking the sign bit. 192 if ((MIOpc == AArch64::TBZW || MIOpc == AArch64::TBNZW) && 193 MI.getOperand(1).getImm() != 31) 194 return false; 195 196 // There must not be any instruction between DefMI and MI that clobbers or 197 // reads NZCV. 198 MachineBasicBlock::iterator I(DefMI), E(MI); 199 for (I = std::next(I); I != E; ++I) { 200 if (I->modifiesRegister(AArch64::NZCV, TRI) || 201 I->readsRegister(AArch64::NZCV, TRI)) 202 return false; 203 } 204 LLVM_DEBUG(dbgs() << " Replacing instructions:\n "); 205 LLVM_DEBUG(DefMI.print(dbgs())); 206 LLVM_DEBUG(dbgs() << " "); 207 LLVM_DEBUG(MI.print(dbgs())); 208 209 NewCmp = convertToFlagSetting(DefMI, IsFlagSetting); 210 NewBr = convertToCondBr(MI); 211 break; 212 } 213 break; 214 215 case AArch64::ADDXri: 216 case AArch64::ADDXrr: 217 case AArch64::ADDXrs: 218 case AArch64::ADDXrx: 219 case AArch64::ANDXri: 220 case AArch64::ANDXrr: 221 case AArch64::ANDXrs: 222 case AArch64::BICXrr: 223 case AArch64::BICXrs: 224 case AArch64::SUBXri: 225 case AArch64::SUBXrr: 226 case AArch64::SUBXrs: 227 case AArch64::SUBXrx: 228 IsFlagSetting = false; 229 LLVM_FALLTHROUGH; 230 case AArch64::ADDSXri: 231 case AArch64::ADDSXrr: 232 case AArch64::ADDSXrs: 233 case AArch64::ADDSXrx: 234 case AArch64::ANDSXri: 235 case AArch64::ANDSXrr: 236 case AArch64::ANDSXrs: 237 case AArch64::BICSXrr: 238 case AArch64::BICSXrs: 239 case AArch64::SUBSXri: 240 case AArch64::SUBSXrr: 241 case AArch64::SUBSXrs: 242 case AArch64::SUBSXrx: 243 switch (MIOpc) { 244 default: 245 llvm_unreachable("Unexpected opcode!"); 246 247 case AArch64::CBZX: 248 case AArch64::CBNZX: 249 case AArch64::TBZX: 250 case AArch64::TBNZX: { 251 // Check to see if the TBZ/TBNZ is checking the sign bit. 252 if ((MIOpc == AArch64::TBZX || MIOpc == AArch64::TBNZX) && 253 MI.getOperand(1).getImm() != 63) 254 return false; 255 // There must not be any instruction between DefMI and MI that clobbers or 256 // reads NZCV. 257 MachineBasicBlock::iterator I(DefMI), E(MI); 258 for (I = std::next(I); I != E; ++I) { 259 if (I->modifiesRegister(AArch64::NZCV, TRI) || 260 I->readsRegister(AArch64::NZCV, TRI)) 261 return false; 262 } 263 LLVM_DEBUG(dbgs() << " Replacing instructions:\n "); 264 LLVM_DEBUG(DefMI.print(dbgs())); 265 LLVM_DEBUG(dbgs() << " "); 266 LLVM_DEBUG(MI.print(dbgs())); 267 268 NewCmp = convertToFlagSetting(DefMI, IsFlagSetting); 269 NewBr = convertToCondBr(MI); 270 break; 271 } 272 } 273 break; 274 } 275 (void)NewCmp; (void)NewBr; 276 assert(NewCmp && NewBr && "Expected new instructions."); 277 278 LLVM_DEBUG(dbgs() << " with instruction:\n "); 279 LLVM_DEBUG(NewCmp->print(dbgs())); 280 LLVM_DEBUG(dbgs() << " "); 281 LLVM_DEBUG(NewBr->print(dbgs())); 282 283 // If this was a flag setting version of the instruction, we use the original 284 // instruction by just clearing the dead marked on the implicit-def of NCZV. 285 // Therefore, we should not erase this instruction. 286 if (!IsFlagSetting) 287 DefMI.eraseFromParent(); 288 MI.eraseFromParent(); 289 return true; 290 } 291 292 bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) { 293 if (skipFunction(MF.getFunction())) 294 return false; 295 296 LLVM_DEBUG( 297 dbgs() << "********** AArch64 Conditional Branch Tuning **********\n" 298 << "********** Function: " << MF.getName() << '\n'); 299 300 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 301 TRI = MF.getSubtarget().getRegisterInfo(); 302 MRI = &MF.getRegInfo(); 303 304 bool Changed = false; 305 for (MachineBasicBlock &MBB : MF) { 306 bool LocalChange = false; 307 for (MachineBasicBlock::iterator I = MBB.getFirstTerminator(), 308 E = MBB.end(); 309 I != E; ++I) { 310 MachineInstr &MI = *I; 311 switch (MI.getOpcode()) { 312 default: 313 break; 314 case AArch64::CBZW: 315 case AArch64::CBZX: 316 case AArch64::CBNZW: 317 case AArch64::CBNZX: 318 case AArch64::TBZW: 319 case AArch64::TBZX: 320 case AArch64::TBNZW: 321 case AArch64::TBNZX: 322 MachineInstr *DefMI = getOperandDef(MI.getOperand(0)); 323 LocalChange = (DefMI && tryToTuneBranch(MI, *DefMI)); 324 break; 325 } 326 // If the optimization was successful, we can't optimize any other 327 // branches because doing so would clobber the NZCV flags. 328 if (LocalChange) { 329 Changed = true; 330 break; 331 } 332 } 333 } 334 return Changed; 335 } 336 337 FunctionPass *llvm::createAArch64CondBrTuning() { 338 return new AArch64CondBrTuning(); 339 } 340