1 //===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ARM.h" 10 #include "ARMMachineFunctionInfo.h" 11 #include "ARMSubtarget.h" 12 #include "MCTargetDesc/ARMBaseInfo.h" 13 #include "Thumb2InstrInfo.h" 14 #include "llvm/ADT/SmallSet.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/CodeGen/MachineBasicBlock.h" 19 #include "llvm/CodeGen/MachineFunction.h" 20 #include "llvm/CodeGen/MachineFunctionPass.h" 21 #include "llvm/CodeGen/MachineInstr.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineInstrBundle.h" 24 #include "llvm/CodeGen/MachineOperand.h" 25 #include "llvm/IR/DebugLoc.h" 26 #include "llvm/MC/MCInstrDesc.h" 27 #include "llvm/MC/MCRegisterInfo.h" 28 #include <cassert> 29 #include <new> 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "arm-mve-vpt" 34 35 namespace { 36 class MVEVPTBlock : public MachineFunctionPass { 37 public: 38 static char ID; 39 const Thumb2InstrInfo *TII; 40 const TargetRegisterInfo *TRI; 41 42 MVEVPTBlock() : MachineFunctionPass(ID) {} 43 44 bool runOnMachineFunction(MachineFunction &Fn) override; 45 46 MachineFunctionProperties getRequiredProperties() const override { 47 return MachineFunctionProperties().set( 48 MachineFunctionProperties::Property::NoVRegs); 49 } 50 51 StringRef getPassName() const override { 52 return "MVE VPT block insertion pass"; 53 } 54 55 private: 56 bool InsertVPTBlocks(MachineBasicBlock &MBB); 57 }; 58 59 char MVEVPTBlock::ID = 0; 60 61 } // end anonymous namespace 62 63 INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false) 64 65 enum VPTMaskValue { 66 T = 8, // 0b1000 67 TT = 4, // 0b0100 68 TE = 12, // 0b1100 69 TTT = 2, // 0b0010 70 TTE = 6, // 0b0110 71 TEE = 10, // 0b1010 72 TET = 14, // 0b1110 73 TTTT = 1, // 0b0001 74 TTTE = 3, // 0b0011 75 TTEE = 5, // 0b0101 76 TTET = 7, // 0b0111 77 TEEE = 9, // 0b1001 78 TEET = 11, // 0b1011 79 TETT = 13, // 0b1101 80 TETE = 15 // 0b1111 81 }; 82 83 static unsigned VCMPOpcodeToVPT(unsigned Opcode) { 84 switch (Opcode) { 85 case ARM::MVE_VCMPf32: 86 return ARM::MVE_VPTv4f32; 87 case ARM::MVE_VCMPf16: 88 return ARM::MVE_VPTv8f16; 89 case ARM::MVE_VCMPi8: 90 return ARM::MVE_VPTv16i8; 91 case ARM::MVE_VCMPi16: 92 return ARM::MVE_VPTv8i16; 93 case ARM::MVE_VCMPi32: 94 return ARM::MVE_VPTv4i32; 95 case ARM::MVE_VCMPu8: 96 return ARM::MVE_VPTv16u8; 97 case ARM::MVE_VCMPu16: 98 return ARM::MVE_VPTv8u16; 99 case ARM::MVE_VCMPu32: 100 return ARM::MVE_VPTv4u32; 101 case ARM::MVE_VCMPs8: 102 return ARM::MVE_VPTv16s8; 103 case ARM::MVE_VCMPs16: 104 return ARM::MVE_VPTv8s16; 105 case ARM::MVE_VCMPs32: 106 return ARM::MVE_VPTv4s32; 107 108 case ARM::MVE_VCMPf32r: 109 return ARM::MVE_VPTv4f32r; 110 case ARM::MVE_VCMPf16r: 111 return ARM::MVE_VPTv8f16r; 112 case ARM::MVE_VCMPi8r: 113 return ARM::MVE_VPTv16i8r; 114 case ARM::MVE_VCMPi16r: 115 return ARM::MVE_VPTv8i16r; 116 case ARM::MVE_VCMPi32r: 117 return ARM::MVE_VPTv4i32r; 118 case ARM::MVE_VCMPu8r: 119 return ARM::MVE_VPTv16u8r; 120 case ARM::MVE_VCMPu16r: 121 return ARM::MVE_VPTv8u16r; 122 case ARM::MVE_VCMPu32r: 123 return ARM::MVE_VPTv4u32r; 124 case ARM::MVE_VCMPs8r: 125 return ARM::MVE_VPTv16s8r; 126 case ARM::MVE_VCMPs16r: 127 return ARM::MVE_VPTv8s16r; 128 case ARM::MVE_VCMPs32r: 129 return ARM::MVE_VPTv4s32r; 130 131 default: 132 return 0; 133 } 134 } 135 136 static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, 137 const TargetRegisterInfo *TRI, 138 unsigned &NewOpcode) { 139 // Search backwards to the instruction that defines VPR. This may or not 140 // be a VCMP, we check that after this loop. If we find another instruction 141 // that reads cpsr, we return nullptr. 142 MachineBasicBlock::iterator CmpMI = MI; 143 while (CmpMI != MI->getParent()->begin()) { 144 --CmpMI; 145 if (CmpMI->modifiesRegister(ARM::VPR, TRI)) 146 break; 147 if (CmpMI->readsRegister(ARM::VPR, TRI)) 148 break; 149 } 150 151 if (CmpMI == MI) 152 return nullptr; 153 NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode()); 154 if (NewOpcode == 0) 155 return nullptr; 156 157 // Search forward from CmpMI to MI, checking if either register was def'd 158 if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI), 159 MI, TRI)) 160 return nullptr; 161 if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI), 162 MI, TRI)) 163 return nullptr; 164 return &*CmpMI; 165 } 166 167 bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { 168 bool Modified = false; 169 MachineBasicBlock::instr_iterator MBIter = Block.instr_begin(); 170 MachineBasicBlock::instr_iterator EndIter = Block.instr_end(); 171 172 while (MBIter != EndIter) { 173 MachineInstr *MI = &*MBIter; 174 unsigned PredReg = 0; 175 DebugLoc dl = MI->getDebugLoc(); 176 177 ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg); 178 179 // The idea of the predicate is that None, Then and Else are for use when 180 // handling assembly language: they correspond to the three possible 181 // suffixes "", "t" and "e" on the mnemonic. So when instructions are read 182 // from assembly source or disassembled from object code, you expect to see 183 // a mixture whenever there's a long VPT block. But in code generation, we 184 // hope we'll never generate an Else as input to this pass. 185 assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds"); 186 187 if (Pred == ARMVCC::None) { 188 ++MBIter; 189 continue; 190 } 191 192 LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump()); 193 int VPTInstCnt = 1; 194 ARMVCC::VPTCodes NextPred; 195 196 // Look at subsequent instructions, checking if they can be in the same VPT 197 // block. 198 ++MBIter; 199 while (MBIter != EndIter && VPTInstCnt < 4) { 200 NextPred = getVPTInstrPredicate(*MBIter, PredReg); 201 assert(NextPred != ARMVCC::Else && 202 "VPT block pass does not expect Else preds"); 203 if (NextPred != Pred) 204 break; 205 LLVM_DEBUG(dbgs() << " adding : "; MBIter->dump()); 206 ++VPTInstCnt; 207 ++MBIter; 208 }; 209 210 unsigned BlockMask = 0; 211 switch (VPTInstCnt) { 212 case 1: 213 BlockMask = VPTMaskValue::T; 214 break; 215 case 2: 216 BlockMask = VPTMaskValue::TT; 217 break; 218 case 3: 219 BlockMask = VPTMaskValue::TTT; 220 break; 221 case 4: 222 BlockMask = VPTMaskValue::TTTT; 223 break; 224 default: 225 llvm_unreachable("Unexpected number of instruction in a VPT block"); 226 }; 227 228 // Search back for a VCMP that can be folded to create a VPT, or else create 229 // a VPST directly 230 MachineInstrBuilder MIBuilder; 231 unsigned NewOpcode; 232 MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode); 233 if (VCMP) { 234 LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump()); 235 MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode)); 236 MIBuilder.addImm(BlockMask); 237 MIBuilder.add(VCMP->getOperand(1)); 238 MIBuilder.add(VCMP->getOperand(2)); 239 MIBuilder.add(VCMP->getOperand(3)); 240 VCMP->eraseFromParent(); 241 } else { 242 MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST)); 243 MIBuilder.addImm(BlockMask); 244 } 245 246 finalizeBundle( 247 Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter); 248 249 Modified = true; 250 } 251 return Modified; 252 } 253 254 bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { 255 const ARMSubtarget &STI = 256 static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 257 258 if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) 259 return false; 260 261 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); 262 TRI = STI.getRegisterInfo(); 263 264 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" 265 << "********** Function: " << Fn.getName() << '\n'); 266 267 bool Modified = false; 268 for (MachineBasicBlock &MBB : Fn) 269 Modified |= InsertVPTBlocks(MBB); 270 271 LLVM_DEBUG(dbgs() << "**************************************\n"); 272 return Modified; 273 } 274 275 /// createMVEVPTBlock - Returns an instance of the MVE VPT block 276 /// insertion pass. 277 FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } 278