//===-- X86PreTileConfig.cpp - Tile Register Configure---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to pre-configure the shape of AMX registers.
/// AMX registers need to be configured before use. The shape of an AMX
/// register is encoded in the 1st and 2nd machine operands of the AMX pseudo
/// instructions. The pldtilecfg instruction configures the tile registers, so
/// it should dominate all AMX instructions. The pldtilecfg produces a virtual
/// cfg register, and that cfg register is used by all AMX instructions.
/// This pass finds the common dominator of all AMX instructions and inserts
/// the pldtilecfg instruction there. In addition, the cfg register that
/// pldtilecfg produces is inserted as the last operand of each AMX
/// instruction. We use this scheme to model the def-use relationship between
/// the AMX config instruction and the other AMX instructions. Below is an
/// example.
20 /// 21 /// ----B1---- 22 /// / \ 23 /// / \ 24 /// B2 B3 25 /// %1:tile = PTILELOADDV %2:tile = PTILELOADDV 26 /// 27 /// is transformed to 28 /// 29 /// B1 30 /// %25:tilecfg = PLDTILECFG 31 /// / \ 32 /// / \ 33 /// %1:tile = PTILELOADDV %25 %2:tile = PTILELOADDV %25 34 // 35 //===----------------------------------------------------------------------===// 36 37 #include "X86.h" 38 #include "X86InstrBuilder.h" 39 #include "X86RegisterInfo.h" 40 #include "X86Subtarget.h" 41 #include "llvm/CodeGen/MachineDominators.h" 42 #include "llvm/CodeGen/MachineFunctionPass.h" 43 #include "llvm/CodeGen/MachineInstr.h" 44 #include "llvm/CodeGen/MachineRegisterInfo.h" 45 #include "llvm/CodeGen/Passes.h" 46 #include "llvm/CodeGen/TargetInstrInfo.h" 47 #include "llvm/CodeGen/TargetRegisterInfo.h" 48 #include "llvm/CodeGen/TileShapeInfo.h" 49 #include "llvm/InitializePasses.h" 50 51 using namespace llvm; 52 53 #define DEBUG_TYPE "tile-pre-config" 54 55 namespace { 56 57 class X86PreTileConfig : public MachineFunctionPass { 58 // context 59 MachineFunction *MF = nullptr; 60 const X86Subtarget *ST = nullptr; 61 const TargetRegisterInfo *TRI; 62 const TargetInstrInfo *TII; 63 MachineDominatorTree *DomTree = nullptr; 64 MachineRegisterInfo *MRI = nullptr; 65 66 MachineInstr *getTileConfigPoint(); 67 68 public: 69 X86PreTileConfig() : MachineFunctionPass(ID) {} 70 71 /// Return the pass name. 72 StringRef getPassName() const override { 73 return "Tile Register Pre-configure"; 74 } 75 76 /// X86PreTileConfig analysis usage. 77 void getAnalysisUsage(AnalysisUsage &AU) const override; 78 79 /// Perform register allocation. 
80 bool runOnMachineFunction(MachineFunction &mf) override; 81 82 static char ID; 83 }; 84 85 } // end anonymous namespace 86 87 char X86PreTileConfig::ID = 0; 88 89 INITIALIZE_PASS_BEGIN(X86PreTileConfig, "tilepreconfig", 90 "Tile Register Configure", false, false) 91 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 92 INITIALIZE_PASS_END(X86PreTileConfig, "tilepreconfig", 93 "Tile Register Configure", false, false) 94 95 void X86PreTileConfig::getAnalysisUsage(AnalysisUsage &AU) const { 96 AU.setPreservesAll(); 97 AU.addRequired<MachineDominatorTree>(); 98 MachineFunctionPass::getAnalysisUsage(AU); 99 } 100 101 static Register buildConfigMI(MachineBasicBlock::iterator MI, int FrameIdx, 102 const TargetInstrInfo *TII, 103 MachineRegisterInfo *MRI, 104 const X86Subtarget *ST) { 105 auto *MBB = MI->getParent(); 106 107 // FIXME: AMX should assume AVX512 enabled. 108 if (ST->hasAVX512()) { 109 // Zero stack slot. 110 Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass); 111 BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VPXORDZrr), Zmm) 112 .addReg(Zmm, RegState::Undef) 113 .addReg(Zmm, RegState::Undef); 114 addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VMOVUPSZmr)), 115 FrameIdx) 116 .addReg(Zmm); 117 } 118 119 // build psuedo ldtilecfg 120 Register VReg = MRI->createVirtualRegister(&X86::TILECFGRegClass); 121 122 addFrameReference( 123 BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::PLDTILECFG), VReg), FrameIdx); 124 125 return VReg; 126 } 127 128 static ShapeT getShape(const MachineInstr &MI, MachineRegisterInfo *MRI) { 129 unsigned Opcode = MI.getOpcode(); 130 switch (Opcode) { 131 default: 132 llvm_unreachable("Unexpected machine instruction on tile"); 133 case X86::PTILELOADDV: 134 case X86::PTDPBSSDV: 135 MachineOperand &MO1 = const_cast<MachineOperand &>(MI.getOperand(1)); 136 MachineOperand &MO2 = const_cast<MachineOperand &>(MI.getOperand(2)); 137 ShapeT Shape(&MO1, &MO2, MRI); 138 return Shape; 139 } 140 } 141 142 MachineInstr 
*X86PreTileConfig::getTileConfigPoint() { 143 DenseMap<Register, ShapeT> PhysShapeInfo; 144 MachineBasicBlock *MBB = nullptr; 145 DenseSet<const MachineInstr *> MIs; 146 for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { 147 Register VirtReg = Register::index2VirtReg(i); 148 if (MRI->reg_nodbg_empty(VirtReg)) 149 continue; 150 const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); 151 if (RC.getID() != X86::TILERegClassID) 152 continue; 153 154 // Find the common dominator for all MI that define tile register. 155 for (const MachineOperand &MO : MRI->def_operands(VirtReg)) { 156 if (MO.isUndef()) 157 continue; 158 const auto *MI = MO.getParent(); 159 // PHI or IMPLICIT_DEF instructiion. 160 // There must be a input tile before PHI instruction. 161 if (MI->isTransient()) 162 continue; 163 if (!MBB) 164 MBB = const_cast<MachineBasicBlock *>(MI->getParent()); 165 MBB = DomTree->findNearestCommonDominator( 166 MBB, const_cast<MachineBasicBlock *>(MI->getParent())); 167 168 // Collect the instructions that define shape. 169 ShapeT Shape = getShape(*MI, MRI); 170 std::array<MachineOperand *, 2> ShapeMOs = {Shape.getRow(), 171 Shape.getCol()}; 172 for (auto *ShapeMO : ShapeMOs) { 173 Register ShapeReg = ShapeMO->getReg(); 174 for (const MachineOperand &MO : MRI->def_operands(ShapeReg)) { 175 const auto *ShapeMI = MO.getParent(); 176 MIs.insert(ShapeMI); 177 } 178 } 179 } 180 } 181 if (!MBB) 182 return nullptr; 183 // This pass is before the pass of eliminating PHI node, so it 184 // is in SSA form. 185 assert(MRI->isSSA() && "Not SSA form in pre-tile config"); 186 // Shape def should dominate tile config MBB. 
187 // def s s1 s2 188 // / \ \ / 189 // / \ \ / 190 // conf s3=phi(s1,s2) 191 // | 192 // c 193 // 194 for (const auto *MI : MIs) { 195 const MachineBasicBlock *ShapeMBB = MI->getParent(); 196 if (DomTree->dominates(ShapeMBB, MBB)) 197 continue; 198 if (MI->isMoveImmediate()) 199 continue; 200 report_fatal_error(MF->getName() + ": Failed to config tile register, " 201 "please define the shape earlier"); 202 } 203 204 // ldtilecfg should be inserted after the MI that define the shape. 205 MachineBasicBlock::reverse_instr_iterator I, E; 206 for (I = MBB->instr_rbegin(), E = MBB->instr_rend(); I != E; ++I) { 207 auto *MI = &*I; 208 if (MIs.count(MI) && (!MI->isMoveImmediate())) 209 break; 210 } 211 MachineBasicBlock::iterator MII; 212 if (I == E) 213 MII = MBB->getFirstNonPHI(); 214 else { 215 MII = MachineBasicBlock::iterator(&*I); 216 MII++; 217 } 218 return &*MII; 219 } 220 221 static void addTileCFGUse(MachineFunction &MF, Register CFG) { 222 for (MachineBasicBlock &MBB : MF) { 223 224 // Traverse the basic block. 
225 for (MachineInstr &MI : MBB) { 226 unsigned Opcode = MI.getOpcode(); 227 switch (Opcode) { 228 default: 229 break; 230 case X86::PTILELOADDV: 231 case X86::PTILESTOREDV: 232 case X86::PTDPBSSDV: 233 unsigned NumOperands = MI.getNumOperands(); 234 MI.RemoveOperand(NumOperands - 1); 235 MI.addOperand(MF, MachineOperand::CreateReg(CFG, false)); 236 break; 237 } 238 } 239 } 240 } 241 242 bool X86PreTileConfig::runOnMachineFunction(MachineFunction &mf) { 243 MF = &mf; 244 MRI = &mf.getRegInfo(); 245 ST = &mf.getSubtarget<X86Subtarget>(); 246 TRI = ST->getRegisterInfo(); 247 TII = mf.getSubtarget().getInstrInfo(); 248 DomTree = &getAnalysis<MachineDominatorTree>(); 249 250 MachineInstr *MI = getTileConfigPoint(); 251 if (!MI) 252 return false; 253 unsigned Size = ST->getTileConfigSize(); 254 Align Alignment = ST->getTileConfigAlignment(); 255 int SS = mf.getFrameInfo().CreateStackObject(Size, Alignment, false); 256 Register CFG = buildConfigMI(MI, SS, TII, MRI, ST); 257 addTileCFGUse(mf, CFG); 258 return true; 259 } 260 261 FunctionPass *llvm::createX86PreTileConfigPass() { 262 return new X86PreTileConfig(); 263 } 264