1d4bdeca5SXiang1 Zhang //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===//
2d4bdeca5SXiang1 Zhang //
3d4bdeca5SXiang1 Zhang // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4d4bdeca5SXiang1 Zhang // See https://llvm.org/LICENSE.txt for license information.
5d4bdeca5SXiang1 Zhang // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6d4bdeca5SXiang1 Zhang //
7d4bdeca5SXiang1 Zhang //===----------------------------------------------------------------------===//
8d4bdeca5SXiang1 Zhang //
/// \file Pass to configure the shape of AMX physical registers.
/// AMX registers need to be configured before use. The ldtilecfg instruction
/// is inserted before the FastRegAllocation pass; however, at that time we
/// don't know the shape of each physical tile register, because register
/// allocation is not done yet. This pass runs after the register allocation
/// pass. It collects the shape information of each physical tile register
/// and stores the shape in the stack slot that is allocated for loading the
/// config into the tile config register.
17d4bdeca5SXiang1 Zhang //
18d4bdeca5SXiang1 Zhang //===----------------------------------------------------------------------===//
19d4bdeca5SXiang1 Zhang 
20d4bdeca5SXiang1 Zhang #include "X86.h"
21d4bdeca5SXiang1 Zhang #include "X86InstrBuilder.h"
22d4bdeca5SXiang1 Zhang #include "X86MachineFunctionInfo.h"
23d4bdeca5SXiang1 Zhang #include "X86RegisterInfo.h"
24d4bdeca5SXiang1 Zhang #include "X86Subtarget.h"
25d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/MachineFrameInfo.h"
26d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/MachineFunctionPass.h"
27d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/MachineInstr.h"
28d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/MachineRegisterInfo.h"
29d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/Passes.h"
30d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/TargetInstrInfo.h"
31d4bdeca5SXiang1 Zhang #include "llvm/CodeGen/TargetRegisterInfo.h"
32d4bdeca5SXiang1 Zhang #include "llvm/InitializePasses.h"
33d4bdeca5SXiang1 Zhang 
34d4bdeca5SXiang1 Zhang using namespace llvm;
35d4bdeca5SXiang1 Zhang 
36d4bdeca5SXiang1 Zhang #define DEBUG_TYPE "fasttileconfig"
37d4bdeca5SXiang1 Zhang 
38d4bdeca5SXiang1 Zhang namespace {
39d4bdeca5SXiang1 Zhang 
40d4bdeca5SXiang1 Zhang class X86FastTileConfig : public MachineFunctionPass {
41d4bdeca5SXiang1 Zhang   // context
42d4bdeca5SXiang1 Zhang   MachineFunction *MF = nullptr;
43d4bdeca5SXiang1 Zhang   const TargetInstrInfo *TII = nullptr;
44d4bdeca5SXiang1 Zhang   MachineRegisterInfo *MRI = nullptr;
45496156acSLuo, Yuanke   const TargetRegisterInfo *TRI = nullptr;
46c4dba471SLuo, Yuanke   X86MachineFunctionInfo *X86FI = nullptr;
47d4bdeca5SXiang1 Zhang 
48496156acSLuo, Yuanke   bool configBasicBlock(MachineBasicBlock &MBB);
49d4bdeca5SXiang1 Zhang 
50d4bdeca5SXiang1 Zhang public:
X86FastTileConfig()51d4bdeca5SXiang1 Zhang   X86FastTileConfig() : MachineFunctionPass(ID) {}
52d4bdeca5SXiang1 Zhang 
53d4bdeca5SXiang1 Zhang   /// Return the pass name.
getPassName() const54d4bdeca5SXiang1 Zhang   StringRef getPassName() const override {
55d4bdeca5SXiang1 Zhang     return "Fast Tile Register Configure";
56d4bdeca5SXiang1 Zhang   }
57d4bdeca5SXiang1 Zhang 
getAnalysisUsage(AnalysisUsage & AU) const58496156acSLuo, Yuanke   void getAnalysisUsage(AnalysisUsage &AU) const override {
59496156acSLuo, Yuanke     AU.setPreservesAll();
60496156acSLuo, Yuanke     MachineFunctionPass::getAnalysisUsage(AU);
61496156acSLuo, Yuanke   }
62d4bdeca5SXiang1 Zhang 
63d4bdeca5SXiang1 Zhang   /// Perform register allocation.
64d4bdeca5SXiang1 Zhang   bool runOnMachineFunction(MachineFunction &MFunc) override;
65d4bdeca5SXiang1 Zhang 
getRequiredProperties() const66d4bdeca5SXiang1 Zhang   MachineFunctionProperties getRequiredProperties() const override {
67d4bdeca5SXiang1 Zhang     return MachineFunctionProperties().set(
68d4bdeca5SXiang1 Zhang         MachineFunctionProperties::Property::NoPHIs);
69d4bdeca5SXiang1 Zhang   }
70d4bdeca5SXiang1 Zhang 
71d4bdeca5SXiang1 Zhang   static char ID;
72d4bdeca5SXiang1 Zhang };
73d4bdeca5SXiang1 Zhang 
74d4bdeca5SXiang1 Zhang } // end anonymous namespace
75d4bdeca5SXiang1 Zhang 
76d4bdeca5SXiang1 Zhang char X86FastTileConfig::ID = 0;
77d4bdeca5SXiang1 Zhang 
78d4bdeca5SXiang1 Zhang INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,
79d4bdeca5SXiang1 Zhang                       "Fast Tile Register Configure", false, false)
80d4bdeca5SXiang1 Zhang INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,
81d4bdeca5SXiang1 Zhang                     "Fast Tile Register Configure", false, false)
82d4bdeca5SXiang1 Zhang 
isTileDef(MachineRegisterInfo * MRI,MachineInstr & MI)83496156acSLuo, Yuanke static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
84496156acSLuo, Yuanke   // There is no phi instruction after register allocation.
85496156acSLuo, Yuanke   assert(MI.isPHI() == false);
86496156acSLuo, Yuanke   // The instruction must have 3 operands: tile def, row, col.
87496156acSLuo, Yuanke   // It should be AMX pseudo instruction that have shape operand.
88496156acSLuo, Yuanke   if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 ||
89496156acSLuo, Yuanke       !MI.isPseudo())
90d4bdeca5SXiang1 Zhang     return false;
91496156acSLuo, Yuanke   MachineOperand &MO = MI.getOperand(0);
92d4bdeca5SXiang1 Zhang 
93496156acSLuo, Yuanke   if (MO.isReg()) {
94496156acSLuo, Yuanke     Register Reg = MO.getReg();
95496156acSLuo, Yuanke     // FIXME it may be used after Greedy RA and the physical
96496156acSLuo, Yuanke     // register is not rewritten yet.
97496156acSLuo, Yuanke     if (Reg.isVirtual() &&
98496156acSLuo, Yuanke         MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
99496156acSLuo, Yuanke       return true;
100d4bdeca5SXiang1 Zhang     if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
101d4bdeca5SXiang1 Zhang       return true;
102496156acSLuo, Yuanke   }
103496156acSLuo, Yuanke 
104d4bdeca5SXiang1 Zhang   return false;
105d4bdeca5SXiang1 Zhang }
106d4bdeca5SXiang1 Zhang 
107496156acSLuo, Yuanke // PreTileConfig should configure the tile registers based on basic
108496156acSLuo, Yuanke // block.
configBasicBlock(MachineBasicBlock & MBB)109496156acSLuo, Yuanke bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
110496156acSLuo, Yuanke   bool Change = false;
111496156acSLuo, Yuanke   SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos;
112496156acSLuo, Yuanke   for (MachineInstr &MI : reverse(MBB)) {
113*aaaf9cedSLuo, Yuanke     if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV)
114d4bdeca5SXiang1 Zhang       continue;
115496156acSLuo, Yuanke     // AMX instructions that define tile register.
116*aaaf9cedSLuo, Yuanke     if (MI.getOpcode() != X86::PLDTILECFGV) {
117496156acSLuo, Yuanke       MachineOperand &Row = MI.getOperand(1);
118496156acSLuo, Yuanke       MachineOperand &Col = MI.getOperand(2);
119496156acSLuo, Yuanke       unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0;
120496156acSLuo, Yuanke       ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)});
121*aaaf9cedSLuo, Yuanke     } else { // PLDTILECFGV
122496156acSLuo, Yuanke       // Rewrite the shape information to memory. Stack slot should have
123496156acSLuo, Yuanke       // been initialized to zero in pre config.
124496156acSLuo, Yuanke       int SS = MI.getOperand(0).getIndex(); // tile config stack slot.
125496156acSLuo, Yuanke       for (auto &ShapeInfo : ShapeInfos) {
126496156acSLuo, Yuanke         DebugLoc DL;
127496156acSLuo, Yuanke         unsigned TMMIdx = ShapeInfo.first;
128496156acSLuo, Yuanke         Register RowReg = ShapeInfo.second.getRow()->getReg();
129496156acSLuo, Yuanke         Register ColReg = ShapeInfo.second.getCol()->getReg();
130d4bdeca5SXiang1 Zhang         // Here is the data format for the tile config.
131496156acSLuo, Yuanke         // 0      palette
132496156acSLuo, Yuanke         // 1      start_row
133d4bdeca5SXiang1 Zhang         // 2-15   reserved, must be zero
134d4bdeca5SXiang1 Zhang         // 16-17  tile0.colsb Tile 0 bytes per row.
135d4bdeca5SXiang1 Zhang         // 18-19  tile1.colsb Tile 1 bytes per row.
136d4bdeca5SXiang1 Zhang         // 20-21  tile2.colsb Tile 2 bytes per row.
137d4bdeca5SXiang1 Zhang         // ... (sequence continues)
138d4bdeca5SXiang1 Zhang         // 30-31  tile7.colsb Tile 7 bytes per row.
139d4bdeca5SXiang1 Zhang         // 32-47  reserved, must be zero
140d4bdeca5SXiang1 Zhang         // 48     tile0.rows Tile 0 rows.
141d4bdeca5SXiang1 Zhang         // 49     tile1.rows Tile 1 rows.
142d4bdeca5SXiang1 Zhang         // 50     tile2.rows Tile 2 rows.
143d4bdeca5SXiang1 Zhang         // ... (sequence continues)
144d4bdeca5SXiang1 Zhang         // 55     tile7.rows Tile 7 rows.
145d4bdeca5SXiang1 Zhang         // 56-63  reserved, must be zero
146496156acSLuo, Yuanke         int RowOffset = 48 + TMMIdx;
147496156acSLuo, Yuanke         int ColOffset = 16 + TMMIdx * 2;
148d4bdeca5SXiang1 Zhang 
149496156acSLuo, Yuanke         Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit);
150496156acSLuo, Yuanke         BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg);
151496156acSLuo, Yuanke         MachineInstrBuilder StoreRow =
152496156acSLuo, Yuanke             BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr));
153496156acSLuo, Yuanke         addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg);
154496156acSLuo, Yuanke 
155496156acSLuo, Yuanke         MachineInstrBuilder StoreCol =
156496156acSLuo, Yuanke             BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr));
157496156acSLuo, Yuanke         addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg);
158496156acSLuo, Yuanke       }
159496156acSLuo, Yuanke       ShapeInfos.clear();
160496156acSLuo, Yuanke       Change = true;
161d4bdeca5SXiang1 Zhang     }
162d4bdeca5SXiang1 Zhang   }
163d4bdeca5SXiang1 Zhang 
164496156acSLuo, Yuanke   if (Change)
165c4dba471SLuo, Yuanke     X86FI->setHasVirtualTileReg(true);
166496156acSLuo, Yuanke 
167496156acSLuo, Yuanke   return Change;
168d4bdeca5SXiang1 Zhang }
169d4bdeca5SXiang1 Zhang 
runOnMachineFunction(MachineFunction & MFunc)170d4bdeca5SXiang1 Zhang bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
171d4bdeca5SXiang1 Zhang   MF = &MFunc;
172d4bdeca5SXiang1 Zhang   MRI = &MFunc.getRegInfo();
173496156acSLuo, Yuanke   const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>();
174d4bdeca5SXiang1 Zhang   TRI = ST->getRegisterInfo();
175d4bdeca5SXiang1 Zhang   TII = MFunc.getSubtarget().getInstrInfo();
176c4dba471SLuo, Yuanke   X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
177496156acSLuo, Yuanke   bool Change = false;
178d4bdeca5SXiang1 Zhang 
179496156acSLuo, Yuanke   // Loop over all of the basic blocks, eliminating virtual register references
180496156acSLuo, Yuanke   for (MachineBasicBlock &MBB : MFunc)
181496156acSLuo, Yuanke     Change |= configBasicBlock(MBB);
182496156acSLuo, Yuanke 
183496156acSLuo, Yuanke   return Change;
184d4bdeca5SXiang1 Zhang }
185d4bdeca5SXiang1 Zhang 
createX86FastTileConfigPass()186d4bdeca5SXiang1 Zhang FunctionPass *llvm::createX86FastTileConfigPass() {
187d4bdeca5SXiang1 Zhang   return new X86FastTileConfig();
188d4bdeca5SXiang1 Zhang }
189