1496156acSLuo, Yuanke //===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
2496156acSLuo, Yuanke //
3496156acSLuo, Yuanke // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4496156acSLuo, Yuanke // See https://llvm.org/LICENSE.txt for license information.
5496156acSLuo, Yuanke // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6496156acSLuo, Yuanke //
7496156acSLuo, Yuanke //===----------------------------------------------------------------------===//
8496156acSLuo, Yuanke //
/// \file Pass to preconfig the shape of physical tile registers
/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
/// walks each instruction of the basic block in reverse order. All the tile
/// registers that live out of the basic block are spilled and reloaded
/// before their users. It also checks the dependency of the shape to ensure
/// the shape is defined before ldtilecfg.
15496156acSLuo, Yuanke //
16496156acSLuo, Yuanke //===----------------------------------------------------------------------===//
17496156acSLuo, Yuanke
18496156acSLuo, Yuanke #include "X86.h"
19496156acSLuo, Yuanke #include "X86InstrBuilder.h"
20496156acSLuo, Yuanke #include "X86MachineFunctionInfo.h"
21496156acSLuo, Yuanke #include "X86RegisterInfo.h"
22496156acSLuo, Yuanke #include "X86Subtarget.h"
23496156acSLuo, Yuanke #include "llvm/ADT/DepthFirstIterator.h"
24496156acSLuo, Yuanke #include "llvm/ADT/PostOrderIterator.h"
25496156acSLuo, Yuanke #include "llvm/ADT/Statistic.h"
26496156acSLuo, Yuanke #include "llvm/CodeGen/MachineFrameInfo.h"
27496156acSLuo, Yuanke #include "llvm/CodeGen/MachineFunctionPass.h"
28496156acSLuo, Yuanke #include "llvm/CodeGen/MachineInstr.h"
29496156acSLuo, Yuanke #include "llvm/CodeGen/MachineRegisterInfo.h"
30496156acSLuo, Yuanke #include "llvm/CodeGen/Passes.h"
31496156acSLuo, Yuanke #include "llvm/CodeGen/TargetInstrInfo.h"
32496156acSLuo, Yuanke #include "llvm/CodeGen/TargetRegisterInfo.h"
33496156acSLuo, Yuanke #include "llvm/InitializePasses.h"
34496156acSLuo, Yuanke #include "llvm/Support/Debug.h"
35496156acSLuo, Yuanke
36496156acSLuo, Yuanke using namespace llvm;
37496156acSLuo, Yuanke
38496156acSLuo, Yuanke #define DEBUG_TYPE "fastpretileconfig"
39496156acSLuo, Yuanke
40496156acSLuo, Yuanke STATISTIC(NumStores, "Number of stores added");
41496156acSLuo, Yuanke STATISTIC(NumLoads, "Number of loads added");
42496156acSLuo, Yuanke
43496156acSLuo, Yuanke namespace {
44496156acSLuo, Yuanke
class X86FastPreTileConfig : public MachineFunctionPass {
  // Cached per-function state, filled in by runOnMachineFunction().
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  // The basic block currently being processed by configBasicBlock().
  MachineBasicBlock *MBB = nullptr;
  // Frame index of the stack object holding the tile configure data;
  // -1 until it is allocated.
  int CfgSS = -1;
  // Registers created for a converted tile PHI: the row/column shape PHIs
  // and the PHI of the spill-slot address (see convertPHI()).
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  // Maps a tile PHI being converted to the registers created for it, so
  // that circular PHI references terminate during conversion.
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for tile virtual register for which it was determined
  /// that it is alive across blocks.
  BitVector MayLiveAcrossBlocks;

  // Return (allocating on first use) the spill slot for \p VirtReg.
  int getStackSpaceFor(Register VirtReg);
  // Zero-initialize the tile config stack object and set its palette byte.
  void InitializeTileConfigStackSpace();
  // Return true if \p VirtReg may be used after \p CfgMI or in another
  // block, i.e. it must be spilled and reloaded around a reconfigure.
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  // Insert a store of \p VirtReg to its spill slot before \p Before.
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  // Insert a shape-carrying tile load of \p VirtReg before \p UseMI.
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  // Rewrite tile PHIs in \p MBB so none depends on another tile PHI.
  void canonicalizePHIs(MachineBasicBlock &MBB);
  // Replace one tile PHI with row/col/address PHIs plus a tile load.
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  // Convert every tile PHI at the top of \p MBB.
  void convertPHIs(MachineBasicBlock &MBB);
  // Insert ldtilecfg (and spills/reloads) for \p MBB.
  bool configBasicBlock(MachineBasicBlock &MBB);

public:
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configure.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};
93496156acSLuo, Yuanke
94496156acSLuo, Yuanke } // end anonymous namespace
95496156acSLuo, Yuanke
char X86FastPreTileConfig::ID = 0;

// Register the pass under the "fastpretileconfig" name (DEBUG_TYPE).
INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)
102496156acSLuo, Yuanke
dominates(MachineBasicBlock & MBB,MachineBasicBlock::const_iterator A,MachineBasicBlock::const_iterator B)103496156acSLuo, Yuanke static bool dominates(MachineBasicBlock &MBB,
104496156acSLuo, Yuanke MachineBasicBlock::const_iterator A,
105496156acSLuo, Yuanke MachineBasicBlock::const_iterator B) {
106496156acSLuo, Yuanke auto MBBEnd = MBB.end();
107496156acSLuo, Yuanke if (B == MBBEnd)
108496156acSLuo, Yuanke return true;
109496156acSLuo, Yuanke
110496156acSLuo, Yuanke MachineBasicBlock::const_iterator I = MBB.begin();
111496156acSLuo, Yuanke for (; &*I != A && &*I != B; ++I)
112496156acSLuo, Yuanke ;
113496156acSLuo, Yuanke
114496156acSLuo, Yuanke return &*I == A;
115496156acSLuo, Yuanke }
116496156acSLuo, Yuanke
117496156acSLuo, Yuanke /// This allocates space for the specified virtual register to be held on the
118496156acSLuo, Yuanke /// stack.
getStackSpaceFor(Register VirtReg)119496156acSLuo, Yuanke int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
120496156acSLuo, Yuanke // Find the location Reg would belong...
121496156acSLuo, Yuanke int SS = StackSlotForVirtReg[VirtReg];
122496156acSLuo, Yuanke // Already has space allocated?
123496156acSLuo, Yuanke if (SS != -1)
124496156acSLuo, Yuanke return SS;
125496156acSLuo, Yuanke
126496156acSLuo, Yuanke // Allocate a new stack object for this spill location...
127496156acSLuo, Yuanke const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
128496156acSLuo, Yuanke unsigned Size = TRI->getSpillSize(RC);
129496156acSLuo, Yuanke Align Alignment = TRI->getSpillAlign(RC);
130496156acSLuo, Yuanke int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);
131496156acSLuo, Yuanke
132496156acSLuo, Yuanke // Assign the slot.
133496156acSLuo, Yuanke StackSlotForVirtReg[VirtReg] = FrameIdx;
134496156acSLuo, Yuanke return FrameIdx;
135496156acSLuo, Yuanke }
136496156acSLuo, Yuanke
137496156acSLuo, Yuanke /// Returns false if \p VirtReg is known to not live out of the current config.
138496156acSLuo, Yuanke /// If \p VirtReg live out of the current MBB, it must live out of the current
139496156acSLuo, Yuanke /// config
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
  // Cached positive answer from an earlier query.
  if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
    return true;

  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    // A use in another block means the value definitely lives out of the
    // current config. Cache the result.
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
      return true;
    }

    // The use and def are in the same MBB. If the tile register is
    // reconfigured, it is clobbered and we need to spill and reload
    // tile register.
    if (CfgMI) {
      if (dominates(*MBB, *CfgMI, UseInst)) {
        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
        return true;
      }
    }
  }

  return false;
}
163496156acSLuo, Yuanke
void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
  // Emit the initialization at the top of the entry block, after any PHIs.
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  // Zero the config stack object (bytes 0-63) with the widest available
  // vector stores: one 512-bit, two 256-bit, or four 128-bit stores.
  if (ST->hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
        .addReg(Zmm);
  } else if (ST->hasAVX2()) {
    // Two 256-bit stores at offsets 0 and 32.
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
                      32)
        .addReg(Ymm);
  } else {
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    // Four 128-bit stores at offsets 0, 16, 32 and 48.
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first: store 1 to the byte at offset 0.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
      .addImm(1);
}
199496156acSLuo, Yuanke
200496156acSLuo, Yuanke /// Insert spill instruction for \p AssignedReg before \p Before.
201496156acSLuo, Yuanke /// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
spill(MachineBasicBlock::iterator Before,Register VirtReg,bool Kill)202496156acSLuo, Yuanke void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
203496156acSLuo, Yuanke Register VirtReg, bool Kill) {
204496156acSLuo, Yuanke LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
205496156acSLuo, Yuanke int FI = getStackSpaceFor(VirtReg);
206496156acSLuo, Yuanke LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
207496156acSLuo, Yuanke
208496156acSLuo, Yuanke const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
209496156acSLuo, Yuanke // Don't need shape information for tile store, becasue it is adjacent to
210496156acSLuo, Yuanke // the tile def instruction.
211496156acSLuo, Yuanke TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI);
212496156acSLuo, Yuanke ++NumStores;
213496156acSLuo, Yuanke
214496156acSLuo, Yuanke // TODO: update DBG_VALUEs
215496156acSLuo, Yuanke }
216496156acSLuo, Yuanke
217496156acSLuo, Yuanke /// Insert reload instruction for \p PhysReg before \p Before.
/// Insert reload instruction for \p OrigReg before \p UseMI.
/// The reload is a PTILELOADDV so the tile shape (\p RowMO, \p ColMO) is
/// carried on the instruction; a constant stride of 64 is used.
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
                                  Register OrigReg, MachineOperand *RowMO,
                                  MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // Fold copy to tileload
  // BB1:
  // spill src to s
  //
  // BB2:
  // t = copy src
  // -->
  // t = tileload (s)
  if (UseMI->isCopy())
    TileReg = UseMI->getOperand(0).getReg();
  else
    TileReg = MRI->createVirtualRegister(&RC);
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
  // information for reload.
  // tileloadd (%sp, %idx), %tmm
  unsigned Opc = X86::PTILELOADDV;
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // Materialize the stride (64) in a register.
  // FIXME: MBB is not the parent of UseMI.
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
                                TII->get(X86::MOV64ri), StrideReg)
                            .addImm(64);
  NewMI = addFrameReference(
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
          .addReg(RowMO->getReg())
          .addReg(ColMO->getReg()),
      FI);
  // Operand 5 is the index-register slot of the memory reference built by
  // addFrameReference; put the stride there and kill it at this use.
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  // The shape registers gained an extra use on the new load, so they must
  // not stay marked as killed at the original operands.
  RowMO->setIsKill(false);
  ColMO->setIsKill(false);
  // Erase copy instruction after it is folded.
  if (UseMI->isCopy()) {
    UseMI->eraseFromParent();
  } else {
    // Replace the register in the user MI.
    for (auto &MO : UseMI->operands()) {
      if (MO.isReg() && MO.getReg() == OrigReg)
        MO.setReg(TileReg);
    }
  }

  ++NumLoads;
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
                    << printReg(TileReg, TRI) << '\n');
}
270496156acSLuo, Yuanke
isTileDef(MachineRegisterInfo * MRI,MachineInstr & MI)271496156acSLuo, Yuanke static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
272496156acSLuo, Yuanke // The instruction must have 3 operands: tile def, row, col.
273496156acSLuo, Yuanke if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
274496156acSLuo, Yuanke return false;
275496156acSLuo, Yuanke MachineOperand &MO = MI.getOperand(0);
276496156acSLuo, Yuanke
277496156acSLuo, Yuanke if (MO.isReg()) {
278496156acSLuo, Yuanke Register Reg = MO.getReg();
279496156acSLuo, Yuanke // FIXME it may be used after Greedy RA and the physical
280496156acSLuo, Yuanke // register is not rewritten yet.
281496156acSLuo, Yuanke if (Reg.isVirtual() &&
282496156acSLuo, Yuanke MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
283496156acSLuo, Yuanke return true;
284496156acSLuo, Yuanke if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
285496156acSLuo, Yuanke return true;
286496156acSLuo, Yuanke }
287496156acSLuo, Yuanke
288496156acSLuo, Yuanke return false;
289496156acSLuo, Yuanke }
290496156acSLuo, Yuanke
getShape(MachineRegisterInfo * MRI,Register TileReg)291496156acSLuo, Yuanke static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
292496156acSLuo, Yuanke MachineInstr *MI = MRI->getVRegDef(TileReg);
293496156acSLuo, Yuanke if (isTileDef(MRI, *MI)) {
294496156acSLuo, Yuanke MachineOperand *RowMO = &MI->getOperand(1);
295496156acSLuo, Yuanke MachineOperand *ColMO = &MI->getOperand(2);
296496156acSLuo, Yuanke return ShapeT(RowMO, ColMO, MRI);
297496156acSLuo, Yuanke } else if (MI->isCopy()) {
298496156acSLuo, Yuanke TileReg = MI->getOperand(1).getReg();
299496156acSLuo, Yuanke return getShape(MRI, TileReg);
300496156acSLuo, Yuanke }
301496156acSLuo, Yuanke
302496156acSLuo, Yuanke // The def should not be PHI node, because we walk the MBB in reverse post
303496156acSLuo, Yuanke // order.
304496156acSLuo, Yuanke assert(MI->isPHI() && "Unexpected PHI when get shape.");
305496156acSLuo, Yuanke llvm_unreachable("Unexpected MI when get shape.");
306496156acSLuo, Yuanke }
307496156acSLuo, Yuanke
308496156acSLuo, Yuanke // BB0:
309496156acSLuo, Yuanke // spill t0 to s0
310496156acSLuo, Yuanke // BB1:
311496156acSLuo, Yuanke // spill t1 to s1
312496156acSLuo, Yuanke //
313496156acSLuo, Yuanke // BB2:
314496156acSLuo, Yuanke // t = phi [t0, bb0] [t1, bb1]
315496156acSLuo, Yuanke // -->
316496156acSLuo, Yuanke // row = phi [r0, bb0] [r1, bb1]
317496156acSLuo, Yuanke // col = phi [c0, bb0] [c1, bb1]
318496156acSLuo, Yuanke // s = phi [s0, bb0] [s1, bb1]
319496156acSLuo, Yuanke // t = tileload row, col, s
320496156acSLuo, Yuanke // The new instruction is inserted at the end of the phi node. The order
321496156acSLuo, Yuanke // of the original phi node is not ensured.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
                                      MachineInstr &PHI) {
  // 1. Create instruction to get stack slot address of each incoming block.
  // 2. Create PHI node for the stack address.
  // 3. Create PHI node for shape. If one of the incoming shape is immediate
  //    use the immediate and delete the PHI node.
  // 4. Create tileload instruction from the stack address.
  //
  // The three new PHIs are inserted right after the tile PHI being
  // converted; the tile load itself goes after all PHIs of the block.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of phi node and its row/column information.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  // Walk the (value, block) operand pairs of the PHI.
  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the 2 incoming value of tile register and MBB.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as liveout, so that it will be spilled when visit
    // the incoming MBB. Otherwise since phi will be deleted, it
    // would miss spill when visit incoming MBB.
    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
        // def t1
        // / \
        // def t2 t3 = phi(t1, t4) <--
        // \ / |
        // t4 = phi(t2, t3)-------------
        //
        // For each (row, column and stack address) append phi incoming value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        //
        // The registers recorded for the visited PHI are reused directly,
        // which terminates the recursion on the cycle.
        Register InRowReg = VisitedPHIs[TileDefMI].Row;
        Register InColReg = VisitedPHIs[TileDefMI].Col;
        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert PHI to tileload
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is coverted to tileload instruction. Get the stack
        // address from tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      // The incoming value is defined by a normal instruction: insert the
      // slot-address computation at the def itself.
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operand of row/column phi instruction.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register live out of its def BB, it would be spilled.
      // Create MI to get the spill stack slot address for the tile register
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }

  // Materialize the merged tile: load it back from the PHI-selected stack
  // address with the PHI-selected shape, using a stride of 64.
  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
  // Operand 5 is the index-register slot of the memory reference; put the
  // stride there.
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}
424496156acSLuo, Yuanke
isTileRegDef(MachineRegisterInfo * MRI,MachineInstr & MI)425496156acSLuo, Yuanke static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
426496156acSLuo, Yuanke MachineOperand &MO = MI.getOperand(0);
427496156acSLuo, Yuanke if (MO.isReg() && MO.getReg().isVirtual() &&
428496156acSLuo, Yuanke MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
429496156acSLuo, Yuanke return true;
430496156acSLuo, Yuanke return false;
431496156acSLuo, Yuanke }
432496156acSLuo, Yuanke
canonicalizePHIs(MachineBasicBlock & MBB)433496156acSLuo, Yuanke void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
434496156acSLuo, Yuanke SmallVector<MachineInstr *, 8> PHIs;
435496156acSLuo, Yuanke
436496156acSLuo, Yuanke for (MachineInstr &MI : MBB) {
437496156acSLuo, Yuanke if (!MI.isPHI())
438496156acSLuo, Yuanke break;
439496156acSLuo, Yuanke if (!isTileRegDef(MRI, MI))
440496156acSLuo, Yuanke continue;
441496156acSLuo, Yuanke PHIs.push_back(&MI);
442496156acSLuo, Yuanke }
443496156acSLuo, Yuanke // Canonicalize the phi node first. One tile phi may depeneds previous
444496156acSLuo, Yuanke // phi node. For below case, we need convert %t4.
445496156acSLuo, Yuanke //
446496156acSLuo, Yuanke // BB0:
447496156acSLuo, Yuanke // %t3 = phi (t1 BB1, t2 BB0)
448496156acSLuo, Yuanke // %t4 = phi (t5 BB1, t3 BB0)
449496156acSLuo, Yuanke // -->
450496156acSLuo, Yuanke // %t3 = phi (t1 BB1, t2 BB0)
451496156acSLuo, Yuanke // %t4 = phi (t5 BB1, t2 BB0)
452496156acSLuo, Yuanke //
453496156acSLuo, Yuanke while (!PHIs.empty()) {
454496156acSLuo, Yuanke MachineInstr *PHI = PHIs.pop_back_val();
455496156acSLuo, Yuanke
456496156acSLuo, Yuanke // Find the operand that is incoming from the same MBB and the def
457496156acSLuo, Yuanke // is also phi node.
458496156acSLuo, Yuanke MachineOperand *InMO = nullptr;
459496156acSLuo, Yuanke MachineInstr *DefMI = nullptr;
460496156acSLuo, Yuanke for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
461496156acSLuo, Yuanke Register InTileReg = PHI->getOperand(I).getReg();
462496156acSLuo, Yuanke MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
463496156acSLuo, Yuanke DefMI = MRI->getVRegDef(InTileReg);
464496156acSLuo, Yuanke if (InMBB != &MBB || !DefMI->isPHI())
465496156acSLuo, Yuanke continue;
466496156acSLuo, Yuanke
467496156acSLuo, Yuanke InMO = &PHI->getOperand(I);
468496156acSLuo, Yuanke break;
469496156acSLuo, Yuanke }
470496156acSLuo, Yuanke // If can't find such operand, do nothing.
471496156acSLuo, Yuanke if (!InMO)
472496156acSLuo, Yuanke continue;
473496156acSLuo, Yuanke
474496156acSLuo, Yuanke // Current phi node depends on previous phi node. Break the
475496156acSLuo, Yuanke // dependency.
476496156acSLuo, Yuanke Register DefTileReg;
477496156acSLuo, Yuanke for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
478496156acSLuo, Yuanke MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
479496156acSLuo, Yuanke if (InMBB != &MBB)
480496156acSLuo, Yuanke continue;
481496156acSLuo, Yuanke DefTileReg = DefMI->getOperand(I).getReg();
482496156acSLuo, Yuanke InMO->setReg(DefTileReg);
483496156acSLuo, Yuanke break;
484496156acSLuo, Yuanke }
485496156acSLuo, Yuanke }
486496156acSLuo, Yuanke }
487496156acSLuo, Yuanke
convertPHIs(MachineBasicBlock & MBB)488496156acSLuo, Yuanke void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
489496156acSLuo, Yuanke SmallVector<MachineInstr *, 8> PHIs;
490496156acSLuo, Yuanke for (MachineInstr &MI : MBB) {
491496156acSLuo, Yuanke if (!MI.isPHI())
492496156acSLuo, Yuanke break;
493496156acSLuo, Yuanke if (!isTileRegDef(MRI, MI))
494496156acSLuo, Yuanke continue;
495496156acSLuo, Yuanke PHIs.push_back(&MI);
496496156acSLuo, Yuanke }
497496156acSLuo, Yuanke while (!PHIs.empty()) {
498496156acSLuo, Yuanke MachineInstr *MI = PHIs.pop_back_val();
499496156acSLuo, Yuanke VisitedPHIs.clear();
500496156acSLuo, Yuanke convertPHI(&MBB, *MI);
501496156acSLuo, Yuanke }
502496156acSLuo, Yuanke }
503496156acSLuo, Yuanke
504496156acSLuo, Yuanke // PreTileConfig should configure the tile registers based on basic
505496156acSLuo, Yuanke // block.
configBasicBlock(MachineBasicBlock & MBB)506496156acSLuo, Yuanke bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
507496156acSLuo, Yuanke this->MBB = &MBB;
508496156acSLuo, Yuanke bool Change = false;
509496156acSLuo, Yuanke MachineInstr *LastShapeMI = nullptr;
510496156acSLuo, Yuanke MachineInstr *LastTileCfg = nullptr;
511496156acSLuo, Yuanke bool HasUnconfigTile = false;
512496156acSLuo, Yuanke
513496156acSLuo, Yuanke auto Config = [&](MachineInstr &Before) {
514496156acSLuo, Yuanke if (CfgSS == -1)
515496156acSLuo, Yuanke CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
516496156acSLuo, Yuanke ST->getTileConfigAlignment(), false);
517496156acSLuo, Yuanke LastTileCfg = addFrameReference(
518aaaf9cedSLuo, Yuanke BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
519496156acSLuo, Yuanke LastShapeMI = nullptr;
520496156acSLuo, Yuanke Change = true;
521496156acSLuo, Yuanke };
522496156acSLuo, Yuanke auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
523496156acSLuo, Yuanke for (const MachineOperand &MO : MI.operands()) {
524496156acSLuo, Yuanke if (!MO.isReg())
525496156acSLuo, Yuanke continue;
526496156acSLuo, Yuanke Register Reg = MO.getReg();
527496156acSLuo, Yuanke if (Reg.isVirtual() &&
528496156acSLuo, Yuanke MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
529496156acSLuo, Yuanke return true;
530496156acSLuo, Yuanke }
531496156acSLuo, Yuanke return false;
532496156acSLuo, Yuanke };
533496156acSLuo, Yuanke for (MachineInstr &MI : reverse(MBB)) {
534496156acSLuo, Yuanke // We have transformed phi node before configuring BB.
535496156acSLuo, Yuanke if (MI.isPHI())
536496156acSLuo, Yuanke break;
537496156acSLuo, Yuanke // Don't collect the shape of used tile, the tile should be defined
538496156acSLuo, Yuanke // before the tile use. Spill and reload would happen if there is only
539496156acSLuo, Yuanke // tile use after ldtilecfg, so the shape can be collected from reload.
540496156acSLuo, Yuanke // Take below code for example. %t would be reloaded before tilestore
541496156acSLuo, Yuanke // call
542496156acSLuo, Yuanke // ....
543496156acSLuo, Yuanke // tilestore %r, %c, %t
544496156acSLuo, Yuanke // -->
545496156acSLuo, Yuanke // call
546496156acSLuo, Yuanke // ldtilecfg
547496156acSLuo, Yuanke // %t = tileload %r, %c
548496156acSLuo, Yuanke // tilestore %r, %c, %t
549496156acSLuo, Yuanke if (HasTileOperand(MRI, MI))
550496156acSLuo, Yuanke HasUnconfigTile = true;
551496156acSLuo, Yuanke // According to AMX ABI, all the tile registers including config register
552496156acSLuo, Yuanke // are volatile. Caller need to save/restore config register.
553496156acSLuo, Yuanke if (MI.isCall() && HasUnconfigTile) {
554496156acSLuo, Yuanke MachineBasicBlock::iterator I;
555496156acSLuo, Yuanke if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
556496156acSLuo, Yuanke I = ++LastShapeMI->getIterator();
557496156acSLuo, Yuanke else
558496156acSLuo, Yuanke I = ++MI.getIterator();
559496156acSLuo, Yuanke Config(*I);
560496156acSLuo, Yuanke HasUnconfigTile = false;
561496156acSLuo, Yuanke continue;
562496156acSLuo, Yuanke }
563496156acSLuo, Yuanke if (!isTileDef(MRI, MI))
564496156acSLuo, Yuanke continue;
565496156acSLuo, Yuanke //
566496156acSLuo, Yuanke //---------------------------------------------------------------------
567496156acSLuo, Yuanke // Don't handle COPY instruction. If the src and dst of the COPY can be
568496156acSLuo, Yuanke // in the same config in below case, we just check the shape of t0.
569496156acSLuo, Yuanke // def row0
570496156acSLuo, Yuanke // def col0
571496156acSLuo, Yuanke // ldtilecfg
    // t0 = tilezero(row0, col0)
    // t1 = copy t0
    // ...
    // If the src and dst of the COPY can NOT be in the same config in below
    // case, a reload would be generated before the copy instruction.
    // def row0
    // def col0
    // t0 = tilezero(row0, col0)
580496156acSLuo, Yuanke // spill t0
581496156acSLuo, Yuanke // ...
582496156acSLuo, Yuanke // def row1
583496156acSLuo, Yuanke // def col1
584496156acSLuo, Yuanke // ldtilecfg
585496156acSLuo, Yuanke // t1 = tilezero(row1, col1)
586496156acSLuo, Yuanke // reload t0
587496156acSLuo, Yuanke // t1 = copy t0
588496156acSLuo, Yuanke //---------------------------------------------------------------------
589496156acSLuo, Yuanke //
590496156acSLuo, Yuanke // If MI dominate the last shape def instruction, we need insert
591496156acSLuo, Yuanke // ldtilecfg after LastShapeMI now. The config doesn't include
592496156acSLuo, Yuanke // current MI.
593496156acSLuo, Yuanke // def row0
594496156acSLuo, Yuanke // def col0
595496156acSLuo, Yuanke // tilezero(row0, col0) <- MI
596496156acSLuo, Yuanke // def row1
597496156acSLuo, Yuanke // def col1
598496156acSLuo, Yuanke // ldtilecfg <- insert
599496156acSLuo, Yuanke // tilezero(row1, col1)
600496156acSLuo, Yuanke if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
601496156acSLuo, Yuanke Config(*(++LastShapeMI->getIterator()));
602496156acSLuo, Yuanke MachineOperand *RowMO = &MI.getOperand(1);
603496156acSLuo, Yuanke MachineOperand *ColMO = &MI.getOperand(2);
604496156acSLuo, Yuanke MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
605496156acSLuo, Yuanke MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
606496156acSLuo, Yuanke // If the shape is defined in current MBB, check the domination.
607496156acSLuo, Yuanke // FIXME how about loop?
608496156acSLuo, Yuanke if (RowMI->getParent() == &MBB) {
609496156acSLuo, Yuanke if (!LastShapeMI)
610496156acSLuo, Yuanke LastShapeMI = RowMI;
611496156acSLuo, Yuanke else if (dominates(MBB, LastShapeMI, RowMI))
612496156acSLuo, Yuanke LastShapeMI = RowMI;
613496156acSLuo, Yuanke }
614496156acSLuo, Yuanke if (ColMI->getParent() == &MBB) {
615496156acSLuo, Yuanke if (!LastShapeMI)
616496156acSLuo, Yuanke LastShapeMI = ColMI;
617496156acSLuo, Yuanke else if (dominates(MBB, LastShapeMI, ColMI))
618496156acSLuo, Yuanke LastShapeMI = ColMI;
619496156acSLuo, Yuanke }
    // If a user of the tile register lives out of the tilecfg, spill the
    // register and reload it before the user.
622496156acSLuo, Yuanke Register TileReg = MI.getOperand(0).getReg();
623496156acSLuo, Yuanke if (mayLiveOut(TileReg, LastTileCfg))
624496156acSLuo, Yuanke spill(++MI.getIterator(), TileReg, false);
625496156acSLuo, Yuanke for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
626496156acSLuo, Yuanke if (UseMI.getParent() == &MBB) {
        // Check that the user does not cross the ldtilecfg.
628496156acSLuo, Yuanke if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
629496156acSLuo, Yuanke continue;
        // Reload before UseMI.
631496156acSLuo, Yuanke reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
632496156acSLuo, Yuanke } else {
633496156acSLuo, Yuanke // Don't reload for phi instruction, we handle phi reload separately.
634496156acSLuo, Yuanke // TODO: merge the reload for the same user MBB.
635496156acSLuo, Yuanke if (!UseMI.isPHI())
636496156acSLuo, Yuanke reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
637496156acSLuo, Yuanke }
638496156acSLuo, Yuanke }
639496156acSLuo, Yuanke }
640496156acSLuo, Yuanke
641496156acSLuo, Yuanke // Configure tile registers at the head of the MBB
642496156acSLuo, Yuanke if (HasUnconfigTile) {
643496156acSLuo, Yuanke MachineInstr *Before;
644496156acSLuo, Yuanke if (LastShapeMI == nullptr || LastShapeMI->isPHI())
645496156acSLuo, Yuanke Before = &*MBB.getFirstNonPHI();
646496156acSLuo, Yuanke else
647496156acSLuo, Yuanke Before = &*(++LastShapeMI->getIterator());
648496156acSLuo, Yuanke
649496156acSLuo, Yuanke Config(*Before);
650496156acSLuo, Yuanke }
651496156acSLuo, Yuanke
652496156acSLuo, Yuanke return Change;
653496156acSLuo, Yuanke }
654496156acSLuo, Yuanke
runOnMachineFunction(MachineFunction & MFunc)655496156acSLuo, Yuanke bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
656496156acSLuo, Yuanke MF = &MFunc;
657496156acSLuo, Yuanke MRI = &MFunc.getRegInfo();
658496156acSLuo, Yuanke ST = &MFunc.getSubtarget<X86Subtarget>();
659496156acSLuo, Yuanke TII = ST->getInstrInfo();
660496156acSLuo, Yuanke X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
661496156acSLuo, Yuanke MFI = &MFunc.getFrameInfo();
662496156acSLuo, Yuanke TRI = ST->getRegisterInfo();
663496156acSLuo, Yuanke CfgSS = -1;
664496156acSLuo, Yuanke
665496156acSLuo, Yuanke unsigned NumVirtRegs = MRI->getNumVirtRegs();
6663b1de7abSLuo, Yuanke // Abandon early if there is no tile register to config.
6673b1de7abSLuo, Yuanke bool HasVirtTileReg = false;
6683b1de7abSLuo, Yuanke for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
6693b1de7abSLuo, Yuanke Register VirtReg = Register::index2VirtReg(I);
6703b1de7abSLuo, Yuanke if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) {
6713b1de7abSLuo, Yuanke HasVirtTileReg = true;
6723b1de7abSLuo, Yuanke break;
6733b1de7abSLuo, Yuanke }
6743b1de7abSLuo, Yuanke }
6753b1de7abSLuo, Yuanke if (!HasVirtTileReg)
6763b1de7abSLuo, Yuanke return false;
6773b1de7abSLuo, Yuanke
678496156acSLuo, Yuanke StackSlotForVirtReg.resize(NumVirtRegs);
679496156acSLuo, Yuanke MayLiveAcrossBlocks.clear();
680496156acSLuo, Yuanke // We will create register during config. *3 is to make sure
681496156acSLuo, Yuanke // the virtual register number doesn't exceed the size of
682496156acSLuo, Yuanke // the bit vector.
683496156acSLuo, Yuanke MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
684496156acSLuo, Yuanke bool Change = false;
685496156acSLuo, Yuanke assert(MRI->isSSA());
686496156acSLuo, Yuanke
687496156acSLuo, Yuanke // Canonicalize the phi node first.
688496156acSLuo, Yuanke for (MachineBasicBlock &MBB : MFunc)
689496156acSLuo, Yuanke canonicalizePHIs(MBB);
690496156acSLuo, Yuanke
691496156acSLuo, Yuanke // Loop over all of the basic blocks in reverse post order and insert
692496156acSLuo, Yuanke // ldtilecfg for tile registers. The reserse post order is to facilitate
693496156acSLuo, Yuanke // PHI node convert.
694496156acSLuo, Yuanke ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
695496156acSLuo, Yuanke for (MachineBasicBlock *MBB : RPOT) {
696496156acSLuo, Yuanke convertPHIs(*MBB);
697496156acSLuo, Yuanke Change |= configBasicBlock(*MBB);
698496156acSLuo, Yuanke }
699496156acSLuo, Yuanke
700496156acSLuo, Yuanke if (Change)
701496156acSLuo, Yuanke InitializeTileConfigStackSpace();
702496156acSLuo, Yuanke
703496156acSLuo, Yuanke StackSlotForVirtReg.clear();
704496156acSLuo, Yuanke return Change;
705496156acSLuo, Yuanke }
706496156acSLuo, Yuanke
createX86FastPreTileConfigPass()707496156acSLuo, Yuanke FunctionPass *llvm::createX86FastPreTileConfigPass() {
708496156acSLuo, Yuanke return new X86FastPreTileConfig();
709496156acSLuo, Yuanke }
710