1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Interface definition for SIInstrInfo.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
16
17 #include "AMDGPUMIRFormatter.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIRegisterInfo.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/CodeGen/TargetInstrInfo.h"
23 #include "llvm/CodeGen/TargetSchedule.h"
24
25 #define GET_INSTRINFO_HEADER
26 #include "AMDGPUGenInstrInfo.inc"
27
28 namespace llvm {
29
30 class APInt;
31 class GCNSubtarget;
32 class LiveVariables;
33 class MachineDominatorTree;
34 class MachineRegisterInfo;
35 class RegScavenger;
36 class TargetRegisterClass;
37 class ScheduleHazardRecognizer;
38
/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
/// (Target-specific MMO flag; meaning is private to the AMDGPU backend.)
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;
43
44 class SIInstrInfo final : public AMDGPUGenInstrInfo {
45 private:
46 const SIRegisterInfo RI;
47 const GCNSubtarget &ST;
48 TargetSchedModel SchedModel;
49 mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;
50
51 // The inverse predicate should have the negative value.
52 enum BranchPredicate {
53 INVALID_BR = 0,
54 SCC_TRUE = 1,
55 SCC_FALSE = -1,
56 VCCNZ = 2,
57 VCCZ = -2,
58 EXECNZ = -3,
59 EXECZ = 3
60 };
61
62 using SetVectorType = SmallSetVector<MachineInstr *, 32>;
63
64 static unsigned getBranchOpcode(BranchPredicate Cond);
65 static BranchPredicate getBranchPredicate(unsigned Opcode);
66
67 public:
68 unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
69 MachineRegisterInfo &MRI,
70 MachineOperand &SuperReg,
71 const TargetRegisterClass *SuperRC,
72 unsigned SubIdx,
73 const TargetRegisterClass *SubRC) const;
74 MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
75 MachineRegisterInfo &MRI,
76 MachineOperand &SuperReg,
77 const TargetRegisterClass *SuperRC,
78 unsigned SubIdx,
79 const TargetRegisterClass *SubRC) const;
80 private:
81 void swapOperands(MachineInstr &Inst) const;
82
83 std::pair<bool, MachineBasicBlock *>
84 moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
85 MachineDominatorTree *MDT = nullptr) const;
86
87 void lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
88 MachineDominatorTree *MDT = nullptr) const;
89
90 void lowerScalarAbs(SetVectorType &Worklist,
91 MachineInstr &Inst) const;
92
93 void lowerScalarXnor(SetVectorType &Worklist,
94 MachineInstr &Inst) const;
95
96 void splitScalarNotBinop(SetVectorType &Worklist,
97 MachineInstr &Inst,
98 unsigned Opcode) const;
99
100 void splitScalarBinOpN2(SetVectorType &Worklist,
101 MachineInstr &Inst,
102 unsigned Opcode) const;
103
104 void splitScalar64BitUnaryOp(SetVectorType &Worklist,
105 MachineInstr &Inst, unsigned Opcode,
106 bool Swap = false) const;
107
108 void splitScalar64BitAddSub(SetVectorType &Worklist, MachineInstr &Inst,
109 MachineDominatorTree *MDT = nullptr) const;
110
111 void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst,
112 unsigned Opcode,
113 MachineDominatorTree *MDT = nullptr) const;
114
115 void splitScalar64BitXnor(SetVectorType &Worklist, MachineInstr &Inst,
116 MachineDominatorTree *MDT = nullptr) const;
117
118 void splitScalar64BitBCNT(SetVectorType &Worklist,
119 MachineInstr &Inst) const;
120 void splitScalar64BitBFE(SetVectorType &Worklist,
121 MachineInstr &Inst) const;
122 void movePackToVALU(SetVectorType &Worklist,
123 MachineRegisterInfo &MRI,
124 MachineInstr &Inst) const;
125
126 void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
127 SetVectorType &Worklist) const;
128
129 void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
130 MachineInstr &SCCDefInst,
131 SetVectorType &Worklist,
132 Register NewCond = Register()) const;
133 void addSCCDefsToVALUWorklist(MachineOperand &Op,
134 SetVectorType &Worklist) const;
135
136 const TargetRegisterClass *
137 getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
138
139 bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
140 const MachineInstr &MIb) const;
141
142 Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
143
144 protected:
145 bool swapSourceModifiers(MachineInstr &MI,
146 MachineOperand &Src0, unsigned Src0OpName,
147 MachineOperand &Src1, unsigned Src1OpName) const;
148
149 MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
150 unsigned OpIdx0,
151 unsigned OpIdx1) const override;
152
153 public:
154 enum TargetOperandFlags {
155 MO_MASK = 0xf,
156
157 MO_NONE = 0,
158 // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
159 MO_GOTPCREL = 1,
160 // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
161 MO_GOTPCREL32 = 2,
162 MO_GOTPCREL32_LO = 2,
163 // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
164 MO_GOTPCREL32_HI = 3,
165 // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
166 MO_REL32 = 4,
167 MO_REL32_LO = 4,
168 // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
169 MO_REL32_HI = 5,
170
171 MO_FAR_BRANCH_OFFSET = 6,
172
173 MO_ABS32_LO = 8,
174 MO_ABS32_HI = 9,
175 };
176
177 explicit SIInstrInfo(const GCNSubtarget &ST);
178
  /// \returns the target's register info, owned by this instruction info.
  const SIRegisterInfo &getRegisterInfo() const {
    return RI;
  }
182
  /// \returns the subtarget this instruction info was constructed for.
  const GCNSubtarget &getSubtarget() const {
    return ST;
  }
186
187 bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
188
189 bool isIgnorableUse(const MachineOperand &MO) const override;
190
191 bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
192 int64_t &Offset1,
193 int64_t &Offset2) const override;
194
195 bool getMemOperandsWithOffsetWidth(
196 const MachineInstr &LdSt,
197 SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
198 bool &OffsetIsScalable, unsigned &Width,
199 const TargetRegisterInfo *TRI) const final;
200
201 bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
202 ArrayRef<const MachineOperand *> BaseOps2,
203 unsigned NumLoads, unsigned NumBytes) const override;
204
205 bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
206 int64_t Offset1, unsigned NumLoads) const override;
207
208 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
209 const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
210 bool KillSrc) const override;
211
212 void materializeImmediate(MachineBasicBlock &MBB,
213 MachineBasicBlock::iterator MI,
214 const DebugLoc &DL,
215 unsigned DestReg,
216 int64_t Value) const;
217
218 const TargetRegisterClass *getPreferredSelectRegClass(
219 unsigned Size) const;
220
221 Register insertNE(MachineBasicBlock *MBB,
222 MachineBasicBlock::iterator I, const DebugLoc &DL,
223 Register SrcReg, int Value) const;
224
225 Register insertEQ(MachineBasicBlock *MBB,
226 MachineBasicBlock::iterator I, const DebugLoc &DL,
227 Register SrcReg, int Value) const;
228
229 void storeRegToStackSlot(MachineBasicBlock &MBB,
230 MachineBasicBlock::iterator MI, Register SrcReg,
231 bool isKill, int FrameIndex,
232 const TargetRegisterClass *RC,
233 const TargetRegisterInfo *TRI) const override;
234
235 void loadRegFromStackSlot(MachineBasicBlock &MBB,
236 MachineBasicBlock::iterator MI, Register DestReg,
237 int FrameIndex, const TargetRegisterClass *RC,
238 const TargetRegisterInfo *TRI) const override;
239
240 bool expandPostRAPseudo(MachineInstr &MI) const override;
241
242 // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
243 // instructions. Returns a pair of generated instructions.
244 // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form
  // and a REG_SEQUENCE is produced to define the original register.
247 std::pair<MachineInstr*, MachineInstr*>
248 expandMovDPP64(MachineInstr &MI) const;
249
250 // Returns an opcode that can be used to move a value to a \p DstRC
251 // register. If there is no hardware instruction that can store to \p
252 // DstRC, then AMDGPU::COPY is returned.
253 unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
254
255 const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
256 unsigned EltSize,
257 bool IsSGPR) const;
258
259 const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
260 bool IsIndirectSrc) const;
261 LLVM_READONLY
262 int commuteOpcode(unsigned Opc) const;
263
  /// Convenience overload: \returns the commuted opcode for \p MI's opcode
  /// (delegates to commuteOpcode(unsigned)).
  LLVM_READONLY
  inline int commuteOpcode(const MachineInstr &MI) const {
    return commuteOpcode(MI.getOpcode());
  }
268
269 bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
270 unsigned &SrcOpIdx2) const override;
271
272 bool findCommutedOpIndices(MCInstrDesc Desc, unsigned & SrcOpIdx0,
273 unsigned & SrcOpIdx1) const;
274
275 bool isBranchOffsetInRange(unsigned BranchOpc,
276 int64_t BrOffset) const override;
277
278 MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
279
280 void insertIndirectBranch(MachineBasicBlock &MBB,
281 MachineBasicBlock &NewDestBB,
282 MachineBasicBlock &RestoreBB, const DebugLoc &DL,
283 int64_t BrOffset, RegScavenger *RS) const override;
284
285 bool analyzeBranchImpl(MachineBasicBlock &MBB,
286 MachineBasicBlock::iterator I,
287 MachineBasicBlock *&TBB,
288 MachineBasicBlock *&FBB,
289 SmallVectorImpl<MachineOperand> &Cond,
290 bool AllowModify) const;
291
292 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
293 MachineBasicBlock *&FBB,
294 SmallVectorImpl<MachineOperand> &Cond,
295 bool AllowModify = false) const override;
296
297 unsigned removeBranch(MachineBasicBlock &MBB,
298 int *BytesRemoved = nullptr) const override;
299
300 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
301 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
302 const DebugLoc &DL,
303 int *BytesAdded = nullptr) const override;
304
305 bool reverseBranchCondition(
306 SmallVectorImpl<MachineOperand> &Cond) const override;
307
308 bool canInsertSelect(const MachineBasicBlock &MBB,
309 ArrayRef<MachineOperand> Cond, Register DstReg,
310 Register TrueReg, Register FalseReg, int &CondCycles,
311 int &TrueCycles, int &FalseCycles) const override;
312
313 void insertSelect(MachineBasicBlock &MBB,
314 MachineBasicBlock::iterator I, const DebugLoc &DL,
315 Register DstReg, ArrayRef<MachineOperand> Cond,
316 Register TrueReg, Register FalseReg) const override;
317
318 void insertVectorSelect(MachineBasicBlock &MBB,
319 MachineBasicBlock::iterator I, const DebugLoc &DL,
320 Register DstReg, ArrayRef<MachineOperand> Cond,
321 Register TrueReg, Register FalseReg) const;
322
323 bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
324 Register &SrcReg2, int64_t &CmpMask,
325 int64_t &CmpValue) const override;
326
327 bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
328 Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
329 const MachineRegisterInfo *MRI) const override;
330
331 bool
332 areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
333 const MachineInstr &MIb) const override;
334
335 static bool isFoldableCopy(const MachineInstr &MI);
336
337 void removeModOperands(MachineInstr &MI) const;
338
339 bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
340 MachineRegisterInfo *MRI) const final;
341
getMachineCSELookAheadLimit()342 unsigned getMachineCSELookAheadLimit() const override { return 500; }
343
344 MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
345 LiveIntervals *LIS) const override;
346
347 bool isSchedulingBoundary(const MachineInstr &MI,
348 const MachineBasicBlock *MBB,
349 const MachineFunction &MF) const override;
350
  /// \returns true if \p MI is a scalar-ALU instruction, per the SALU bit in
  /// its descriptor's TSFlags.
  static bool isSALU(const MachineInstr &MI) {
    return MI.getDesc().TSFlags & SIInstrFlags::SALU;
  }
354
isSALU(uint16_t Opcode)355 bool isSALU(uint16_t Opcode) const {
356 return get(Opcode).TSFlags & SIInstrFlags::SALU;
357 }
358
isVALU(const MachineInstr & MI)359 static bool isVALU(const MachineInstr &MI) {
360 return MI.getDesc().TSFlags & SIInstrFlags::VALU;
361 }
362
isVALU(uint16_t Opcode)363 bool isVALU(uint16_t Opcode) const {
364 return get(Opcode).TSFlags & SIInstrFlags::VALU;
365 }
366
isVMEM(const MachineInstr & MI)367 static bool isVMEM(const MachineInstr &MI) {
368 return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
369 }
370
isVMEM(uint16_t Opcode)371 bool isVMEM(uint16_t Opcode) const {
372 return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
373 }
374
isSOP1(const MachineInstr & MI)375 static bool isSOP1(const MachineInstr &MI) {
376 return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
377 }
378
isSOP1(uint16_t Opcode)379 bool isSOP1(uint16_t Opcode) const {
380 return get(Opcode).TSFlags & SIInstrFlags::SOP1;
381 }
382
isSOP2(const MachineInstr & MI)383 static bool isSOP2(const MachineInstr &MI) {
384 return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
385 }
386
isSOP2(uint16_t Opcode)387 bool isSOP2(uint16_t Opcode) const {
388 return get(Opcode).TSFlags & SIInstrFlags::SOP2;
389 }
390
isSOPC(const MachineInstr & MI)391 static bool isSOPC(const MachineInstr &MI) {
392 return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
393 }
394
isSOPC(uint16_t Opcode)395 bool isSOPC(uint16_t Opcode) const {
396 return get(Opcode).TSFlags & SIInstrFlags::SOPC;
397 }
398
isSOPK(const MachineInstr & MI)399 static bool isSOPK(const MachineInstr &MI) {
400 return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
401 }
402
isSOPK(uint16_t Opcode)403 bool isSOPK(uint16_t Opcode) const {
404 return get(Opcode).TSFlags & SIInstrFlags::SOPK;
405 }
406
isSOPP(const MachineInstr & MI)407 static bool isSOPP(const MachineInstr &MI) {
408 return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
409 }
410
isSOPP(uint16_t Opcode)411 bool isSOPP(uint16_t Opcode) const {
412 return get(Opcode).TSFlags & SIInstrFlags::SOPP;
413 }
414
isPacked(const MachineInstr & MI)415 static bool isPacked(const MachineInstr &MI) {
416 return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
417 }
418
isPacked(uint16_t Opcode)419 bool isPacked(uint16_t Opcode) const {
420 return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
421 }
422
isVOP1(const MachineInstr & MI)423 static bool isVOP1(const MachineInstr &MI) {
424 return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
425 }
426
isVOP1(uint16_t Opcode)427 bool isVOP1(uint16_t Opcode) const {
428 return get(Opcode).TSFlags & SIInstrFlags::VOP1;
429 }
430
isVOP2(const MachineInstr & MI)431 static bool isVOP2(const MachineInstr &MI) {
432 return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
433 }
434
isVOP2(uint16_t Opcode)435 bool isVOP2(uint16_t Opcode) const {
436 return get(Opcode).TSFlags & SIInstrFlags::VOP2;
437 }
438
isVOP3(const MachineInstr & MI)439 static bool isVOP3(const MachineInstr &MI) {
440 return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
441 }
442
isVOP3(uint16_t Opcode)443 bool isVOP3(uint16_t Opcode) const {
444 return get(Opcode).TSFlags & SIInstrFlags::VOP3;
445 }
446
isSDWA(const MachineInstr & MI)447 static bool isSDWA(const MachineInstr &MI) {
448 return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
449 }
450
isSDWA(uint16_t Opcode)451 bool isSDWA(uint16_t Opcode) const {
452 return get(Opcode).TSFlags & SIInstrFlags::SDWA;
453 }
454
isVOPC(const MachineInstr & MI)455 static bool isVOPC(const MachineInstr &MI) {
456 return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
457 }
458
isVOPC(uint16_t Opcode)459 bool isVOPC(uint16_t Opcode) const {
460 return get(Opcode).TSFlags & SIInstrFlags::VOPC;
461 }
462
isMUBUF(const MachineInstr & MI)463 static bool isMUBUF(const MachineInstr &MI) {
464 return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
465 }
466
isMUBUF(uint16_t Opcode)467 bool isMUBUF(uint16_t Opcode) const {
468 return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
469 }
470
isMTBUF(const MachineInstr & MI)471 static bool isMTBUF(const MachineInstr &MI) {
472 return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
473 }
474
isMTBUF(uint16_t Opcode)475 bool isMTBUF(uint16_t Opcode) const {
476 return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
477 }
478
isSMRD(const MachineInstr & MI)479 static bool isSMRD(const MachineInstr &MI) {
480 return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
481 }
482
isSMRD(uint16_t Opcode)483 bool isSMRD(uint16_t Opcode) const {
484 return get(Opcode).TSFlags & SIInstrFlags::SMRD;
485 }
486
487 bool isBufferSMRD(const MachineInstr &MI) const;
488
isDS(const MachineInstr & MI)489 static bool isDS(const MachineInstr &MI) {
490 return MI.getDesc().TSFlags & SIInstrFlags::DS;
491 }
492
isDS(uint16_t Opcode)493 bool isDS(uint16_t Opcode) const {
494 return get(Opcode).TSFlags & SIInstrFlags::DS;
495 }
496
497 bool isAlwaysGDS(uint16_t Opcode) const;
498
isMIMG(const MachineInstr & MI)499 static bool isMIMG(const MachineInstr &MI) {
500 return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
501 }
502
isMIMG(uint16_t Opcode)503 bool isMIMG(uint16_t Opcode) const {
504 return get(Opcode).TSFlags & SIInstrFlags::MIMG;
505 }
506
isGather4(const MachineInstr & MI)507 static bool isGather4(const MachineInstr &MI) {
508 return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
509 }
510
isGather4(uint16_t Opcode)511 bool isGather4(uint16_t Opcode) const {
512 return get(Opcode).TSFlags & SIInstrFlags::Gather4;
513 }
514
isFLAT(const MachineInstr & MI)515 static bool isFLAT(const MachineInstr &MI) {
516 return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
517 }
518
519 // Is a FLAT encoded instruction which accesses a specific segment,
520 // i.e. global_* or scratch_*.
isSegmentSpecificFLAT(const MachineInstr & MI)521 static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
522 auto Flags = MI.getDesc().TSFlags;
523 return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
524 }
525
isSegmentSpecificFLAT(uint16_t Opcode)526 bool isSegmentSpecificFLAT(uint16_t Opcode) const {
527 auto Flags = get(Opcode).TSFlags;
528 return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
529 }
530
isFLATGlobal(const MachineInstr & MI)531 static bool isFLATGlobal(const MachineInstr &MI) {
532 return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
533 }
534
isFLATGlobal(uint16_t Opcode)535 bool isFLATGlobal(uint16_t Opcode) const {
536 return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
537 }
538
isFLATScratch(const MachineInstr & MI)539 static bool isFLATScratch(const MachineInstr &MI) {
540 return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
541 }
542
isFLATScratch(uint16_t Opcode)543 bool isFLATScratch(uint16_t Opcode) const {
544 return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
545 }
546
547 // Any FLAT encoded instruction, including global_* and scratch_*.
isFLAT(uint16_t Opcode)548 bool isFLAT(uint16_t Opcode) const {
549 return get(Opcode).TSFlags & SIInstrFlags::FLAT;
550 }
551
isEXP(const MachineInstr & MI)552 static bool isEXP(const MachineInstr &MI) {
553 return MI.getDesc().TSFlags & SIInstrFlags::EXP;
554 }
555
  /// \returns true if \p MI is an export to one of the dual-source blend
  /// targets.
  static bool isDualSourceBlendEXP(const MachineInstr &MI) {
    if (!isEXP(MI))
      return false;
    // Operand 0 of an EXP instruction is the export target immediate.
    unsigned Target = MI.getOperand(0).getImm();
    return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
           Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
  }
563
isEXP(uint16_t Opcode)564 bool isEXP(uint16_t Opcode) const {
565 return get(Opcode).TSFlags & SIInstrFlags::EXP;
566 }
567
isAtomicNoRet(const MachineInstr & MI)568 static bool isAtomicNoRet(const MachineInstr &MI) {
569 return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
570 }
571
isAtomicNoRet(uint16_t Opcode)572 bool isAtomicNoRet(uint16_t Opcode) const {
573 return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
574 }
575
isAtomicRet(const MachineInstr & MI)576 static bool isAtomicRet(const MachineInstr &MI) {
577 return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
578 }
579
isAtomicRet(uint16_t Opcode)580 bool isAtomicRet(uint16_t Opcode) const {
581 return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
582 }
583
isAtomic(const MachineInstr & MI)584 static bool isAtomic(const MachineInstr &MI) {
585 return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
586 SIInstrFlags::IsAtomicNoRet);
587 }
588
isAtomic(uint16_t Opcode)589 bool isAtomic(uint16_t Opcode) const {
590 return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
591 SIInstrFlags::IsAtomicNoRet);
592 }
593
isWQM(const MachineInstr & MI)594 static bool isWQM(const MachineInstr &MI) {
595 return MI.getDesc().TSFlags & SIInstrFlags::WQM;
596 }
597
isWQM(uint16_t Opcode)598 bool isWQM(uint16_t Opcode) const {
599 return get(Opcode).TSFlags & SIInstrFlags::WQM;
600 }
601
isDisableWQM(const MachineInstr & MI)602 static bool isDisableWQM(const MachineInstr &MI) {
603 return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
604 }
605
isDisableWQM(uint16_t Opcode)606 bool isDisableWQM(uint16_t Opcode) const {
607 return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
608 }
609
isVGPRSpill(const MachineInstr & MI)610 static bool isVGPRSpill(const MachineInstr &MI) {
611 return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
612 }
613
isVGPRSpill(uint16_t Opcode)614 bool isVGPRSpill(uint16_t Opcode) const {
615 return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
616 }
617
isSGPRSpill(const MachineInstr & MI)618 static bool isSGPRSpill(const MachineInstr &MI) {
619 return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
620 }
621
isSGPRSpill(uint16_t Opcode)622 bool isSGPRSpill(uint16_t Opcode) const {
623 return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
624 }
625
isDPP(const MachineInstr & MI)626 static bool isDPP(const MachineInstr &MI) {
627 return MI.getDesc().TSFlags & SIInstrFlags::DPP;
628 }
629
isDPP(uint16_t Opcode)630 bool isDPP(uint16_t Opcode) const {
631 return get(Opcode).TSFlags & SIInstrFlags::DPP;
632 }
633
isTRANS(const MachineInstr & MI)634 static bool isTRANS(const MachineInstr &MI) {
635 return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
636 }
637
isTRANS(uint16_t Opcode)638 bool isTRANS(uint16_t Opcode) const {
639 return get(Opcode).TSFlags & SIInstrFlags::TRANS;
640 }
641
isVOP3P(const MachineInstr & MI)642 static bool isVOP3P(const MachineInstr &MI) {
643 return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
644 }
645
isVOP3P(uint16_t Opcode)646 bool isVOP3P(uint16_t Opcode) const {
647 return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
648 }
649
isVINTRP(const MachineInstr & MI)650 static bool isVINTRP(const MachineInstr &MI) {
651 return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
652 }
653
isVINTRP(uint16_t Opcode)654 bool isVINTRP(uint16_t Opcode) const {
655 return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
656 }
657
isMAI(const MachineInstr & MI)658 static bool isMAI(const MachineInstr &MI) {
659 return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
660 }
661
isMAI(uint16_t Opcode)662 bool isMAI(uint16_t Opcode) const {
663 return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
664 }
665
  /// \returns true if \p MI is a matrix multiply: an MAI instruction other
  /// than the accvgpr read/write helpers (which carry the MAI flag but do no
  /// multiplication).
  static bool isMFMA(const MachineInstr &MI) {
    return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
           MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
  }
670
isDOT(const MachineInstr & MI)671 static bool isDOT(const MachineInstr &MI) {
672 return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
673 }
674
isWMMA(const MachineInstr & MI)675 static bool isWMMA(const MachineInstr &MI) {
676 return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
677 }
678
isWMMA(uint16_t Opcode)679 bool isWMMA(uint16_t Opcode) const {
680 return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
681 }
682
isDOT(uint16_t Opcode)683 bool isDOT(uint16_t Opcode) const {
684 return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
685 }
686
isLDSDIR(const MachineInstr & MI)687 static bool isLDSDIR(const MachineInstr &MI) {
688 return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
689 }
690
isLDSDIR(uint16_t Opcode)691 bool isLDSDIR(uint16_t Opcode) const {
692 return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
693 }
694
isVINTERP(const MachineInstr & MI)695 static bool isVINTERP(const MachineInstr &MI) {
696 return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
697 }
698
isVINTERP(uint16_t Opcode)699 bool isVINTERP(uint16_t Opcode) const {
700 return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
701 }
702
isScalarUnit(const MachineInstr & MI)703 static bool isScalarUnit(const MachineInstr &MI) {
704 return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
705 }
706
usesVM_CNT(const MachineInstr & MI)707 static bool usesVM_CNT(const MachineInstr &MI) {
708 return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
709 }
710
usesLGKM_CNT(const MachineInstr & MI)711 static bool usesLGKM_CNT(const MachineInstr &MI) {
712 return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
713 }
714
sopkIsZext(const MachineInstr & MI)715 static bool sopkIsZext(const MachineInstr &MI) {
716 return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
717 }
718
sopkIsZext(uint16_t Opcode)719 bool sopkIsZext(uint16_t Opcode) const {
720 return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
721 }
722
  /// \returns true if this is an s_store_dword* instruction. This is more
  /// specific than isSMEM && mayStore.
isScalarStore(const MachineInstr & MI)725 static bool isScalarStore(const MachineInstr &MI) {
726 return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
727 }
728
isScalarStore(uint16_t Opcode)729 bool isScalarStore(uint16_t Opcode) const {
730 return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
731 }
732
isFixedSize(const MachineInstr & MI)733 static bool isFixedSize(const MachineInstr &MI) {
734 return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
735 }
736
isFixedSize(uint16_t Opcode)737 bool isFixedSize(uint16_t Opcode) const {
738 return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
739 }
740
hasFPClamp(const MachineInstr & MI)741 static bool hasFPClamp(const MachineInstr &MI) {
742 return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
743 }
744
hasFPClamp(uint16_t Opcode)745 bool hasFPClamp(uint16_t Opcode) const {
746 return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
747 }
748
hasIntClamp(const MachineInstr & MI)749 static bool hasIntClamp(const MachineInstr &MI) {
750 return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
751 }
752
getClampMask(const MachineInstr & MI)753 uint64_t getClampMask(const MachineInstr &MI) const {
754 const uint64_t ClampFlags = SIInstrFlags::FPClamp |
755 SIInstrFlags::IntClamp |
756 SIInstrFlags::ClampLo |
757 SIInstrFlags::ClampHi;
758 return MI.getDesc().TSFlags & ClampFlags;
759 }
760
usesFPDPRounding(const MachineInstr & MI)761 static bool usesFPDPRounding(const MachineInstr &MI) {
762 return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
763 }
764
usesFPDPRounding(uint16_t Opcode)765 bool usesFPDPRounding(uint16_t Opcode) const {
766 return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
767 }
768
isFPAtomic(const MachineInstr & MI)769 static bool isFPAtomic(const MachineInstr &MI) {
770 return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
771 }
772
isFPAtomic(uint16_t Opcode)773 bool isFPAtomic(uint16_t Opcode) const {
774 return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
775 }
776
  /// \returns true if \p MI is a COPY whose destination register is not an
  /// SGPR. \p MI must be a COPY.
  bool isVGPRCopy(const MachineInstr &MI) const {
    assert(MI.isCopy());
    Register Dest = MI.getOperand(0).getReg();
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return !RI.isSGPRReg(MRI, Dest);
  }
784
  /// \returns true if any explicit use operand of \p MI is a VGPR.
  bool hasVGPRUses(const MachineInstr &MI) const {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    return llvm::any_of(MI.explicit_uses(),
                        [&MRI, this](const MachineOperand &MO) {
                          return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
  }
792
  /// Return true if the instruction modifies the mode register.
794 static bool modifiesModeRegister(const MachineInstr &MI);
795
796 /// Whether we must prevent this instruction from executing with EXEC = 0.
797 bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
798
799 /// Returns true if the instruction could potentially depend on the value of
800 /// exec. If false, exec dependencies may safely be ignored.
801 bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
802
803 bool isInlineConstant(const APInt &Imm) const;
804
  /// \returns true if \p Imm's bit pattern is an inline constant. The check
  /// is performed on the raw bits via bitcastToAPInt.
  bool isInlineConstant(const APFloat &Imm) const {
    return isInlineConstant(Imm.bitcastToAPInt());
  }
808
809 bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
810
isInlineConstant(const MachineOperand & MO,const MCOperandInfo & OpInfo)811 bool isInlineConstant(const MachineOperand &MO,
812 const MCOperandInfo &OpInfo) const {
813 return isInlineConstant(MO, OpInfo.OperandType);
814 }
815
816 /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would
817 /// be an inline immediate.
  /// \returns true if, were \p UseMO substituted with \p DefMO in \p MI, the
  /// result would be an inline immediate operand.
  bool isInlineConstant(const MachineInstr &MI,
                        const MachineOperand &UseMO,
                        const MachineOperand &DefMO) const {
    assert(UseMO.getParent() == &MI);
    int OpIdx = MI.getOperandNo(&UseMO);
    // Operands with no descriptor info (or beyond the declared operand
    // count, i.e. implicit operands) can never take an inline constant.
    if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) {
      return false;
    }

    return isInlineConstant(DefMO, MI.getDesc().OpInfo[OpIdx]);
  }
829
830 /// \p returns true if the operand \p OpIdx in \p MI is a valid inline
831 /// immediate.
isInlineConstant(const MachineInstr & MI,unsigned OpIdx)832 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
833 const MachineOperand &MO = MI.getOperand(OpIdx);
834 return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
835 }
836
  /// \returns true if \p MO would be a valid inline immediate in operand slot
  /// \p OpIdx of \p MI.
  bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
                        const MachineOperand &MO) const {
    // No descriptor info, or an implicit operand => never inline.
    if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands)
      return false;

    if (MI.isCopy()) {
      // COPY carries no operand type info; derive the operand type from the
      // operand size instead (only 32- and 64-bit copies are expected here).
      unsigned Size = getOpSize(MI, OpIdx);
      assert(Size == 8 || Size == 4);

      uint8_t OpType = (Size == 8) ?
        AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
      return isInlineConstant(MO, OpType);
    }

    return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
  }
853
  /// \returns true if \p MO is an inline constant in the context of its own
  /// operand slot within its parent instruction.
  bool isInlineConstant(const MachineOperand &MO) const {
    const MachineInstr *Parent = MO.getParent();
    return isInlineConstant(*Parent, Parent->getOperandNo(&MO));
  }
858
  /// \returns true if \p MO is an immediate that cannot be encoded inline for
  /// operand kind \p OpInfo, i.e. it requires a separate 32-bit literal.
  bool isLiteralConstant(const MachineOperand &MO,
                         const MCOperandInfo &OpInfo) const {
    return MO.isImm() && !isInlineConstant(MO, OpInfo.OperandType);
  }
863
  /// \returns true if operand \p OpIdx of \p MI is an immediate that cannot
  /// be encoded inline and therefore needs a separate 32-bit literal.
  bool isLiteralConstant(const MachineInstr &MI, int OpIdx) const {
    const MachineOperand &MO = MI.getOperand(OpIdx);
    return MO.isImm() && !isInlineConstant(MI, OpIdx);
  }
868
  // Returns true if this operand could potentially require a 32-bit literal
  // operand, but not necessarily. A FrameIndex for example could resolve to an
  // inline immediate value that will not require an additional 4-bytes; this
  // assumes that it will.
  bool isLiteralConstantLike(const MachineOperand &MO,
                             const MCOperandInfo &OpInfo) const;

  bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
                         const MachineOperand &MO) const;

  /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
  /// This function will return false if you pass it a 32-bit instruction.
  bool hasVALU32BitEncoding(unsigned Opcode) const;

  /// Returns true if this operand uses the constant bus.
  bool usesConstantBus(const MachineRegisterInfo &MRI,
                       const MachineOperand &MO,
                       const MCOperandInfo &OpInfo) const;

  /// Return true if this instruction has any modifiers.
  /// e.g. src[012]_mod, omod, clamp.
  bool hasModifiers(unsigned Opcode) const;

  bool hasModifiersSet(const MachineInstr &MI,
                       unsigned OpName) const;
  bool hasAnyModifiersSet(const MachineInstr &MI) const;

  bool canShrink(const MachineInstr &MI,
                 const MachineRegisterInfo &MRI) const;

  MachineInstr *buildShrunkInst(MachineInstr &MI,
                                unsigned NewOpcode) const;

  bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;

  unsigned getVALUOp(const MachineInstr &MI) const;

  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, trying to infer the correct
  /// register class based on the other operands.
  const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
                                           unsigned OpNo) const;
914
915 /// Return the size in bytes of the operand OpNo on the given
916 // instruction opcode.
getOpSize(uint16_t Opcode,unsigned OpNo)917 unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
918 const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];
919
920 if (OpInfo.RegClass == -1) {
921 // If this is an immediate operand, this must be a 32-bit literal.
922 assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
923 return 4;
924 }
925
926 return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
927 }
928
929 /// This form should usually be preferred since it handles operands
930 /// with unknown register classes.
getOpSize(const MachineInstr & MI,unsigned OpNo)931 unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
932 const MachineOperand &MO = MI.getOperand(OpNo);
933 if (MO.isReg()) {
934 if (unsigned SubReg = MO.getSubReg()) {
935 return RI.getSubRegIdxSize(SubReg) / 8;
936 }
937 }
938 return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
939 }
940
  /// Legalize the \p OpIndex operand of this instruction by inserting
  /// a MOV. For example:
  /// ADD_I32_e32 VGPR0, 15
  /// to
  /// MOV VGPR1, 15
  /// ADD_I32_e32 VGPR0, VGPR1
  ///
  /// If the operand being legalized is a register, then a COPY will be used
  /// instead of MOV.
  void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;

  /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
  /// for \p MI.
  bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
                      const MachineOperand *MO = nullptr) const;

  /// Check if \p MO would be a valid operand for the given operand
  /// definition \p OpInfo. Note this does not attempt to validate constant bus
  /// restrictions (e.g. literal constant usage).
  bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
                          const MCOperandInfo &OpInfo,
                          const MachineOperand &MO) const;

  /// Check if \p MO (a register operand) is a legal register for the
  /// given operand description.
  bool isLegalRegOperand(const MachineRegisterInfo &MRI,
                         const MCOperandInfo &OpInfo,
                         const MachineOperand &MO) const;

  /// Legalize operands in \p MI by either commuting it or inserting a
  /// copy of src1.
  void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Fix operands in \p MI to satisfy constant bus requirements.
  void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  /// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only
  /// be used when it is known that the value in SrcReg is the same across all
  /// threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
  Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
                              MachineRegisterInfo &MRI) const;

  void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
  void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;

  void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
                              MachineBasicBlock::iterator I,
                              const TargetRegisterClass *DstRC,
                              MachineOperand &Op, MachineRegisterInfo &MRI,
                              const DebugLoc &DL) const;

  /// Legalize all operands in this instruction. This function may create new
  /// instructions and control-flow around \p MI. If present, \p MDT is
  /// updated.
  /// \returns A new basic block that contains \p MI if new blocks were created.
  MachineBasicBlock *
  legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;

  /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
  /// was moved to VGPR. \returns true if succeeded.
  bool moveFlatAddrToVGPR(MachineInstr &Inst) const;

  /// Replace this instruction's opcode with the equivalent VALU
  /// opcode. This function will also move the users of \p MI to the
  /// VALU if necessary. If present, \p MDT is updated.
  MachineBasicBlock *moveToVALU(MachineInstr &MI,
                                MachineDominatorTree *MDT = nullptr) const;

  void insertNoop(MachineBasicBlock &MBB,
                  MachineBasicBlock::iterator MI) const override;

  void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   unsigned Quantity) const override;

  void insertReturn(MachineBasicBlock &MBB) const;
  /// Return the number of wait states that result from executing this
  /// instruction.
  static unsigned getNumWaitStates(const MachineInstr &MI);

  /// Returns the operand named \p OperandName. If \p MI does not have an
  /// operand named \p OperandName, this function returns nullptr.
  LLVM_READONLY
  MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
1025
  /// Const overload of getNamedOperand: returns the operand named \p OpName,
  /// or nullptr if \p MI has no such operand. The const_cast is safe because
  /// the result is handed back through a pointer-to-const.
  LLVM_READONLY
  const MachineOperand *getNamedOperand(const MachineInstr &MI,
                                        unsigned OpName) const {
    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
  }
1031
1032 /// Get required immediate operand
getNamedImmOperand(const MachineInstr & MI,unsigned OpName)1033 int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
1034 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
1035 return MI.getOperand(Idx).getImm();
1036 }
1037
  uint64_t getDefaultRsrcDataFormat() const;
  uint64_t getScratchRsrcWords23() const;

  bool isLowLatencyInstruction(const MachineInstr &MI) const;
  bool isHighLatencyDef(int Opc) const override;

  /// Return the descriptor of the target-specific machine instruction
  /// that corresponds to the specified pseudo or native opcode.
  /// NOTE(review): pseudoToMCOpcode() returns -1 when no MC opcode exists;
  /// callers are expected to only pass opcodes with a valid mapping — verify.
  const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
    return get(pseudoToMCOpcode(Opcode));
  }
1049
  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;

  unsigned isLoadFromStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
  unsigned isStoreToStackSlot(const MachineInstr &MI,
                              int &FrameIndex) const override;

  unsigned getInstBundleSize(const MachineInstr &MI) const;
  unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

  bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;

  bool isNonUniformBranchInstr(MachineInstr &Instr) const;

  void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
                                 MachineBasicBlock *IfEnd) const;

  void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
                                   MachineBasicBlock *LoopEnd) const;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

  ArrayRef<std::pair<int, const char *>>
  getSerializableTargetIndices() const override;

  ArrayRef<std::pair<unsigned, const char *>>
  getSerializableDirectMachineOperandTargetFlags() const override;

  ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
  getSerializableMachineMemOperandTargetFlags() const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                     const ScheduleDAG *DAG) const override;

  ScheduleHazardRecognizer *
  CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

  ScheduleHazardRecognizer *
  CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                 const ScheduleDAGMI *DAG) const override;

  bool isBasicBlockPrologue(const MachineInstr &MI) const override;

  MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator InsPt,
                                         const DebugLoc &DL, Register Src,
                                         Register Dst) const override;

  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator InsPt,
                                    const DebugLoc &DL, Register Src,
                                    unsigned SrcSubReg,
                                    Register Dst) const override;

  bool isWave32() const;

  /// Return a partially built integer add instruction without carry.
  /// Caller must add source operands.
  /// For pre-GFX9 it will generate unused carry destination operand.
  /// TODO: After GFX9 it should return a no-carry operation.
  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg) const;

  MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL,
                                    Register DestReg,
                                    RegScavenger &RS) const;

  static bool isKillTerminator(unsigned Opcode);
  const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
1126
isLegalMUBUFImmOffset(unsigned Imm)1127 static bool isLegalMUBUFImmOffset(unsigned Imm) {
1128 return isUInt<12>(Imm);
1129 }
1130
  /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
  /// encoded instruction for address space \p AddrSpace. \p FlatVariant
  /// selects the FLAT instruction variant, which determines how the offset
  /// is interpreted (e.g. whether it is signed).
  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
                         uint64_t FlatVariant) const;

  /// Split \p COffsetVal into {immediate offset field, remainder offset}
  /// values.
  std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
                                              unsigned AddrSpace,
                                              uint64_t FlatVariant) const;

  /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
  /// Return -1 if the target-specific opcode for the pseudo instruction does
  /// not exist. If Opcode is not a pseudo instruction, this is identity.
  int pseudoToMCOpcode(int Opcode) const;

  /// \brief Check if this instruction should only be used by assembler.
  /// Return true if this opcode should not be used by codegen.
  bool isAsmOnlyOpcode(int MCOp) const;

  const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
                                         const TargetRegisterInfo *TRI,
                                         const MachineFunction &MF)
    const override;

  void fixImplicitOperands(MachineInstr &MI) const;

  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                      ArrayRef<unsigned> Ops,
                                      MachineBasicBlock::iterator InsertPt,
                                      int FrameIndex,
                                      LiveIntervals *LIS = nullptr,
                                      VirtRegMap *VRM = nullptr) const override;

  unsigned getInstrLatency(const InstrItineraryData *ItinData,
                           const MachineInstr &MI,
                           unsigned *PredCost = nullptr) const override;
1169
getMIRFormatter()1170 const MIRFormatter *getMIRFormatter() const override {
1171 if (!Formatter.get())
1172 Formatter = std::make_unique<AMDGPUMIRFormatter>();
1173 return Formatter.get();
1174 }
1175
  static unsigned getDSShaderTypeValue(const MachineFunction &MF);

  /// \returns the target scheduling model for this subtarget.
  const TargetSchedModel &getSchedModel() const { return SchedModel; }

  // Enforce operand's \p OpName even alignment if required by target.
  // This is used if an operand is a 32 bit register but needs to be aligned
  // regardless.
  void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
1184 };
1185
1186 /// \brief Returns true if a reg:subreg pair P has a TRC class
isOfRegClass(const TargetInstrInfo::RegSubRegPair & P,const TargetRegisterClass & TRC,MachineRegisterInfo & MRI)1187 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
1188 const TargetRegisterClass &TRC,
1189 MachineRegisterInfo &MRI) {
1190 auto *RC = MRI.getRegClass(P.Reg);
1191 if (!P.SubReg)
1192 return RC == &TRC;
1193 auto *TRI = MRI.getTargetRegisterInfo();
1194 return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
1195 }
1196
1197 /// \brief Create RegSubRegPair from a register MachineOperand
1198 inline
getRegSubRegPair(const MachineOperand & O)1199 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
1200 assert(O.isReg());
1201 return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
1202 }
1203
/// \brief Return the SubReg component from REG_SEQUENCE
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
                                                    unsigned SubReg);

/// \brief Return the defining instruction for a given reg:subreg pair
/// skipping copy-like instructions and subreg-manipulation pseudos.
/// Following another subreg of a reg:subreg isn't supported.
MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                               MachineRegisterInfo &MRI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
/// attempt to track between blocks.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
                                Register VReg,
                                const MachineInstr &DefMI,
                                const MachineInstr &UseMI);

/// \brief Return false if EXEC is not changed between the def of \p VReg at \p
/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
/// track between blocks.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
                                   Register VReg,
                                   const MachineInstr &DefMI);
1228
namespace AMDGPU {

LLVM_READONLY
int getVOPe64(uint16_t Opcode);

LLVM_READONLY
int getVOPe32(uint16_t Opcode);

LLVM_READONLY
int getSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getDPPOp32(uint16_t Opcode);

LLVM_READONLY
int getDPPOp64(uint16_t Opcode);

LLVM_READONLY
int getBasicFromSDWAOp(uint16_t Opcode);

LLVM_READONLY
int getCommuteRev(uint16_t Opcode);

LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);

LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);

/// Check if \p Opcode is an Addr64 opcode.
///
/// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
LLVM_READONLY
int getIfAddr64Inst(uint16_t Opcode);

LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);

LLVM_READONLY
int getSOPKOp(uint16_t Opcode);

/// \returns SADDR form of a FLAT Global instruction given an \p Opcode
/// of a VADDR form.
LLVM_READONLY
int getGlobalSaddrOp(uint16_t Opcode);

/// \returns VADDR form of a FLAT Global instruction given an \p Opcode
/// of a SADDR form.
LLVM_READONLY
int getGlobalVaddrOp(uint16_t Opcode);

LLVM_READONLY
int getVCMPXNoSDstOp(uint16_t Opcode);

/// \returns ST form with only immediate offset of a FLAT Scratch instruction
/// given an \p Opcode of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSTfromSS(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SVS (SADDR + VADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSVS(uint16_t Opcode);

/// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SV (VADDR) form.
LLVM_READONLY
int getFlatScratchInstSSfromSV(uint16_t Opcode);

/// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
/// of an SS (SADDR) form.
LLVM_READONLY
int getFlatScratchInstSVfromSS(uint16_t Opcode);

/// \returns earlyclobber version of a MAC MFMA if one exists.
LLVM_READONLY
int getMFMAEarlyClobberOp(uint16_t Opcode);

/// \returns v_cmpx version of a v_cmp instruction.
LLVM_READONLY
int getVCMPXOpFromVCMP(uint16_t Opcode);

const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);

} // end namespace AMDGPU
1317
namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI
1336
1337 } // end namespace llvm
1338
1339 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
1340