1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Interface definition for SIInstrInfo.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
16 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
17
18 #include "AMDGPUInstrInfo.h"
19 #include "SIDefines.h"
20 #include "SIRegisterInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/MC/MCInstrDesc.h"
30 #include "llvm/Support/Compiler.h"
31 #include <cassert>
32 #include <cstdint>
33
34 #define GET_INSTRINFO_HEADER
35 #include "AMDGPUGenInstrInfo.inc"
36
37 namespace llvm {
38
39 class APInt;
40 class MachineDominatorTree;
41 class MachineRegisterInfo;
42 class RegScavenger;
43 class GCNSubtarget;
44 class TargetRegisterClass;
45
46 class SIInstrInfo final : public AMDGPUGenInstrInfo {
47 private:
48 const SIRegisterInfo RI;
49 const GCNSubtarget &ST;
50
51 // The inverse predicate should have the negative value.
52 enum BranchPredicate {
53 INVALID_BR = 0,
54 SCC_TRUE = 1,
55 SCC_FALSE = -1,
56 VCCNZ = 2,
57 VCCZ = -2,
58 EXECNZ = -3,
59 EXECZ = 3
60 };
61
62 using SetVectorType = SmallSetVector<MachineInstr *, 32>;
63
64 static unsigned getBranchOpcode(BranchPredicate Cond);
65 static BranchPredicate getBranchPredicate(unsigned Opcode);
66
67 public:
68 unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
69 MachineRegisterInfo &MRI,
70 MachineOperand &SuperReg,
71 const TargetRegisterClass *SuperRC,
72 unsigned SubIdx,
73 const TargetRegisterClass *SubRC) const;
74 MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
75 MachineRegisterInfo &MRI,
76 MachineOperand &SuperReg,
77 const TargetRegisterClass *SuperRC,
78 unsigned SubIdx,
79 const TargetRegisterClass *SubRC) const;
80 private:
81 void swapOperands(MachineInstr &Inst) const;
82
83 bool moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
84 MachineDominatorTree *MDT = nullptr) const;
85
86 void lowerScalarAbs(SetVectorType &Worklist,
87 MachineInstr &Inst) const;
88
89 void lowerScalarXnor(SetVectorType &Worklist,
90 MachineInstr &Inst) const;
91
92 void splitScalarNotBinop(SetVectorType &Worklist,
93 MachineInstr &Inst,
94 unsigned Opcode) const;
95
96 void splitScalarBinOpN2(SetVectorType &Worklist,
97 MachineInstr &Inst,
98 unsigned Opcode) const;
99
100 void splitScalar64BitUnaryOp(SetVectorType &Worklist,
101 MachineInstr &Inst, unsigned Opcode) const;
102
103 void splitScalar64BitAddSub(SetVectorType &Worklist, MachineInstr &Inst,
104 MachineDominatorTree *MDT = nullptr) const;
105
106 void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst,
107 unsigned Opcode,
108 MachineDominatorTree *MDT = nullptr) const;
109
110 void splitScalar64BitXnor(SetVectorType &Worklist, MachineInstr &Inst,
111 MachineDominatorTree *MDT = nullptr) const;
112
113 void splitScalar64BitBCNT(SetVectorType &Worklist,
114 MachineInstr &Inst) const;
115 void splitScalar64BitBFE(SetVectorType &Worklist,
116 MachineInstr &Inst) const;
117 void movePackToVALU(SetVectorType &Worklist,
118 MachineRegisterInfo &MRI,
119 MachineInstr &Inst) const;
120
121 void addUsersToMoveToVALUWorklist(unsigned Reg, MachineRegisterInfo &MRI,
122 SetVectorType &Worklist) const;
123
124 void
125 addSCCDefUsersToVALUWorklist(MachineInstr &SCCDefInst,
126 SetVectorType &Worklist) const;
127
128 const TargetRegisterClass *
129 getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
130
131 bool checkInstOffsetsDoNotOverlap(MachineInstr &MIa, MachineInstr &MIb) const;
132
133 unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
134
135 protected:
136 bool swapSourceModifiers(MachineInstr &MI,
137 MachineOperand &Src0, unsigned Src0OpName,
138 MachineOperand &Src1, unsigned Src1OpName) const;
139
140 MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
141 unsigned OpIdx0,
142 unsigned OpIdx1) const override;
143
144 public:
145 enum TargetOperandFlags {
146 MO_MASK = 0x7,
147
148 MO_NONE = 0,
149 // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
150 MO_GOTPCREL = 1,
151 // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
152 MO_GOTPCREL32 = 2,
153 MO_GOTPCREL32_LO = 2,
154 // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
155 MO_GOTPCREL32_HI = 3,
156 // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
157 MO_REL32 = 4,
158 MO_REL32_LO = 4,
159 // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
160 MO_REL32_HI = 5
161 };
162
163 explicit SIInstrInfo(const GCNSubtarget &ST);
164
getRegisterInfo()165 const SIRegisterInfo &getRegisterInfo() const {
166 return RI;
167 }
168
169 bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
170 AliasAnalysis *AA) const override;
171
172 bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
173 int64_t &Offset1,
174 int64_t &Offset2) const override;
175
176 bool getMemOperandWithOffset(MachineInstr &LdSt, MachineOperand *&BaseOp,
177 int64_t &Offset,
178 const TargetRegisterInfo *TRI) const final;
179
180 bool shouldClusterMemOps(MachineOperand &BaseOp1, MachineOperand &BaseOp2,
181 unsigned NumLoads) const override;
182
183 bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
184 int64_t Offset1, unsigned NumLoads) const override;
185
186 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
187 const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
188 bool KillSrc) const override;
189
190 unsigned calculateLDSSpillAddress(MachineBasicBlock &MBB, MachineInstr &MI,
191 RegScavenger *RS, unsigned TmpReg,
192 unsigned Offset, unsigned Size) const;
193
194 void materializeImmediate(MachineBasicBlock &MBB,
195 MachineBasicBlock::iterator MI,
196 const DebugLoc &DL,
197 unsigned DestReg,
198 int64_t Value) const;
199
200 const TargetRegisterClass *getPreferredSelectRegClass(
201 unsigned Size) const;
202
203 unsigned insertNE(MachineBasicBlock *MBB,
204 MachineBasicBlock::iterator I, const DebugLoc &DL,
205 unsigned SrcReg, int Value) const;
206
207 unsigned insertEQ(MachineBasicBlock *MBB,
208 MachineBasicBlock::iterator I, const DebugLoc &DL,
209 unsigned SrcReg, int Value) const;
210
211 void storeRegToStackSlot(MachineBasicBlock &MBB,
212 MachineBasicBlock::iterator MI, unsigned SrcReg,
213 bool isKill, int FrameIndex,
214 const TargetRegisterClass *RC,
215 const TargetRegisterInfo *TRI) const override;
216
217 void loadRegFromStackSlot(MachineBasicBlock &MBB,
218 MachineBasicBlock::iterator MI, unsigned DestReg,
219 int FrameIndex, const TargetRegisterClass *RC,
220 const TargetRegisterInfo *TRI) const override;
221
222 bool expandPostRAPseudo(MachineInstr &MI) const override;
223
224 // Returns an opcode that can be used to move a value to a \p DstRC
225 // register. If there is no hardware instruction that can store to \p
226 // DstRC, then AMDGPU::COPY is returned.
227 unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
228
229 LLVM_READONLY
230 int commuteOpcode(unsigned Opc) const;
231
232 LLVM_READONLY
commuteOpcode(const MachineInstr & MI)233 inline int commuteOpcode(const MachineInstr &MI) const {
234 return commuteOpcode(MI.getOpcode());
235 }
236
237 bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
238 unsigned &SrcOpIdx2) const override;
239
240 bool findCommutedOpIndices(MCInstrDesc Desc, unsigned & SrcOpIdx0,
241 unsigned & SrcOpIdx1) const;
242
243 bool isBranchOffsetInRange(unsigned BranchOpc,
244 int64_t BrOffset) const override;
245
246 MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
247
248 unsigned insertIndirectBranch(MachineBasicBlock &MBB,
249 MachineBasicBlock &NewDestBB,
250 const DebugLoc &DL,
251 int64_t BrOffset,
252 RegScavenger *RS = nullptr) const override;
253
254 bool analyzeBranchImpl(MachineBasicBlock &MBB,
255 MachineBasicBlock::iterator I,
256 MachineBasicBlock *&TBB,
257 MachineBasicBlock *&FBB,
258 SmallVectorImpl<MachineOperand> &Cond,
259 bool AllowModify) const;
260
261 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
262 MachineBasicBlock *&FBB,
263 SmallVectorImpl<MachineOperand> &Cond,
264 bool AllowModify = false) const override;
265
266 unsigned removeBranch(MachineBasicBlock &MBB,
267 int *BytesRemoved = nullptr) const override;
268
269 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
270 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
271 const DebugLoc &DL,
272 int *BytesAdded = nullptr) const override;
273
274 bool reverseBranchCondition(
275 SmallVectorImpl<MachineOperand> &Cond) const override;
276
277 bool canInsertSelect(const MachineBasicBlock &MBB,
278 ArrayRef<MachineOperand> Cond,
279 unsigned TrueReg, unsigned FalseReg,
280 int &CondCycles,
281 int &TrueCycles, int &FalseCycles) const override;
282
283 void insertSelect(MachineBasicBlock &MBB,
284 MachineBasicBlock::iterator I, const DebugLoc &DL,
285 unsigned DstReg, ArrayRef<MachineOperand> Cond,
286 unsigned TrueReg, unsigned FalseReg) const override;
287
288 void insertVectorSelect(MachineBasicBlock &MBB,
289 MachineBasicBlock::iterator I, const DebugLoc &DL,
290 unsigned DstReg, ArrayRef<MachineOperand> Cond,
291 unsigned TrueReg, unsigned FalseReg) const;
292
293 unsigned getAddressSpaceForPseudoSourceKind(
294 unsigned Kind) const override;
295
296 bool
297 areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
298 AliasAnalysis *AA = nullptr) const override;
299
300 bool isFoldableCopy(const MachineInstr &MI) const;
301
302 bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
303 MachineRegisterInfo *MRI) const final;
304
getMachineCSELookAheadLimit()305 unsigned getMachineCSELookAheadLimit() const override { return 500; }
306
307 MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
308 MachineInstr &MI,
309 LiveVariables *LV) const override;
310
311 bool isSchedulingBoundary(const MachineInstr &MI,
312 const MachineBasicBlock *MBB,
313 const MachineFunction &MF) const override;
314
isSALU(const MachineInstr & MI)315 static bool isSALU(const MachineInstr &MI) {
316 return MI.getDesc().TSFlags & SIInstrFlags::SALU;
317 }
318
isSALU(uint16_t Opcode)319 bool isSALU(uint16_t Opcode) const {
320 return get(Opcode).TSFlags & SIInstrFlags::SALU;
321 }
322
isVALU(const MachineInstr & MI)323 static bool isVALU(const MachineInstr &MI) {
324 return MI.getDesc().TSFlags & SIInstrFlags::VALU;
325 }
326
isVALU(uint16_t Opcode)327 bool isVALU(uint16_t Opcode) const {
328 return get(Opcode).TSFlags & SIInstrFlags::VALU;
329 }
330
isVMEM(const MachineInstr & MI)331 static bool isVMEM(const MachineInstr &MI) {
332 return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
333 }
334
isVMEM(uint16_t Opcode)335 bool isVMEM(uint16_t Opcode) const {
336 return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
337 }
338
isSOP1(const MachineInstr & MI)339 static bool isSOP1(const MachineInstr &MI) {
340 return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
341 }
342
isSOP1(uint16_t Opcode)343 bool isSOP1(uint16_t Opcode) const {
344 return get(Opcode).TSFlags & SIInstrFlags::SOP1;
345 }
346
isSOP2(const MachineInstr & MI)347 static bool isSOP2(const MachineInstr &MI) {
348 return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
349 }
350
isSOP2(uint16_t Opcode)351 bool isSOP2(uint16_t Opcode) const {
352 return get(Opcode).TSFlags & SIInstrFlags::SOP2;
353 }
354
isSOPC(const MachineInstr & MI)355 static bool isSOPC(const MachineInstr &MI) {
356 return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
357 }
358
isSOPC(uint16_t Opcode)359 bool isSOPC(uint16_t Opcode) const {
360 return get(Opcode).TSFlags & SIInstrFlags::SOPC;
361 }
362
isSOPK(const MachineInstr & MI)363 static bool isSOPK(const MachineInstr &MI) {
364 return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
365 }
366
isSOPK(uint16_t Opcode)367 bool isSOPK(uint16_t Opcode) const {
368 return get(Opcode).TSFlags & SIInstrFlags::SOPK;
369 }
370
isSOPP(const MachineInstr & MI)371 static bool isSOPP(const MachineInstr &MI) {
372 return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
373 }
374
isSOPP(uint16_t Opcode)375 bool isSOPP(uint16_t Opcode) const {
376 return get(Opcode).TSFlags & SIInstrFlags::SOPP;
377 }
378
isVOP1(const MachineInstr & MI)379 static bool isVOP1(const MachineInstr &MI) {
380 return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
381 }
382
isVOP1(uint16_t Opcode)383 bool isVOP1(uint16_t Opcode) const {
384 return get(Opcode).TSFlags & SIInstrFlags::VOP1;
385 }
386
isVOP2(const MachineInstr & MI)387 static bool isVOP2(const MachineInstr &MI) {
388 return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
389 }
390
isVOP2(uint16_t Opcode)391 bool isVOP2(uint16_t Opcode) const {
392 return get(Opcode).TSFlags & SIInstrFlags::VOP2;
393 }
394
isVOP3(const MachineInstr & MI)395 static bool isVOP3(const MachineInstr &MI) {
396 return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
397 }
398
isVOP3(uint16_t Opcode)399 bool isVOP3(uint16_t Opcode) const {
400 return get(Opcode).TSFlags & SIInstrFlags::VOP3;
401 }
402
isSDWA(const MachineInstr & MI)403 static bool isSDWA(const MachineInstr &MI) {
404 return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
405 }
406
isSDWA(uint16_t Opcode)407 bool isSDWA(uint16_t Opcode) const {
408 return get(Opcode).TSFlags & SIInstrFlags::SDWA;
409 }
410
isVOPC(const MachineInstr & MI)411 static bool isVOPC(const MachineInstr &MI) {
412 return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
413 }
414
isVOPC(uint16_t Opcode)415 bool isVOPC(uint16_t Opcode) const {
416 return get(Opcode).TSFlags & SIInstrFlags::VOPC;
417 }
418
isMUBUF(const MachineInstr & MI)419 static bool isMUBUF(const MachineInstr &MI) {
420 return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
421 }
422
isMUBUF(uint16_t Opcode)423 bool isMUBUF(uint16_t Opcode) const {
424 return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
425 }
426
isMTBUF(const MachineInstr & MI)427 static bool isMTBUF(const MachineInstr &MI) {
428 return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
429 }
430
isMTBUF(uint16_t Opcode)431 bool isMTBUF(uint16_t Opcode) const {
432 return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
433 }
434
isSMRD(const MachineInstr & MI)435 static bool isSMRD(const MachineInstr &MI) {
436 return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
437 }
438
isSMRD(uint16_t Opcode)439 bool isSMRD(uint16_t Opcode) const {
440 return get(Opcode).TSFlags & SIInstrFlags::SMRD;
441 }
442
443 bool isBufferSMRD(const MachineInstr &MI) const;
444
isDS(const MachineInstr & MI)445 static bool isDS(const MachineInstr &MI) {
446 return MI.getDesc().TSFlags & SIInstrFlags::DS;
447 }
448
isDS(uint16_t Opcode)449 bool isDS(uint16_t Opcode) const {
450 return get(Opcode).TSFlags & SIInstrFlags::DS;
451 }
452
453 bool isAlwaysGDS(uint16_t Opcode) const;
454
isMIMG(const MachineInstr & MI)455 static bool isMIMG(const MachineInstr &MI) {
456 return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
457 }
458
isMIMG(uint16_t Opcode)459 bool isMIMG(uint16_t Opcode) const {
460 return get(Opcode).TSFlags & SIInstrFlags::MIMG;
461 }
462
isGather4(const MachineInstr & MI)463 static bool isGather4(const MachineInstr &MI) {
464 return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
465 }
466
isGather4(uint16_t Opcode)467 bool isGather4(uint16_t Opcode) const {
468 return get(Opcode).TSFlags & SIInstrFlags::Gather4;
469 }
470
isFLAT(const MachineInstr & MI)471 static bool isFLAT(const MachineInstr &MI) {
472 return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
473 }
474
475 // Is a FLAT encoded instruction which accesses a specific segment,
476 // i.e. global_* or scratch_*.
isSegmentSpecificFLAT(const MachineInstr & MI)477 static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
478 auto Flags = MI.getDesc().TSFlags;
479 return (Flags & SIInstrFlags::FLAT) && !(Flags & SIInstrFlags::LGKM_CNT);
480 }
481
482 // Any FLAT encoded instruction, including global_* and scratch_*.
isFLAT(uint16_t Opcode)483 bool isFLAT(uint16_t Opcode) const {
484 return get(Opcode).TSFlags & SIInstrFlags::FLAT;
485 }
486
isEXP(const MachineInstr & MI)487 static bool isEXP(const MachineInstr &MI) {
488 return MI.getDesc().TSFlags & SIInstrFlags::EXP;
489 }
490
isEXP(uint16_t Opcode)491 bool isEXP(uint16_t Opcode) const {
492 return get(Opcode).TSFlags & SIInstrFlags::EXP;
493 }
494
isWQM(const MachineInstr & MI)495 static bool isWQM(const MachineInstr &MI) {
496 return MI.getDesc().TSFlags & SIInstrFlags::WQM;
497 }
498
isWQM(uint16_t Opcode)499 bool isWQM(uint16_t Opcode) const {
500 return get(Opcode).TSFlags & SIInstrFlags::WQM;
501 }
502
isDisableWQM(const MachineInstr & MI)503 static bool isDisableWQM(const MachineInstr &MI) {
504 return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
505 }
506
isDisableWQM(uint16_t Opcode)507 bool isDisableWQM(uint16_t Opcode) const {
508 return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
509 }
510
isVGPRSpill(const MachineInstr & MI)511 static bool isVGPRSpill(const MachineInstr &MI) {
512 return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
513 }
514
isVGPRSpill(uint16_t Opcode)515 bool isVGPRSpill(uint16_t Opcode) const {
516 return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
517 }
518
isSGPRSpill(const MachineInstr & MI)519 static bool isSGPRSpill(const MachineInstr &MI) {
520 return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
521 }
522
isSGPRSpill(uint16_t Opcode)523 bool isSGPRSpill(uint16_t Opcode) const {
524 return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
525 }
526
isDPP(const MachineInstr & MI)527 static bool isDPP(const MachineInstr &MI) {
528 return MI.getDesc().TSFlags & SIInstrFlags::DPP;
529 }
530
isDPP(uint16_t Opcode)531 bool isDPP(uint16_t Opcode) const {
532 return get(Opcode).TSFlags & SIInstrFlags::DPP;
533 }
534
isVOP3P(const MachineInstr & MI)535 static bool isVOP3P(const MachineInstr &MI) {
536 return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
537 }
538
isVOP3P(uint16_t Opcode)539 bool isVOP3P(uint16_t Opcode) const {
540 return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
541 }
542
isVINTRP(const MachineInstr & MI)543 static bool isVINTRP(const MachineInstr &MI) {
544 return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
545 }
546
isVINTRP(uint16_t Opcode)547 bool isVINTRP(uint16_t Opcode) const {
548 return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
549 }
550
isScalarUnit(const MachineInstr & MI)551 static bool isScalarUnit(const MachineInstr &MI) {
552 return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
553 }
554
usesVM_CNT(const MachineInstr & MI)555 static bool usesVM_CNT(const MachineInstr &MI) {
556 return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
557 }
558
usesLGKM_CNT(const MachineInstr & MI)559 static bool usesLGKM_CNT(const MachineInstr &MI) {
560 return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
561 }
562
sopkIsZext(const MachineInstr & MI)563 static bool sopkIsZext(const MachineInstr &MI) {
564 return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
565 }
566
sopkIsZext(uint16_t Opcode)567 bool sopkIsZext(uint16_t Opcode) const {
568 return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
569 }
570
571 /// \returns true if this is an s_store_dword* instruction. This is more
572 /// specific than than isSMEM && mayStore.
isScalarStore(const MachineInstr & MI)573 static bool isScalarStore(const MachineInstr &MI) {
574 return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
575 }
576
isScalarStore(uint16_t Opcode)577 bool isScalarStore(uint16_t Opcode) const {
578 return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
579 }
580
isFixedSize(const MachineInstr & MI)581 static bool isFixedSize(const MachineInstr &MI) {
582 return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
583 }
584
isFixedSize(uint16_t Opcode)585 bool isFixedSize(uint16_t Opcode) const {
586 return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
587 }
588
hasFPClamp(const MachineInstr & MI)589 static bool hasFPClamp(const MachineInstr &MI) {
590 return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
591 }
592
hasFPClamp(uint16_t Opcode)593 bool hasFPClamp(uint16_t Opcode) const {
594 return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
595 }
596
hasIntClamp(const MachineInstr & MI)597 static bool hasIntClamp(const MachineInstr &MI) {
598 return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
599 }
600
getClampMask(const MachineInstr & MI)601 uint64_t getClampMask(const MachineInstr &MI) const {
602 const uint64_t ClampFlags = SIInstrFlags::FPClamp |
603 SIInstrFlags::IntClamp |
604 SIInstrFlags::ClampLo |
605 SIInstrFlags::ClampHi;
606 return MI.getDesc().TSFlags & ClampFlags;
607 }
608
usesFPDPRounding(const MachineInstr & MI)609 static bool usesFPDPRounding(const MachineInstr &MI) {
610 return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
611 }
612
usesFPDPRounding(uint16_t Opcode)613 bool usesFPDPRounding(uint16_t Opcode) const {
614 return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
615 }
616
isVGPRCopy(const MachineInstr & MI)617 bool isVGPRCopy(const MachineInstr &MI) const {
618 assert(MI.isCopy());
619 unsigned Dest = MI.getOperand(0).getReg();
620 const MachineFunction &MF = *MI.getParent()->getParent();
621 const MachineRegisterInfo &MRI = MF.getRegInfo();
622 return !RI.isSGPRReg(MRI, Dest);
623 }
624
625 /// Whether we must prevent this instruction from executing with EXEC = 0.
626 bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
627
628 bool isInlineConstant(const APInt &Imm) const;
629
630 bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
631
isInlineConstant(const MachineOperand & MO,const MCOperandInfo & OpInfo)632 bool isInlineConstant(const MachineOperand &MO,
633 const MCOperandInfo &OpInfo) const {
634 return isInlineConstant(MO, OpInfo.OperandType);
635 }
636
637 /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would
638 /// be an inline immediate.
isInlineConstant(const MachineInstr & MI,const MachineOperand & UseMO,const MachineOperand & DefMO)639 bool isInlineConstant(const MachineInstr &MI,
640 const MachineOperand &UseMO,
641 const MachineOperand &DefMO) const {
642 assert(UseMO.getParent() == &MI);
643 int OpIdx = MI.getOperandNo(&UseMO);
644 if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) {
645 return false;
646 }
647
648 return isInlineConstant(DefMO, MI.getDesc().OpInfo[OpIdx]);
649 }
650
651 /// \p returns true if the operand \p OpIdx in \p MI is a valid inline
652 /// immediate.
isInlineConstant(const MachineInstr & MI,unsigned OpIdx)653 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
654 const MachineOperand &MO = MI.getOperand(OpIdx);
655 return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
656 }
657
isInlineConstant(const MachineInstr & MI,unsigned OpIdx,const MachineOperand & MO)658 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
659 const MachineOperand &MO) const {
660 if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands)
661 return false;
662
663 if (MI.isCopy()) {
664 unsigned Size = getOpSize(MI, OpIdx);
665 assert(Size == 8 || Size == 4);
666
667 uint8_t OpType = (Size == 8) ?
668 AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
669 return isInlineConstant(MO, OpType);
670 }
671
672 return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
673 }
674
isInlineConstant(const MachineOperand & MO)675 bool isInlineConstant(const MachineOperand &MO) const {
676 const MachineInstr *Parent = MO.getParent();
677 return isInlineConstant(*Parent, Parent->getOperandNo(&MO));
678 }
679
isLiteralConstant(const MachineOperand & MO,const MCOperandInfo & OpInfo)680 bool isLiteralConstant(const MachineOperand &MO,
681 const MCOperandInfo &OpInfo) const {
682 return MO.isImm() && !isInlineConstant(MO, OpInfo.OperandType);
683 }
684
isLiteralConstant(const MachineInstr & MI,int OpIdx)685 bool isLiteralConstant(const MachineInstr &MI, int OpIdx) const {
686 const MachineOperand &MO = MI.getOperand(OpIdx);
687 return MO.isImm() && !isInlineConstant(MI, OpIdx);
688 }
689
690 // Returns true if this operand could potentially require a 32-bit literal
691 // operand, but not necessarily. A FrameIndex for example could resolve to an
692 // inline immediate value that will not require an additional 4-bytes; this
693 // assumes that it will.
694 bool isLiteralConstantLike(const MachineOperand &MO,
695 const MCOperandInfo &OpInfo) const;
696
697 bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
698 const MachineOperand &MO) const;
699
700 /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
701 /// This function will return false if you pass it a 32-bit instruction.
702 bool hasVALU32BitEncoding(unsigned Opcode) const;
703
704 /// Returns true if this operand uses the constant bus.
705 bool usesConstantBus(const MachineRegisterInfo &MRI,
706 const MachineOperand &MO,
707 const MCOperandInfo &OpInfo) const;
708
709 /// Return true if this instruction has any modifiers.
710 /// e.g. src[012]_mod, omod, clamp.
711 bool hasModifiers(unsigned Opcode) const;
712
713 bool hasModifiersSet(const MachineInstr &MI,
714 unsigned OpName) const;
715 bool hasAnyModifiersSet(const MachineInstr &MI) const;
716
717 bool canShrink(const MachineInstr &MI,
718 const MachineRegisterInfo &MRI) const;
719
720 MachineInstr *buildShrunkInst(MachineInstr &MI,
721 unsigned NewOpcode) const;
722
723 bool verifyInstruction(const MachineInstr &MI,
724 StringRef &ErrInfo) const override;
725
726 unsigned getVALUOp(const MachineInstr &MI) const;
727
  /// Return the correct register class for \p OpNo. For target-specific
  /// instructions, this will return the register class that has been defined
  /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
  /// the register class of its machine operand, in order to infer the correct
  /// register class based on the other operands.
733 const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
734 unsigned OpNo) const;
735
736 /// Return the size in bytes of the operand OpNo on the given
737 // instruction opcode.
getOpSize(uint16_t Opcode,unsigned OpNo)738 unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
739 const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];
740
741 if (OpInfo.RegClass == -1) {
742 // If this is an immediate operand, this must be a 32-bit literal.
743 assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
744 return 4;
745 }
746
747 return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
748 }
749
750 /// This form should usually be preferred since it handles operands
751 /// with unknown register classes.
getOpSize(const MachineInstr & MI,unsigned OpNo)752 unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
753 const MachineOperand &MO = MI.getOperand(OpNo);
754 if (MO.isReg()) {
755 if (unsigned SubReg = MO.getSubReg()) {
756 assert(RI.getRegSizeInBits(*RI.getSubClassWithSubReg(
757 MI.getParent()->getParent()->getRegInfo().
758 getRegClass(MO.getReg()), SubReg)) >= 32 &&
759 "Sub-dword subregs are not supported");
760 return RI.getSubRegIndexLaneMask(SubReg).getNumLanes() * 4;
761 }
762 }
763 return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
764 }
765
766 /// \returns true if it is legal for the operand at index \p OpNo
767 /// to read a VGPR.
768 bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const;
769
770 /// Legalize the \p OpIndex operand of this instruction by inserting
771 /// a MOV. For example:
772 /// ADD_I32_e32 VGPR0, 15
773 /// to
774 /// MOV VGPR1, 15
775 /// ADD_I32_e32 VGPR0, VGPR1
776 ///
777 /// If the operand being legalized is a register, then a COPY will be used
778 /// instead of MOV.
779 void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
780
781 /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
782 /// for \p MI.
783 bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
784 const MachineOperand *MO = nullptr) const;
785
786 /// Check if \p MO would be a valid operand for the given operand
787 /// definition \p OpInfo. Note this does not attempt to validate constant bus
788 /// restrictions (e.g. literal constant usage).
789 bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
790 const MCOperandInfo &OpInfo,
791 const MachineOperand &MO) const;
792
793 /// Check if \p MO (a register operand) is a legal register for the
794 /// given operand description.
795 bool isLegalRegOperand(const MachineRegisterInfo &MRI,
796 const MCOperandInfo &OpInfo,
797 const MachineOperand &MO) const;
798
799 /// Legalize operands in \p MI by either commuting it or inserting a
800 /// copy of src1.
801 void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
802
803 /// Fix operands in \p MI to satisfy constant bus requirements.
804 void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
805
  /// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only
  /// be used when it is known that the value in \p SrcReg is the same across
  /// all threads in the wave.
  /// \returns The SGPR register that \p SrcReg was copied to.
810 unsigned readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
811 MachineRegisterInfo &MRI) const;
812
813 void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
814
815 void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
816 MachineBasicBlock::iterator I,
817 const TargetRegisterClass *DstRC,
818 MachineOperand &Op, MachineRegisterInfo &MRI,
819 const DebugLoc &DL) const;
820
821 /// Legalize all operands in this instruction. This function may create new
822 /// instructions and control-flow around \p MI. If present, \p MDT is
823 /// updated.
824 void legalizeOperands(MachineInstr &MI,
825 MachineDominatorTree *MDT = nullptr) const;
826
827 /// Replace this instruction's opcode with the equivalent VALU
828 /// opcode. This function will also move the users of \p MI to the
829 /// VALU if necessary. If present, \p MDT is updated.
830 void moveToVALU(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
831
832 void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
833 int Count) const;
834
835 void insertNoop(MachineBasicBlock &MBB,
836 MachineBasicBlock::iterator MI) const override;
837
838 void insertReturn(MachineBasicBlock &MBB) const;
839 /// Return the number of wait states that result from executing this
840 /// instruction.
841 unsigned getNumWaitStates(const MachineInstr &MI) const;
842
  /// Returns the operand named \p OperandName. If \p MI does not have an
  /// operand named \p OperandName, this function returns nullptr.
845 LLVM_READONLY
846 MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
847
848 LLVM_READONLY
getNamedOperand(const MachineInstr & MI,unsigned OpName)849 const MachineOperand *getNamedOperand(const MachineInstr &MI,
850 unsigned OpName) const {
851 return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
852 }
853
854 /// Get required immediate operand
getNamedImmOperand(const MachineInstr & MI,unsigned OpName)855 int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
856 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
857 return MI.getOperand(Idx).getImm();
858 }
859
860 uint64_t getDefaultRsrcDataFormat() const;
861 uint64_t getScratchRsrcWords23() const;
862
863 bool isLowLatencyInstruction(const MachineInstr &MI) const;
864 bool isHighLatencyInstruction(const MachineInstr &MI) const;
865
866 /// Return the descriptor of the target-specific machine instruction
867 /// that corresponds to the specified pseudo or native opcode.
getMCOpcodeFromPseudo(unsigned Opcode)868 const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
869 return get(pseudoToMCOpcode(Opcode));
870 }
871
872 unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
873 unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
874
875 unsigned isLoadFromStackSlot(const MachineInstr &MI,
876 int &FrameIndex) const override;
877 unsigned isStoreToStackSlot(const MachineInstr &MI,
878 int &FrameIndex) const override;
879
880 unsigned getInstBundleSize(const MachineInstr &MI) const;
881 unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
882
883 bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
884
885 bool isNonUniformBranchInstr(MachineInstr &Instr) const;
886
887 void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
888 MachineBasicBlock *IfEnd) const;
889
890 void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
891 MachineBasicBlock *LoopEnd) const;
892
893 std::pair<unsigned, unsigned>
894 decomposeMachineOperandsTargetFlags(unsigned TF) const override;
895
896 ArrayRef<std::pair<int, const char *>>
897 getSerializableTargetIndices() const override;
898
899 ArrayRef<std::pair<unsigned, const char *>>
900 getSerializableDirectMachineOperandTargetFlags() const override;
901
902 ScheduleHazardRecognizer *
903 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
904 const ScheduleDAG *DAG) const override;
905
906 ScheduleHazardRecognizer *
907 CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
908
909 bool isBasicBlockPrologue(const MachineInstr &MI) const override;
910
911 /// Return a partially built integer add instruction without carry.
912 /// Caller must add source operands.
913 /// For pre-GFX9 it will generate unused carry destination operand.
914 /// TODO: After GFX9 it should return a no-carry operation.
915 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
916 MachineBasicBlock::iterator I,
917 const DebugLoc &DL,
918 unsigned DestReg) const;
919
920 static bool isKillTerminator(unsigned Opcode);
921 const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
922
isLegalMUBUFImmOffset(unsigned Imm)923 static bool isLegalMUBUFImmOffset(unsigned Imm) {
924 return isUInt<12>(Imm);
925 }
926
927 /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
928 /// Return -1 if the target-specific opcode for the pseudo instruction does
929 /// not exist. If Opcode is not a pseudo instruction, this is identity.
930 int pseudoToMCOpcode(int Opcode) const;
931 };
932
933 /// \brief Returns true if a reg:subreg pair P has a TRC class
isOfRegClass(const TargetInstrInfo::RegSubRegPair & P,const TargetRegisterClass & TRC,MachineRegisterInfo & MRI)934 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
935 const TargetRegisterClass &TRC,
936 MachineRegisterInfo &MRI) {
937 auto *RC = MRI.getRegClass(P.Reg);
938 if (!P.SubReg)
939 return RC == &TRC;
940 auto *TRI = MRI.getTargetRegisterInfo();
941 return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
942 }
943
944 /// \brief Create RegSubRegPair from a register MachineOperand
945 inline
getRegSubRegPair(const MachineOperand & O)946 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
947 assert(O.isReg());
948 return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
949 }
950
951 /// \brief Return the SubReg component from REG_SEQUENCE
952 TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
953 unsigned SubReg);
954
955 /// \brief Return the defining instruction for a given reg:subreg pair
956 /// skipping copy like instructions and subreg-manipulation pseudos.
957 /// Following another subreg of a reg:subreg isn't supported.
958 MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
959 MachineRegisterInfo &MRI);
960
961 namespace AMDGPU {
962
963 LLVM_READONLY
964 int getVOPe64(uint16_t Opcode);
965
966 LLVM_READONLY
967 int getVOPe32(uint16_t Opcode);
968
969 LLVM_READONLY
970 int getSDWAOp(uint16_t Opcode);
971
972 LLVM_READONLY
973 int getDPPOp32(uint16_t Opcode);
974
975 LLVM_READONLY
976 int getBasicFromSDWAOp(uint16_t Opcode);
977
978 LLVM_READONLY
979 int getCommuteRev(uint16_t Opcode);
980
981 LLVM_READONLY
982 int getCommuteOrig(uint16_t Opcode);
983
984 LLVM_READONLY
985 int getAddr64Inst(uint16_t Opcode);
986
987 /// Check if \p Opcode is an Addr64 opcode.
988 ///
989 /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
990 LLVM_READONLY
991 int getIfAddr64Inst(uint16_t Opcode);
992
993 LLVM_READONLY
994 int getMUBUFNoLdsInst(uint16_t Opcode);
995
996 LLVM_READONLY
997 int getAtomicRetOp(uint16_t Opcode);
998
999 LLVM_READONLY
1000 int getAtomicNoRetOp(uint16_t Opcode);
1001
1002 LLVM_READONLY
1003 int getSOPKOp(uint16_t Opcode);
1004
1005 LLVM_READONLY
1006 int getGlobalSaddrOp(uint16_t Opcode);
1007
1008 const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
1009 const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
1010 const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
1011 const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
1012
1013 // For MachineOperands.
1014 enum TargetFlags {
1015 TF_LONG_BRANCH_FORWARD = 1 << 0,
1016 TF_LONG_BRANCH_BACKWARD = 1 << 1
1017 };
1018
1019 } // end namespace AMDGPU
1020
namespace SI {
namespace KernelInputOffsets {

/// Offsets in bytes from the start of the input buffer.
/// The entries are laid out 4 bytes apart, in X, Y, Z order per group.
enum Offsets {
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32
};

} // end namespace KernelInputOffsets
} // end namespace SI
1039
1040 } // end namespace llvm
1041
1042 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
1043