1 //===-- SIMCCodeEmitter.cpp - SI Code Emitter -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// The SI code emitter produces machine code that can be executed
11 /// directly on the GPU device.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "MCTargetDesc/AMDGPUFixupKinds.h"
17 #include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIDefines.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/MC/MCCodeEmitter.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCFixup.h"
25 #include "llvm/MC/MCInst.h"
26 #include "llvm/MC/MCInstrDesc.h"
27 #include "llvm/MC/MCInstrInfo.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/MathExtras.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include <cassert>
36 #include <cstdint>
37 #include <cstdlib>
38 
39 using namespace llvm;
40 
41 namespace {
42 
43 class SIMCCodeEmitter : public  AMDGPUMCCodeEmitter {
44   const MCRegisterInfo &MRI;
45 
46   /// Encode an fp or int literal
47   uint32_t getLitEncoding(const MCOperand &MO, const MCOperandInfo &OpInfo,
48                           const MCSubtargetInfo &STI) const;
49 
50 public:
51   SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
52                   MCContext &ctx)
53       : AMDGPUMCCodeEmitter(mcii), MRI(mri) {}
54   SIMCCodeEmitter(const SIMCCodeEmitter &) = delete;
55   SIMCCodeEmitter &operator=(const SIMCCodeEmitter &) = delete;
56 
57   /// Encode the instruction and write it to the OS.
58   void encodeInstruction(const MCInst &MI, raw_ostream &OS,
59                          SmallVectorImpl<MCFixup> &Fixups,
60                          const MCSubtargetInfo &STI) const override;
61 
62   /// \returns the encoding for an MCOperand.
63   uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
64                              SmallVectorImpl<MCFixup> &Fixups,
65                              const MCSubtargetInfo &STI) const override;
66 
67   /// Use a fixup to encode the simm16 field for SOPP branch
68   ///        instructions.
69   unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
70                              SmallVectorImpl<MCFixup> &Fixups,
71                              const MCSubtargetInfo &STI) const override;
72 
73   unsigned getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
74                                  SmallVectorImpl<MCFixup> &Fixups,
75                                  const MCSubtargetInfo &STI) const override;
76 
77   unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
78                               SmallVectorImpl<MCFixup> &Fixups,
79                               const MCSubtargetInfo &STI) const override;
80 
81   unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
82                                   SmallVectorImpl<MCFixup> &Fixups,
83                                   const MCSubtargetInfo &STI) const override;
84 
85   unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
86                                 SmallVectorImpl<MCFixup> &Fixups,
87                                 const MCSubtargetInfo &STI) const override;
88 };
89 
90 } // end anonymous namespace
91 
92 MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
93                                            const MCRegisterInfo &MRI,
94                                            MCContext &Ctx) {
95   return new SIMCCodeEmitter(MCII, MRI, Ctx);
96 }
97 
// Maps an integer onto its inline-immediate operand encoding.
//
// Values 0..64 map to 128..192 and values -1..-16 map to 193..208. Any other
// value is not an integer inline immediate and yields 0.
template <typename IntTy>
static uint32_t getIntInlineImmEncoding(IntTy Imm) {
  const bool IsSmallNonNegative = Imm >= 0 && Imm <= 64;
  if (IsSmallNonNegative)
    return static_cast<uint32_t>(128 + Imm);

  const bool IsSmallNegative = Imm < 0 && Imm >= -16;
  if (IsSmallNegative)
    return static_cast<uint32_t>(192 - Imm); // 192 + |Imm|

  return 0;
}
110 
111 static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) {
112   uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
113   if (IntImm != 0)
114     return IntImm;
115 
116   if (Val == 0x3800) // 0.5
117     return 240;
118 
119   if (Val == 0xB800) // -0.5
120     return 241;
121 
122   if (Val == 0x3C00) // 1.0
123     return 242;
124 
125   if (Val == 0xBC00) // -1.0
126     return 243;
127 
128   if (Val == 0x4000) // 2.0
129     return 244;
130 
131   if (Val == 0xC000) // -2.0
132     return 245;
133 
134   if (Val == 0x4400) // 4.0
135     return 246;
136 
137   if (Val == 0xC400) // -4.0
138     return 247;
139 
140   if (Val == 0x3118 && // 1.0 / (2.0 * pi)
141       STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
142     return 248;
143 
144   return 255;
145 }
146 
147 static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) {
148   uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
149   if (IntImm != 0)
150     return IntImm;
151 
152   if (Val == FloatToBits(0.5f))
153     return 240;
154 
155   if (Val == FloatToBits(-0.5f))
156     return 241;
157 
158   if (Val == FloatToBits(1.0f))
159     return 242;
160 
161   if (Val == FloatToBits(-1.0f))
162     return 243;
163 
164   if (Val == FloatToBits(2.0f))
165     return 244;
166 
167   if (Val == FloatToBits(-2.0f))
168     return 245;
169 
170   if (Val == FloatToBits(4.0f))
171     return 246;
172 
173   if (Val == FloatToBits(-4.0f))
174     return 247;
175 
176   if (Val == 0x3e22f983 && // 1.0 / (2.0 * pi)
177       STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
178     return 248;
179 
180   return 255;
181 }
182 
183 static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) {
184   uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
185   if (IntImm != 0)
186     return IntImm;
187 
188   if (Val == DoubleToBits(0.5))
189     return 240;
190 
191   if (Val == DoubleToBits(-0.5))
192     return 241;
193 
194   if (Val == DoubleToBits(1.0))
195     return 242;
196 
197   if (Val == DoubleToBits(-1.0))
198     return 243;
199 
200   if (Val == DoubleToBits(2.0))
201     return 244;
202 
203   if (Val == DoubleToBits(-2.0))
204     return 245;
205 
206   if (Val == DoubleToBits(4.0))
207     return 246;
208 
209   if (Val == DoubleToBits(-4.0))
210     return 247;
211 
212   if (Val == 0x3fc45f306dc9c882 && // 1.0 / (2.0 * pi)
213       STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
214     return 248;
215 
216   return 255;
217 }
218 
219 uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
220                                          const MCOperandInfo &OpInfo,
221                                          const MCSubtargetInfo &STI) const {
222   int64_t Imm;
223   if (MO.isExpr()) {
224     const auto *C = dyn_cast<MCConstantExpr>(MO.getExpr());
225     if (!C)
226       return 255;
227 
228     Imm = C->getValue();
229   } else {
230 
231     assert(!MO.isFPImm());
232 
233     if (!MO.isImm())
234       return ~0;
235 
236     Imm = MO.getImm();
237   }
238 
239   switch (OpInfo.OperandType) {
240   case AMDGPU::OPERAND_REG_IMM_INT32:
241   case AMDGPU::OPERAND_REG_IMM_FP32:
242   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
243   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
244   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
245   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
246     return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
247 
248   case AMDGPU::OPERAND_REG_IMM_INT64:
249   case AMDGPU::OPERAND_REG_IMM_FP64:
250   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
251   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
252     return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
253 
254   case AMDGPU::OPERAND_REG_IMM_INT16:
255   case AMDGPU::OPERAND_REG_IMM_FP16:
256   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
257   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
258   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
259   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
260     // FIXME Is this correct? What do inline immediates do on SI for f16 src
261     // which does not have f16 support?
262     return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
263 
264   case AMDGPU::OPERAND_REG_IMM_V2INT16:
265   case AMDGPU::OPERAND_REG_IMM_V2FP16:
266     if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])
267       return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
268     LLVM_FALLTHROUGH;
269   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
270   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
271   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
272   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
273     uint16_t Lo16 = static_cast<uint16_t>(Imm);
274     uint32_t Encoding = getLit16Encoding(Lo16, STI);
275     return Encoding;
276   }
277   default:
278     llvm_unreachable("invalid operand size");
279   }
280 }
281 
282 void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
283                                        SmallVectorImpl<MCFixup> &Fixups,
284                                        const MCSubtargetInfo &STI) const {
285   verifyInstructionPredicates(MI,
286                               computeAvailableFeatures(STI.getFeatureBits()));
287 
288   uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups, STI);
289   const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
290   unsigned bytes = Desc.getSize();
291 
292   for (unsigned i = 0; i < bytes; i++) {
293     OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
294   }
295 
296   // NSA encoding.
297   if (AMDGPU::isGFX10(STI) && Desc.TSFlags & SIInstrFlags::MIMG) {
298     int vaddr0 = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
299                                             AMDGPU::OpName::vaddr0);
300     int srsrc = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
301                                            AMDGPU::OpName::srsrc);
302     assert(vaddr0 >= 0 && srsrc > vaddr0);
303     unsigned NumExtraAddrs = srsrc - vaddr0 - 1;
304     unsigned NumPadding = (-NumExtraAddrs) & 3;
305 
306     for (unsigned i = 0; i < NumExtraAddrs; ++i)
307       OS.write((uint8_t)getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i),
308                                           Fixups, STI));
309     for (unsigned i = 0; i < NumPadding; ++i)
310       OS.write(0);
311   }
312 
313   if ((bytes > 8 && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) ||
314       (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
315     return;
316 
317   // Check for additional literals in SRC0/1/2 (Op 1/2/3)
318   for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) {
319 
320     // Check if this operand should be encoded as [SV]Src
321     if (!AMDGPU::isSISrcOperand(Desc, i))
322       continue;
323 
324     // Is this operand a literal immediate?
325     const MCOperand &Op = MI.getOperand(i);
326     if (getLitEncoding(Op, Desc.OpInfo[i], STI) != 255)
327       continue;
328 
329     // Yes! Encode it
330     int64_t Imm = 0;
331 
332     if (Op.isImm())
333       Imm = Op.getImm();
334     else if (Op.isExpr()) {
335       if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr()))
336         Imm = C->getValue();
337 
338     } else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
339       llvm_unreachable("Must be immediate or expr");
340 
341     for (unsigned j = 0; j < 4; j++) {
342       OS.write((uint8_t) ((Imm >> (8 * j)) & 0xff));
343     }
344 
345     // Only one literal value allowed
346     break;
347   }
348 }
349 
350 unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
351                                             SmallVectorImpl<MCFixup> &Fixups,
352                                             const MCSubtargetInfo &STI) const {
353   const MCOperand &MO = MI.getOperand(OpNo);
354 
355   if (MO.isExpr()) {
356     const MCExpr *Expr = MO.getExpr();
357     MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
358     Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
359     return 0;
360   }
361 
362   return getMachineOpValue(MI, MO, Fixups, STI);
363 }
364 
365 unsigned SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
366                                                 SmallVectorImpl<MCFixup> &Fixups,
367                                                 const MCSubtargetInfo &STI) const {
368   auto Offset = MI.getOperand(OpNo).getImm();
369   // VI only supports 20-bit unsigned offsets.
370   assert(!AMDGPU::isVI(STI) || isUInt<20>(Offset));
371   return Offset;
372 }
373 
374 unsigned
375 SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
376                                     SmallVectorImpl<MCFixup> &Fixups,
377                                     const MCSubtargetInfo &STI) const {
378   using namespace AMDGPU::SDWA;
379 
380   uint64_t RegEnc = 0;
381 
382   const MCOperand &MO = MI.getOperand(OpNo);
383 
384   if (MO.isReg()) {
385     unsigned Reg = MO.getReg();
386     RegEnc |= MRI.getEncodingValue(Reg);
387     RegEnc &= SDWA9EncValues::SRC_VGPR_MASK;
388     if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) {
389       RegEnc |= SDWA9EncValues::SRC_SGPR_MASK;
390     }
391     return RegEnc;
392   } else {
393     const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
394     uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
395     if (Enc != ~0U && Enc != 255) {
396       return Enc | SDWA9EncValues::SRC_SGPR_MASK;
397     }
398   }
399 
400   llvm_unreachable("Unsupported operand kind");
401   return 0;
402 }
403 
404 unsigned
405 SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
406                                         SmallVectorImpl<MCFixup> &Fixups,
407                                         const MCSubtargetInfo &STI) const {
408   using namespace AMDGPU::SDWA;
409 
410   uint64_t RegEnc = 0;
411 
412   const MCOperand &MO = MI.getOperand(OpNo);
413 
414   unsigned Reg = MO.getReg();
415   if (Reg != AMDGPU::VCC && Reg != AMDGPU::VCC_LO) {
416     RegEnc |= MRI.getEncodingValue(Reg);
417     RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
418     RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
419   }
420   return RegEnc;
421 }
422 
423 unsigned
424 SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
425                                       SmallVectorImpl<MCFixup> &Fixups,
426                                       const MCSubtargetInfo &STI) const {
427   unsigned Reg = MI.getOperand(OpNo).getReg();
428   uint64_t Enc = MRI.getEncodingValue(Reg);
429 
430   // VGPR and AGPR have the same encoding, but SrcA and SrcB operands of mfma
431   // instructions use acc[0:1] modifier bits to distinguish. These bits are
432   // encoded as a virtual 9th bit of the register for these operands.
433   if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
434       MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) ||
435       MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) ||
436       MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) ||
437       MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) ||
438       MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) ||
439       MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) ||
440       MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
441     Enc |= 512;
442 
443   return Enc;
444 }
445 
446 static bool needsPCRel(const MCExpr *Expr) {
447   switch (Expr->getKind()) {
448   case MCExpr::SymbolRef: {
449     auto *SE = cast<MCSymbolRefExpr>(Expr);
450     MCSymbolRefExpr::VariantKind Kind = SE->getKind();
451     return Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_LO &&
452            Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_HI;
453   }
454   case MCExpr::Binary: {
455     auto *BE = cast<MCBinaryExpr>(Expr);
456     if (BE->getOpcode() == MCBinaryExpr::Sub)
457       return false;
458     return needsPCRel(BE->getLHS()) || needsPCRel(BE->getRHS());
459   }
460   case MCExpr::Unary:
461     return needsPCRel(cast<MCUnaryExpr>(Expr)->getSubExpr());
462   case MCExpr::Target:
463   case MCExpr::Constant:
464     return false;
465   }
466   llvm_unreachable("invalid kind");
467 }
468 
469 uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
470                                             const MCOperand &MO,
471                                        SmallVectorImpl<MCFixup> &Fixups,
472                                        const MCSubtargetInfo &STI) const {
473   if (MO.isReg())
474     return MRI.getEncodingValue(MO.getReg());
475 
476   if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) {
477     // FIXME: If this is expression is PCRel or not should not depend on what
478     // the expression looks like. Given that this is just a general expression,
479     // it should probably be FK_Data_4 and whatever is producing
480     //
481     //    s_add_u32 s2, s2, (extern_const_addrspace+16
482     //
483     // And expecting a PCRel should instead produce
484     //
485     // .Ltmp1:
486     //   s_add_u32 s2, s2, (extern_const_addrspace+16)-.Ltmp1
487     MCFixupKind Kind;
488     if (needsPCRel(MO.getExpr()))
489       Kind = FK_PCRel_4;
490     else
491       Kind = FK_Data_4;
492 
493     const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
494     uint32_t Offset = Desc.getSize();
495     assert(Offset == 4 || Offset == 8);
496 
497     Fixups.push_back(
498       MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc()));
499   }
500 
501   // Figure out the operand number, needed for isSrcOperand check
502   unsigned OpNo = 0;
503   for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
504     if (&MO == &MI.getOperand(OpNo))
505       break;
506   }
507 
508   const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
509   if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
510     uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
511     if (Enc != ~0U &&
512         (Enc != 255 || Desc.getSize() == 4 || Desc.getSize() == 8))
513       return Enc;
514 
515   } else if (MO.isImm())
516     return MO.getImm();
517 
518   llvm_unreachable("Encoding of this operand type is not supported yet.");
519   return 0;
520 }
521 
522 #define ENABLE_INSTR_PREDICATE_VERIFIER
523 #include "AMDGPUGenMCCodeEmitter.inc"
524