1 //===-- SIMCCodeEmitter.cpp - SI Code Emitter -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// The SI code emitter produces machine code that can be executed
11 /// directly on the GPU device.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "MCTargetDesc/AMDGPUFixupKinds.h"
17 #include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIDefines.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/MC/MCCodeEmitter.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCFixup.h"
25 #include "llvm/MC/MCInst.h"
26 #include "llvm/MC/MCInstrDesc.h"
27 #include "llvm/MC/MCInstrInfo.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCSymbol.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/MathExtras.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include <cassert>
36 #include <cstdint>
37 #include <cstdlib>
38 
39 using namespace llvm;
40 
41 namespace {
42 
43 class SIMCCodeEmitter : public  AMDGPUMCCodeEmitter {
44   const MCRegisterInfo &MRI;
45 
46   /// Encode an fp or int literal
47   uint32_t getLitEncoding(const MCOperand &MO, const MCOperandInfo &OpInfo,
48                           const MCSubtargetInfo &STI) const;
49 
50 public:
51   SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
52                   MCContext &ctx)
53       : AMDGPUMCCodeEmitter(mcii), MRI(mri) {}
54   SIMCCodeEmitter(const SIMCCodeEmitter &) = delete;
55   SIMCCodeEmitter &operator=(const SIMCCodeEmitter &) = delete;
56 
57   /// Encode the instruction and write it to the OS.
58   void encodeInstruction(const MCInst &MI, raw_ostream &OS,
59                          SmallVectorImpl<MCFixup> &Fixups,
60                          const MCSubtargetInfo &STI) const override;
61 
62   /// \returns the encoding for an MCOperand.
63   uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
64                              SmallVectorImpl<MCFixup> &Fixups,
65                              const MCSubtargetInfo &STI) const override;
66 
67   /// Use a fixup to encode the simm16 field for SOPP branch
68   ///        instructions.
69   unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
70                              SmallVectorImpl<MCFixup> &Fixups,
71                              const MCSubtargetInfo &STI) const override;
72 
73   unsigned getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
74                               SmallVectorImpl<MCFixup> &Fixups,
75                               const MCSubtargetInfo &STI) const override;
76 
77   unsigned getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
78                                   SmallVectorImpl<MCFixup> &Fixups,
79                                   const MCSubtargetInfo &STI) const override;
80 
81   unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
82                                 SmallVectorImpl<MCFixup> &Fixups,
83                                 const MCSubtargetInfo &STI) const override;
84 };
85 
86 } // end anonymous namespace
87 
88 MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
89                                            const MCRegisterInfo &MRI,
90                                            MCContext &Ctx) {
91   return new SIMCCodeEmitter(MCII, MRI, Ctx);
92 }
93 
// Maps an integer to its inline-immediate encoding:
//   0 .. 64   -> 128 .. 192
//   -1 .. -16 -> 193 .. 208
// Any other value has no integer inline encoding and yields 0.
template <typename IntTy>
static uint32_t getIntInlineImmEncoding(IntTy Imm) {
  const bool IsSmallNonNegative = Imm >= 0 && Imm <= 64;
  if (IsSmallNonNegative)
    return static_cast<uint32_t>(128 + Imm);

  const bool IsSmallNegative = Imm >= -16 && Imm <= -1;
  if (IsSmallNegative)
    // Imm is negative here, so subtracting it adds its magnitude.
    return static_cast<uint32_t>(192 - Imm);

  return 0;
}
106 
107 static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) {
108   uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
109   if (IntImm != 0)
110     return IntImm;
111 
112   if (Val == 0x3800) // 0.5
113     return 240;
114 
115   if (Val == 0xB800) // -0.5
116     return 241;
117 
118   if (Val == 0x3C00) // 1.0
119     return 242;
120 
121   if (Val == 0xBC00) // -1.0
122     return 243;
123 
124   if (Val == 0x4000) // 2.0
125     return 244;
126 
127   if (Val == 0xC000) // -2.0
128     return 245;
129 
130   if (Val == 0x4400) // 4.0
131     return 246;
132 
133   if (Val == 0xC400) // -4.0
134     return 247;
135 
136   if (Val == 0x3118 && // 1.0 / (2.0 * pi)
137       STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
138     return 248;
139 
140   return 255;
141 }
142 
143 static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) {
144   uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
145   if (IntImm != 0)
146     return IntImm;
147 
148   if (Val == FloatToBits(0.5f))
149     return 240;
150 
151   if (Val == FloatToBits(-0.5f))
152     return 241;
153 
154   if (Val == FloatToBits(1.0f))
155     return 242;
156 
157   if (Val == FloatToBits(-1.0f))
158     return 243;
159 
160   if (Val == FloatToBits(2.0f))
161     return 244;
162 
163   if (Val == FloatToBits(-2.0f))
164     return 245;
165 
166   if (Val == FloatToBits(4.0f))
167     return 246;
168 
169   if (Val == FloatToBits(-4.0f))
170     return 247;
171 
172   if (Val == 0x3e22f983 && // 1.0 / (2.0 * pi)
173       STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
174     return 248;
175 
176   return 255;
177 }
178 
179 static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) {
180   uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
181   if (IntImm != 0)
182     return IntImm;
183 
184   if (Val == DoubleToBits(0.5))
185     return 240;
186 
187   if (Val == DoubleToBits(-0.5))
188     return 241;
189 
190   if (Val == DoubleToBits(1.0))
191     return 242;
192 
193   if (Val == DoubleToBits(-1.0))
194     return 243;
195 
196   if (Val == DoubleToBits(2.0))
197     return 244;
198 
199   if (Val == DoubleToBits(-2.0))
200     return 245;
201 
202   if (Val == DoubleToBits(4.0))
203     return 246;
204 
205   if (Val == DoubleToBits(-4.0))
206     return 247;
207 
208   if (Val == 0x3fc45f306dc9c882 && // 1.0 / (2.0 * pi)
209       STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
210     return 248;
211 
212   return 255;
213 }
214 
215 uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
216                                          const MCOperandInfo &OpInfo,
217                                          const MCSubtargetInfo &STI) const {
218   int64_t Imm;
219   if (MO.isExpr()) {
220     const auto *C = dyn_cast<MCConstantExpr>(MO.getExpr());
221     if (!C)
222       return 255;
223 
224     Imm = C->getValue();
225   } else {
226 
227     assert(!MO.isFPImm());
228 
229     if (!MO.isImm())
230       return ~0;
231 
232     Imm = MO.getImm();
233   }
234 
235   switch (OpInfo.OperandType) {
236   case AMDGPU::OPERAND_REG_IMM_INT32:
237   case AMDGPU::OPERAND_REG_IMM_FP32:
238   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
239   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
240   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
241   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
242     return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
243 
244   case AMDGPU::OPERAND_REG_IMM_INT64:
245   case AMDGPU::OPERAND_REG_IMM_FP64:
246   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
247   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
248     return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
249 
250   case AMDGPU::OPERAND_REG_IMM_INT16:
251   case AMDGPU::OPERAND_REG_IMM_FP16:
252   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
253   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
254   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
255   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
256     // FIXME Is this correct? What do inline immediates do on SI for f16 src
257     // which does not have f16 support?
258     return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
259 
260   case AMDGPU::OPERAND_REG_IMM_V2INT16:
261   case AMDGPU::OPERAND_REG_IMM_V2FP16:
262     if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])
263       return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
264     LLVM_FALLTHROUGH;
265   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
266   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
267   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
268   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
269     uint16_t Lo16 = static_cast<uint16_t>(Imm);
270     uint32_t Encoding = getLit16Encoding(Lo16, STI);
271     return Encoding;
272   }
273   default:
274     llvm_unreachable("invalid operand size");
275   }
276 }
277 
278 void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
279                                        SmallVectorImpl<MCFixup> &Fixups,
280                                        const MCSubtargetInfo &STI) const {
281   verifyInstructionPredicates(MI,
282                               computeAvailableFeatures(STI.getFeatureBits()));
283 
284   uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups, STI);
285   const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
286   unsigned bytes = Desc.getSize();
287 
288   for (unsigned i = 0; i < bytes; i++) {
289     OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
290   }
291 
292   // NSA encoding.
293   if (AMDGPU::isGFX10(STI) && Desc.TSFlags & SIInstrFlags::MIMG) {
294     int vaddr0 = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
295                                             AMDGPU::OpName::vaddr0);
296     int srsrc = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
297                                            AMDGPU::OpName::srsrc);
298     assert(vaddr0 >= 0 && srsrc > vaddr0);
299     unsigned NumExtraAddrs = srsrc - vaddr0 - 1;
300     unsigned NumPadding = (-NumExtraAddrs) & 3;
301 
302     for (unsigned i = 0; i < NumExtraAddrs; ++i)
303       OS.write((uint8_t)getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i),
304                                           Fixups, STI));
305     for (unsigned i = 0; i < NumPadding; ++i)
306       OS.write(0);
307   }
308 
309   if ((bytes > 8 && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) ||
310       (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
311     return;
312 
313   // Check for additional literals in SRC0/1/2 (Op 1/2/3)
314   for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) {
315 
316     // Check if this operand should be encoded as [SV]Src
317     if (!AMDGPU::isSISrcOperand(Desc, i))
318       continue;
319 
320     // Is this operand a literal immediate?
321     const MCOperand &Op = MI.getOperand(i);
322     if (getLitEncoding(Op, Desc.OpInfo[i], STI) != 255)
323       continue;
324 
325     // Yes! Encode it
326     int64_t Imm = 0;
327 
328     if (Op.isImm())
329       Imm = Op.getImm();
330     else if (Op.isExpr()) {
331       if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr()))
332         Imm = C->getValue();
333 
334     } else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
335       llvm_unreachable("Must be immediate or expr");
336 
337     for (unsigned j = 0; j < 4; j++) {
338       OS.write((uint8_t) ((Imm >> (8 * j)) & 0xff));
339     }
340 
341     // Only one literal value allowed
342     break;
343   }
344 }
345 
346 unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
347                                             SmallVectorImpl<MCFixup> &Fixups,
348                                             const MCSubtargetInfo &STI) const {
349   const MCOperand &MO = MI.getOperand(OpNo);
350 
351   if (MO.isExpr()) {
352     const MCExpr *Expr = MO.getExpr();
353     MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
354     Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
355     return 0;
356   }
357 
358   return getMachineOpValue(MI, MO, Fixups, STI);
359 }
360 
361 unsigned
362 SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
363                                     SmallVectorImpl<MCFixup> &Fixups,
364                                     const MCSubtargetInfo &STI) const {
365   using namespace AMDGPU::SDWA;
366 
367   uint64_t RegEnc = 0;
368 
369   const MCOperand &MO = MI.getOperand(OpNo);
370 
371   if (MO.isReg()) {
372     unsigned Reg = MO.getReg();
373     RegEnc |= MRI.getEncodingValue(Reg);
374     RegEnc &= SDWA9EncValues::SRC_VGPR_MASK;
375     if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) {
376       RegEnc |= SDWA9EncValues::SRC_SGPR_MASK;
377     }
378     return RegEnc;
379   } else {
380     const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
381     uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
382     if (Enc != ~0U && Enc != 255) {
383       return Enc | SDWA9EncValues::SRC_SGPR_MASK;
384     }
385   }
386 
387   llvm_unreachable("Unsupported operand kind");
388   return 0;
389 }
390 
391 unsigned
392 SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
393                                         SmallVectorImpl<MCFixup> &Fixups,
394                                         const MCSubtargetInfo &STI) const {
395   using namespace AMDGPU::SDWA;
396 
397   uint64_t RegEnc = 0;
398 
399   const MCOperand &MO = MI.getOperand(OpNo);
400 
401   unsigned Reg = MO.getReg();
402   if (Reg != AMDGPU::VCC && Reg != AMDGPU::VCC_LO) {
403     RegEnc |= MRI.getEncodingValue(Reg);
404     RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
405     RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
406   }
407   return RegEnc;
408 }
409 
410 unsigned
411 SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
412                                       SmallVectorImpl<MCFixup> &Fixups,
413                                       const MCSubtargetInfo &STI) const {
414   unsigned Reg = MI.getOperand(OpNo).getReg();
415   uint64_t Enc = MRI.getEncodingValue(Reg);
416 
417   // VGPR and AGPR have the same encoding, but SrcA and SrcB operands of mfma
418   // instructions use acc[0:1] modifier bits to distinguish. These bits are
419   // encoded as a virtual 9th bit of the register for these operands.
420   if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
421       MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) ||
422       MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) ||
423       MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) ||
424       MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) ||
425       MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) ||
426       MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg))
427     Enc |= 512;
428 
429   return Enc;
430 }
431 
432 static bool needsPCRel(const MCExpr *Expr) {
433   switch (Expr->getKind()) {
434   case MCExpr::SymbolRef: {
435     auto *SE = cast<MCSymbolRefExpr>(Expr);
436     MCSymbolRefExpr::VariantKind Kind = SE->getKind();
437     return Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_LO &&
438            Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_HI;
439   }
440   case MCExpr::Binary: {
441     auto *BE = cast<MCBinaryExpr>(Expr);
442     if (BE->getOpcode() == MCBinaryExpr::Sub)
443       return false;
444     return needsPCRel(BE->getLHS()) || needsPCRel(BE->getRHS());
445   }
446   case MCExpr::Unary:
447     return needsPCRel(cast<MCUnaryExpr>(Expr)->getSubExpr());
448   case MCExpr::Target:
449   case MCExpr::Constant:
450     return false;
451   }
452   llvm_unreachable("invalid kind");
453 }
454 
455 uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
456                                             const MCOperand &MO,
457                                        SmallVectorImpl<MCFixup> &Fixups,
458                                        const MCSubtargetInfo &STI) const {
459   if (MO.isReg())
460     return MRI.getEncodingValue(MO.getReg());
461 
462   if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) {
463     // FIXME: If this is expression is PCRel or not should not depend on what
464     // the expression looks like. Given that this is just a general expression,
465     // it should probably be FK_Data_4 and whatever is producing
466     //
467     //    s_add_u32 s2, s2, (extern_const_addrspace+16
468     //
469     // And expecting a PCRel should instead produce
470     //
471     // .Ltmp1:
472     //   s_add_u32 s2, s2, (extern_const_addrspace+16)-.Ltmp1
473     MCFixupKind Kind;
474     if (needsPCRel(MO.getExpr()))
475       Kind = FK_PCRel_4;
476     else
477       Kind = FK_Data_4;
478 
479     const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
480     uint32_t Offset = Desc.getSize();
481     assert(Offset == 4 || Offset == 8);
482 
483     Fixups.push_back(
484       MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc()));
485   }
486 
487   // Figure out the operand number, needed for isSrcOperand check
488   unsigned OpNo = 0;
489   for (unsigned e = MI.getNumOperands(); OpNo < e; ++OpNo) {
490     if (&MO == &MI.getOperand(OpNo))
491       break;
492   }
493 
494   const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
495   if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
496     uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
497     if (Enc != ~0U &&
498         (Enc != 255 || Desc.getSize() == 4 || Desc.getSize() == 8))
499       return Enc;
500 
501   } else if (MO.isImm())
502     return MO.getImm();
503 
504   llvm_unreachable("Encoding of this operand type is not supported yet.");
505   return 0;
506 }
507 
508 #define ENABLE_INSTR_PREDICATE_VERIFIER
509 #include "AMDGPUGenMCCodeEmitter.inc"
510