//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

class AddressSpacesImpl {
  int Flat = 0;
  int Global = 1;
  int Region = 2;
  int Local = 3;
  int Constant = 4;
  int Private = 5;
  int Constant32Bit = 6;
}

def AddrSpaces : AddressSpacesImpl;


class AMDGPUInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}
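
// Note: TableGen copies TSFlags into each instruction's MCInstrDesc, so the
// isRegisterLoad/isRegisterStore bits assigned to TSFlags{63} and TSFlags{62}
// above can be queried from target C++ code through the instruction
// description's TSFlags word.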

class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//

class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {

     let Namespace = "AMDGPU";
     dag OutOperandList = outs;
     dag InOperandList = ins;
     let Pattern = pattern;
     let AsmString = !strconcat(asmstr, "\n");
     let isPseudo = 1;
     let Itinerary = NullALU;
     bit hasIEEEFlag = 0;
     bit hasZeroOpFlag = 0;
     let mayLoad = 0;
     let mayStore = 0;
     let hasSideEffects = 0;
     let isCodeGenOnly = 1;
}

def TruePredicate : Predicate<"">;

// FIXME: Tablegen should specially support this.
def FalsePredicate : Predicate<"false">;

// Add a predicate to the list if it does not already exist, to deduplicate it.
class PredConcat<list<Predicate> lst, Predicate pred> {
  list<Predicate> ret =
      !listconcat([pred], !filter(item, lst, !ne(item, pred)));
}
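
// For example, PredConcat<[FMA, TruePredicate], TruePredicate>.ret evaluates
// to [TruePredicate, FMA]: the new predicate is prepended and any existing
// copy of it is filtered out of the tail.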

class PredicateControl {
  Predicate SubtargetPredicate = TruePredicate;
  Predicate AssemblerPredicate = TruePredicate;
  Predicate WaveSizePredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = PredConcat<
                                 PredConcat<PredConcat<OtherPredicates,
                                                       SubtargetPredicate>.ret,
                                            AssemblerPredicate>.ret,
                                 WaveSizePredicate>.ret;
}
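
// The nested PredConcat applications above flatten into a deduplicated list
// of the form [WaveSizePredicate, AssemblerPredicate, SubtargetPredicate,
// OtherPredicates...], which the pattern machinery consumes through the
// standard Predicates field.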

class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
      PredicateControl;

let RecomputePerFunction = 1 in {
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}

def FMA : Predicate<"Subtarget->hasFMA()">;

def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;

def u16ImmTarget : AsmOperandClass {
  let Name = "U16Imm";
  let RenderMethod = "addImmOperands";
}

def s16ImmTarget : AsmOperandClass {
  let Name = "S16Imm";
  let RenderMethod = "addImmOperands";
}

let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = u16ImmTarget;
}

def s16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = s16ImmTarget;
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget   : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{ return N->hasOneUse(); }]> {

  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class is_canonicalized<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{
    const SITargetLowering &Lowering =
              *static_cast<const SITargetLowering *>(getTargetLowering());

    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0)) &&
      Lowering.isCanonicalized(*CurDAG, N->getOperand(1));
   }]> {

  // TODO: Improve the legalizer for g_build_vector in GlobalISel to match this class.
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());

    return TLI->isCanonicalized(MI.getOperand(1).getReg(), const_cast<MachineFunction&>(MF)) &&
      TLI->isCanonicalized(MI.getOperand(2).getReg(), const_cast<MachineFunction&>(MF));
  }];
}


let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;

def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;


def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;

//===----------------------------------------------------------------------===//
// PatFrags for shifts
//===----------------------------------------------------------------------===//

// Constrained shift PatFrags.

def csh_mask_16 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 4); }]> {
    let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
  }

def csh_mask_32 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 5); }]> {
    let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
  }

def csh_mask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 6); }]> {
    let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
  }
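
// isUnneededShiftMask(N, W) (defined in the C++ selector) is expected to
// accept masks that keep at least the low W bits, e.g. (and $x, 31) on a
// 32-bit shift amount, where the mask cannot change the effective shift.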

foreach width = [16, 32, 64] in {
defvar csh_mask = !cast<SDPatternOperator>("csh_mask_"#width);

def cshl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(shl node:$src0, node:$src1), (shl node:$src0, (csh_mask node:$src1))]>;
defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
def clshl_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (cshl $src1, $src0)>;

def csrl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(srl node:$src0, node:$src1), (srl node:$src0, (csh_mask node:$src1))]>;
defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
def clshr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csrl $src1, $src0)>;

def csra_#width : PatFrags<(ops node:$src0, node:$src1),
  [(sra node:$src0, node:$src1), (sra node:$src0, (csh_mask node:$src1))]>;
defvar csra = !cast<SDPatternOperator>("csra_"#width);
def csra_#width#_oneuse : HasOneUseBinOp<csra>;
def cashr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csra $src1, $src0)>;
} // end foreach width
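
// As a concrete instance, the width=32 iteration of the loop above produces:
//
//   def cshl_32 : PatFrags<(ops node:$src0, node:$src1),
//     [(shl node:$src0, node:$src1),
//      (shl node:$src0, (csh_mask_32 node:$src1))]>;
//
// so a left shift whose amount carries a redundant mask matches the same
// fragment as a plain shl.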

def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;


def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;


def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  // Match a bitcast of (srl x, 16), i.e. the high half of a 32-bit value.
  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;
def COND_O   : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO  : PatFrags<(ops), [(OtherVT SETUO)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX - For some reason the R600 version prefers to use unordered
// for setne?
def COND_UNE_NE  : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;

// FIXME: Should not need code predicate
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//

def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}

class StoreHi16<SDPatternOperator op> : PatFrag <
  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
}
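
// For example, StoreHi16<truncstorei16> matches
// (truncstorei16 (srl $value, (i32 16)), $ptr), i.e. a store of the high half
// of a 32-bit value; the per-address-space variants are instantiated below.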

def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant,
                                              AddrSpaces.Constant32Bit ]>;
def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global,
                                            AddrSpaces.Constant,
                                            AddrSpaces.Constant32Bit ]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
                                          AddrSpaces.Global,
                                          AddrSpaces.Constant,
                                          AddrSpaces.Constant32Bit ]>;
def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;

def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;

foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def extloadi8_#as  : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def sextloadi8_#as  : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def zextloadi8_#as  : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}
} // End let AddressSpaces
} // End foreach as
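
// For example, the "local" iteration of the loop above defines load_local,
// extloadi8_local, sextloadi16_local, atomic_load_32_local, and so on, each
// restricted to AddrSpaces.Local through the AddressSpaces let.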


foreach as = [ "global", "flat", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                    (unindexedstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
}

def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
}

def store_hi16_#as : StoreHi16 <truncstorei16>;
def truncstorei8_hi16_#as : StoreHi16<truncstorei8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16>;

defm atomic_store_#as : binary_atomic_op<atomic_store>;

} // End let AddressSpaces
} // End foreach as
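
// Likewise, the "global" iteration of the store loop defines store_global,
// truncstorei8_global, store_hi16_global, atomic_store_global, etc., all
// restricted to AddrSpaces.Global.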


multiclass ret_noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
  foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
    let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op<atomic_op, IsInt>;

      let PredicateCode = [{return (SDValue(N, 0).use_empty());}] in {
        defm "_"#as#"_noret" : binary_atomic_op<atomic_op, IsInt>;
      }

      let PredicateCode = [{return !(SDValue(N, 0).use_empty());}] in {
        defm "_"#as#"_ret" : binary_atomic_op<atomic_op, IsInt>;
      }
    }
  }
}
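
// Each defm below therefore expands into per-address-space fragments plus
// _noret/_ret variants; e.g. atomic_swap yields atomic_swap_global,
// atomic_swap_global_noret (result unused), and atomic_swap_global_ret
// (result used), and similarly for the other address spaces.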

defm atomic_swap : ret_noret_binary_atomic_op<atomic_swap>;
defm atomic_load_add : ret_noret_binary_atomic_op<atomic_load_add>;
defm atomic_load_and : ret_noret_binary_atomic_op<atomic_load_and>;
defm atomic_load_max : ret_noret_binary_atomic_op<atomic_load_max>;
defm atomic_load_min : ret_noret_binary_atomic_op<atomic_load_min>;
defm atomic_load_or : ret_noret_binary_atomic_op<atomic_load_or>;
defm atomic_load_sub : ret_noret_binary_atomic_op<atomic_load_sub>;
defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>;
defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>;
defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
let MemoryVT = v2f16 in
defm atomic_load_fadd_v2f16 : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>;

def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                        Aligned<8> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def load_align16_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                         Aligned<16> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def store_align8_local : PatFrag<(ops node:$val, node:$ptr),
                                 (store_local node:$val, node:$ptr)>, Aligned<8> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def store_align16_local : PatFrag<(ops node:$val, node:$ptr),
                                  (store_local node:$val, node:$ptr)>, Aligned<16> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm atomic_cmp_swap_region : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

class Constants {
  int TWO_PI = 0x40c90fdb;
  int PI = 0x40490fdb;
  int TWO_PI_INV = 0x3e22f983;
  int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9
  int FP16_ONE = 0x3C00;
  int FP16_NEG_ONE = 0xBC00;
  int FP32_ONE = 0x3f800000;
  int FP32_NEG_ONE = 0xbf800000;
  int FP64_ONE = 0x3ff0000000000000;
  int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;
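
// The values above are IEEE-754 bit patterns: TWO_PI is 0x40c90fdb
// (~6.2831855f), TWO_PI_INV is 0x3e22f983 (~0.15915494f, i.e. 1/(2*pi)), and
// FP32_ONE is 0x3f800000 (1.0f); the FP16/FP64 constants encode +/-1.0 in
// half and double precision.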

def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : AMDGPUPat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;
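
// An illustrative instantiation (the subregister index name is the target's,
// e.g. sub0 on SI):
//
//   def : Extract_Element<f32, v2f32, 0, sub0>;
//
// matches (f32 (extractelt v2f32:$src, 0)) and selects to
// (EXTRACT_SUBREG $src, sub0).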

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : AMDGPUPat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;
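
// This relies on the identity rotr(x, y) == bitalign(x, x, y): a bit-align
// instruction extracts 32 bits starting at bit $src2 of the concatenation
// ($src0:$src1), which for $src0 == $src1 == x is exactly a rotate right.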

// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;
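
// A hedged note: these fragments fold floor(x + 0.5) and floor(x) conversions
// into the hardware's CVT_RPI/CVT_FLR style float-to-int instructions. They
// are gated on NoNaNsFPMath because the expanded ffloor/fadd forms and the
// single hardware conversion may disagree on NaN inputs.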

let AddedComplexity = 2 in {
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;
} // AddedComplexity.

class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;
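
// Illustrative use (the instruction name is the target's, e.g. V_RCP_F32_e32
// on SI): RcpPat<V_RCP_F32_e32, f32> folds (fdiv 1.0, $src) into a single
// reciprocal instruction.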

// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

// Instructions which select to the same v_max_f*
def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee_oneuse node:$src0, node:$src1),
   (fminnum_oneuse node:$src0, node:$src1)]
>;

def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]
>;

def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
  [(fmad node:$src0, node:$src1, node:$src2),
   (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
>;

// FIXME: fsqrt should not select directly
def any_amdgcn_sqrt : PatFrags<(ops node:$src0),
  [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]
>;