1; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
2; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3
; GCN-LABEL: {{^}}mac_f16
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[C_F16:[0-9]+]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI:  v_cvt_f32_f16_e32 v[[C_F32:[0-9]+]], v[[C_F16]]
; SI:  v_mac_f32_e32 v[[C_F32]], v[[B_F32]], v[[A_F32]]
; SI:  v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[C_F32]]
; SI:  buffer_store_short v[[R_F16]]
; VI:  v_mac_f16_e32 v[[C_F16]], v[[B_F16]], v[[A_F16]]
; VI:  buffer_store_short v[[C_F16]]
; GCN: s_endpgm
; Basic fused multiply-accumulate: *r = a * b + c.
; SI has no f16 ALU ops, so the halves are promoted to f32 and
; v_mac_f32 is selected; VI selects v_mac_f16 directly on the loads.
define void @mac_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  ; mul+add pair should fold into a single MAC/MAD.
  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}
33
; GCN-LABEL: {{^}}mac_f16_same_add
; SI:  v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; SI:  v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_mad_f16 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; VI:  v_mac_f16_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
; Two FMAs share the addend %c.val. v_mac overwrites its addend
; register, so only one of the two can be a MAC; the checks expect the
; first to be a non-destructive v_mad and the second a v_mac.
define void @mac_f16_same_add(
    half addrspace(1)* %r0,
    half addrspace(1)* %r1,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c,
    half addrspace(1)* %d,
    half addrspace(1)* %e) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c
  %d.val = load half, half addrspace(1)* %d
  %e.val = load half, half addrspace(1)* %e

  ; r0 = a*b + c
  %t0.val = fmul half %a.val, %b.val
  %r0.val = fadd half %t0.val, %c.val

  ; r1 = d*e + c  (same addend, still live after the first FMA)
  %t1.val = fmul half %d.val, %e.val
  %r1.val = fadd half %t1.val, %c.val

  store half %r0.val, half addrspace(1)* %r0
  store half %r1.val, half addrspace(1)* %r1
  ret void
}
65
; GCN-LABEL: {{^}}mac_f16_neg_a
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN:    s_endpgm
; fsub(-0.0, a) is an exact fneg, so it should fold into the first
; source-operand negation of a v_mad (v_mac_e32 cannot encode source
; modifiers, hence the -NOT checks).
define void @mac_f16_neg_a(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  ; r = (-a) * b + c
  %a.neg = fsub half -0.0, %a.val
  %t.val = fmul half %a.neg, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}
89
; GCN-LABEL: {{^}}mac_f16_neg_b
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN:    s_endpgm
; Same as mac_f16_neg_a but the multiplier is negated; fmul is
; commutative, so the fneg still ends up on a v_mad source modifier.
define void @mac_f16_neg_b(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  ; r = a * (-b) + c
  %b.neg = fsub half -0.0, %b.val
  %t.val = fmul half %a.val, %b.neg
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}
113
; GCN-LABEL: {{^}}mac_f16_neg_c
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN:    s_endpgm
; The addend is negated, so a destructive v_mac cannot be used; the
; fneg folds into the third-source modifier of a v_mad.
define void @mac_f16_neg_c(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  ; r = a * b + (-c)
  %c.neg = fsub half -0.0, %c.val
  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.neg

  store half %r.val, half addrspace(1)* %r
  ret void
}
137
; GCN-LABEL: {{^}}mac_f16_neg_a_safe_fp_math
; SI:  v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI:  v_subrev_f32_e32 v[[NEG_A:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A]]
; VI:  v_sub_f16_e32 v[[NEG_A:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A]]
; GCN: s_endpgm
; fsub(+0.0, a) is NOT a plain fneg under default (safe) FP math
; (it differs on signed zero), so an explicit subtract must be emitted
; and the MAC keeps its unmodified-source v_mac form.
define void @mac_f16_neg_a_safe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  ; r = (0 - a) * b + c, with +0.0 (not -0.0) as the minuend
  %a.neg = fsub half 0.0, %a.val
  %t.val = fmul half %a.neg, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}
162
; GCN-LABEL: {{^}}mac_f16_neg_b_safe_fp_math
; SI:  v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI:  v_subrev_f32_e32 v[[NEG_A:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A]], v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_A:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A]], v{{[0-9]+}}
; GCN: s_endpgm
; As mac_f16_neg_a_safe_fp_math, but negating the second multiplicand:
; safe FP math forbids fsub(0, b) -> fneg(b), so expect sub + v_mac.
define void @mac_f16_neg_b_safe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  ; r = a * (0 - b) + c
  %b.neg = fsub half 0.0, %b.val
  %t.val = fmul half %a.val, %b.neg
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}
187
; GCN-LABEL: {{^}}mac_f16_neg_c_safe_fp_math
; SI:  v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI:  v_subrev_f32_e32 v[[NEG_A:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_mac_f32_e32 v[[NEG_A]], v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_A:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v[[NEG_A]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
; Addend negated under safe FP math: an explicit subtract computes
; (0 - c) and the v_mac then accumulates into that result register.
define void @mac_f16_neg_c_safe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #0 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  ; r = a * b + (0 - c)
  %c.neg = fsub half 0.0, %c.val
  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.neg

  store half %r.val, half addrspace(1)* %r
  ret void
}
212
; GCN-LABEL: {{^}}mac_f16_neg_a_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; GCN:    s_endpgm
; Attribute #1 enables unsafe FP math, which permits folding
; fsub(0, a) -> fneg(a); the fneg then becomes a v_mad source modifier
; exactly as in the -0.0 variants.
define void @mac_f16_neg_a_unsafe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #1 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  ; r = (0 - a) * b + c, foldable to (-a)*b + c under unsafe math
  %a.neg = fsub half 0.0, %a.val
  %t.val = fmul half %a.neg, %b.val
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}
236
; GCN-LABEL: {{^}}mac_f16_neg_b_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; GCN:    s_endpgm
; Unsafe FP math (#1): fsub(0, b) folds to fneg(b) and lands on a
; v_mad source modifier; the destructive v_mac form must not appear.
define void @mac_f16_neg_b_unsafe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #1 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  ; r = a * (0 - b) + c
  %b.neg = fsub half 0.0, %b.val
  %t.val = fmul half %a.val, %b.neg
  %r.val = fadd half %t.val, %c.val

  store half %r.val, half addrspace(1)* %r
  ret void
}
260
; GCN-LABEL: {{^}}mac_f16_neg_c_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}}
; GCN:    s_endpgm
; Unsafe FP math (#1): the negated addend folds into the third-source
; modifier of v_mad, so no v_mac is selected.
define void @mac_f16_neg_c_unsafe_fp_math(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b,
    half addrspace(1)* %c) #1 {
entry:
  %a.val = load half, half addrspace(1)* %a
  %b.val = load half, half addrspace(1)* %b
  %c.val = load half, half addrspace(1)* %c

  ; r = a * b + (0 - c)
  %c.neg = fsub half 0.0, %c.val
  %t.val = fmul half %a.val, %b.val
  %r.val = fadd half %t.val, %c.neg

  store half %r.val, half addrspace(1)* %r
  ret void
}
284
; GCN-LABEL: {{^}}mac_v2f16
; GCN: buffer_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[B_V2_F16:[0-9]+]]
; GCN: buffer_load_dword v[[C_V2_F16:[0-9]+]]
; GCN: v_lshrrev_b32_e32 v[[A_F16_1:[0-9]+]], 16, v[[A_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[B_F16_1:[0-9]+]], 16, v[[B_V2_F16]]
; GCN: v_lshrrev_b32_e32 v[[C_F16_1:[0-9]+]], 16, v[[C_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_0:[0-9]+]], v[[A_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_0:[0-9]+]], v[[B_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[C_F32_0:[0-9]+]], v[[C_V2_F16]]
; SI:  v_cvt_f32_f16_e32 v[[A_F32_1:[0-9]+]], v[[A_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[B_F32_1:[0-9]+]], v[[B_F16_1]]
; SI:  v_cvt_f32_f16_e32 v[[C_F32_1:[0-9]+]], v[[C_F16_1]]
; SI:  v_mac_f32_e32 v[[C_F32_0]], v[[B_F32_0]], v[[A_F32_0]]
; SI:  v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[C_F32_0]]
; SI:  v_mac_f32_e32 v[[C_F32_1]], v[[B_F32_1]], v[[A_F32_1]]
; SI:  v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[C_F32_1]]
; SI:  v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; SI:  v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; VI:  v_mac_f16_e32 v[[C_V2_F16]], v[[B_V2_F16]], v[[A_V2_F16]]
; VI:  v_mac_f16_e32 v[[C_F16_1]], v[[B_F16_1]], v[[A_F16_1]]
; VI:  v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[C_V2_F16]]
; VI:  v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[C_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
; Vector variant: each <2 x half> is loaded as one dword, the high
; element is extracted with a 16-bit shift, a scalar MAC is done per
; element, and the two halves are repacked with and/shl/or.
define void @mac_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  ; r = a * b + c, element-wise
  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
327
; GCN-LABEL: {{^}}mac_v2f16_same_add
; SI:  v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD0:v[0-9]+]]
; SI:  v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD1:v[0-9]+]]
; SI:  v_mac_f32_e32 [[ADD0]], v{{[0-9]+}}, v{{[0-9]+}}
; SI:  v_mac_f32_e32 [[ADD1]], v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_mad_f16 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD0:v[0-9]+]]
; VI:  v_mad_f16 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD1:v[0-9]+]]
; VI:  v_mac_f16_e32 [[ADD0]], v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_mac_f16_e32 [[ADD1]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
; Vector form of mac_f16_same_add: per element, the first use of the
; shared addend must be a non-destructive v_mad and the last use may
; be a destructive v_mac (two elements -> two mad/mac pairs).
define void @mac_v2f16_same_add(
    <2 x half> addrspace(1)* %r0,
    <2 x half> addrspace(1)* %r1,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c,
    <2 x half> addrspace(1)* %d,
    <2 x half> addrspace(1)* %e) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
  %d.val = load <2 x half>, <2 x half> addrspace(1)* %d
  %e.val = load <2 x half>, <2 x half> addrspace(1)* %e

  ; r0 = a*b + c
  %t0.val = fmul <2 x half> %a.val, %b.val
  %r0.val = fadd <2 x half> %t0.val, %c.val

  ; r1 = d*e + c  (same addend vector)
  %t1.val = fmul <2 x half> %d.val, %e.val
  %r1.val = fadd <2 x half> %t1.val, %c.val

  store <2 x half> %r0.val, <2 x half> addrspace(1)* %r0
  store <2 x half> %r1.val, <2 x half> addrspace(1)* %r1
  ret void
}
363
; GCN-LABEL: {{^}}mac_v2f16_neg_a
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN:    s_endpgm
; Vector form of mac_f16_neg_a: the exact fneg (-0.0 - a) folds into a
; v_mad source modifier for each of the two elements.
define void @mac_v2f16_neg_a(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  ; r = (-a) * b + c, element-wise
  %a.neg = fsub <2 x half> <half -0.0, half -0.0>, %a.val
  %t.val = fmul <2 x half> %a.neg, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
389
; GCN-LABEL: {{^}}mac_v2f16_neg_b
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN:    s_endpgm
; Vector form of mac_f16_neg_b: fneg of the second multiplicand folds
; into a v_mad source modifier for each element.
define void @mac_v2f16_neg_b(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  ; r = a * (-b) + c, element-wise
  %b.neg = fsub <2 x half> <half -0.0, half -0.0>, %b.val
  %t.val = fmul <2 x half> %a.val, %b.neg
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
415
; GCN-LABEL: {{^}}mac_v2f16_neg_c
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; SI:     v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN:    s_endpgm
; Vector form of mac_f16_neg_c: the negated addend forces v_mad with a
; third-source negation modifier for each element.
define void @mac_v2f16_neg_c(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  ; r = a * b + (-c), element-wise
  %c.neg = fsub <2 x half> <half -0.0, half -0.0>, %c.val
  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.neg

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
441
; GCN-LABEL: {{^}}mac_v2f16_neg_a_safe_fp_math
; SI:  v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI:  v_subrev_f32_e32 v[[NEG_A0:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_subrev_f32_e32 v[[NEG_A1:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]]
; SI:  v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
; VI:  v_sub_f16_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A0]]
; VI:  v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v[[NEG_A1]]
; GCN: s_endpgm
; Vector form of mac_f16_neg_a_safe_fp_math: under safe FP math,
; (+0.0 - a) needs an explicit per-element subtract before the v_mac.
define void @mac_v2f16_neg_a_safe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  ; r = (0 - a) * b + c, element-wise, with +0.0 minuend
  %a.neg = fsub <2 x half> <half 0.0, half 0.0>, %a.val
  %t.val = fmul <2 x half> %a.neg, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
470
; GCN-LABEL: {{^}}mac_v2f16_neg_b_safe_fp_math
; SI:  v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI:  v_subrev_f32_e32 v[[NEG_A0:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_subrev_f32_e32 v[[NEG_A1:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}}
; SI:  v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}}
; VI:  v_mac_f16_e32 v{{[0-9]+}}, v[[NEG_A1]], v{{[0-9]+}}
; GCN: s_endpgm
; Vector form of mac_f16_neg_b_safe_fp_math: explicit per-element
; subtract of b before each v_mac; no fneg folding allowed.
define void @mac_v2f16_neg_b_safe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  ; r = a * (0 - b) + c, element-wise
  %b.neg = fsub <2 x half> <half 0.0, half 0.0>, %b.val
  %t.val = fmul <2 x half> %a.val, %b.neg
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
499
; GCN-LABEL: {{^}}mac_v2f16_neg_c_safe_fp_math
; SI:  v_cvt_f32_f16_e32 v[[ZERO:[0-9]+]], 0{{$}}
; SI:  v_subrev_f32_e32 v[[NEG_A0:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_subrev_f32_e32 v[[NEG_A1:[0-9]+]], v{{[0-9]+}}, v[[ZERO]]
; SI:  v_mac_f32_e32 v[[NEG_A0]], v{{[0-9]+}}, v{{[0-9]+}}
; SI:  v_mac_f32_e32 v[[NEG_A1]], v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_sub_f16_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v[[NEG_A0]], v{{[0-9]+}}, v{{[0-9]+}}
; VI:  v_mac_f16_e32 v[[NEG_A1]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
; Vector form of mac_f16_neg_c_safe_fp_math: (0 - c) is computed
; explicitly per element and the v_mac accumulates into that register.
define void @mac_v2f16_neg_c_safe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #0 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  ; r = a * b + (0 - c), element-wise
  %c.neg = fsub <2 x half> <half 0.0, half 0.0>, %c.val
  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.neg

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
528
; GCN-LABEL: {{^}}mac_v2f16_neg_a_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; GCN:    s_endpgm
; Vector form of mac_f16_neg_a_unsafe_fp_math: unsafe FP math (#1)
; allows fsub(0, a) -> fneg(a), folded into v_mad per element.
define void @mac_v2f16_neg_a_unsafe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #1 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  ; r = (0 - a) * b + c, element-wise
  %a.neg = fsub <2 x half> <half 0.0, half 0.0>, %a.val
  %t.val = fmul <2 x half> %a.neg, %b.val
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
554
; GCN-LABEL: {{^}}mac_v2f16_neg_b_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; SI:     v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; VI:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; GCN:    s_endpgm
; Vector form of mac_f16_neg_b_unsafe_fp_math: fneg of b folds into a
; v_mad source modifier per element under unsafe FP math (#1).
define void @mac_v2f16_neg_b_unsafe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #1 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  ; r = a * (0 - b) + c, element-wise
  %b.neg = fsub <2 x half> <half 0.0, half 0.0>, %b.val
  %t.val = fmul <2 x half> %a.val, %b.neg
  %r.val = fadd <2 x half> %t.val, %c.val

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
580
; GCN-LABEL: {{^}}mac_v2f16_neg_c_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI:     v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
; SI:     v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
; VI-NOT: v_mac_f16
; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
; VI:     v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
; GCN:    s_endpgm
; Vector form of mac_f16_neg_c_unsafe_fp_math: the negated addend
; folds into v_mad's third-source modifier per element (#1 = unsafe).
define void @mac_v2f16_neg_c_unsafe_fp_math(
    <2 x half> addrspace(1)* %r,
    <2 x half> addrspace(1)* %a,
    <2 x half> addrspace(1)* %b,
    <2 x half> addrspace(1)* %c) #1 {
entry:
  %a.val = load <2 x half>, <2 x half> addrspace(1)* %a
  %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
  %c.val = load <2 x half>, <2 x half> addrspace(1)* %c

  ; r = a * b + (0 - c), element-wise
  %c.neg = fsub <2 x half> <half 0.0, half 0.0>, %c.val
  %t.val = fmul <2 x half> %a.val, %b.val
  %r.val = fadd <2 x half> %t.val, %c.neg

  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
606
; #0 = default (IEEE-safe) FP math; #1 permits unsafe transforms such
; as folding fsub(+0.0, x) into fneg(x).
attributes #0 = {"unsafe-fp-math"="false"}
attributes #1 = {"unsafe-fp-math"="true"}
609