1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,SDAG %s
3; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL %s
4
; smin(smax(a, b), c) on i32: both selectors are expected to emit a single
; v_maxmin_i32.
define i32 @test_minmax_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_minmax_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_maxmin_i32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %max_ab = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %capped = call i32 @llvm.smin.i32(i32 %max_ab, i32 %c)
  ret i32 %capped
}
16
; smin(smax(a, b), c) with inreg arguments in an amdgpu_ps function. The checks
; expect separate s_max_i32 / s_min_i32 instructions rather than a fused
; v_maxmin_i32, followed by a store of the result to %out.
define amdgpu_ps void @s_test_minmax_i32(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 addrspace(1)* inreg %out) {
; SDAG-LABEL: s_test_minmax_i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_max_i32 s0, s0, s1
; SDAG-NEXT:    s_mov_b32 s5, s4
; SDAG-NEXT:    s_min_i32 s0, s0, s2
; SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; SDAG-NEXT:    s_mov_b32 s4, s3
; SDAG-NEXT:    global_store_b32 v0, v1, s[4:5]
; SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: s_test_minmax_i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_max_i32 s0, s0, s1
; GISEL-NEXT:    s_mov_b32 s6, s3
; GISEL-NEXT:    s_min_i32 s0, s0, s2
; GISEL-NEXT:    s_mov_b32 s7, s4
; GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
; GISEL-NEXT:    global_store_b32 v1, v0, s[6:7]
; GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT:    s_endpgm
  %max_ab = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %capped = call i32 @llvm.smin.i32(i32 %max_ab, i32 %c)
  store i32 %capped, i32 addrspace(1)* %out
  ret void
}
44
; Commuted form smin(c, smax(a, b)): the expected output is the same
; v_maxmin_i32 as for the non-commuted pattern.
define i32 @test_minmax_commuted_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_minmax_commuted_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_maxmin_i32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %max_ab = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %capped = call i32 @llvm.smin.i32(i32 %c, i32 %max_ab)
  ret i32 %capped
}
56
; smax(smin(a, b), c) on i32: both selectors are expected to emit a single
; v_minmax_i32.
define i32 @test_maxmin_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_maxmin_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_minmax_i32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %min_ab = call i32 @llvm.smin.i32(i32 %a, i32 %b)
  %floored = call i32 @llvm.smax.i32(i32 %min_ab, i32 %c)
  ret i32 %floored
}
68
; Commuted form smax(c, smin(a, b)): the expected output is the same
; v_minmax_i32 as for the non-commuted pattern.
define i32 @test_maxmin_commuted_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_maxmin_commuted_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_minmax_i32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %min_ab = call i32 @llvm.smin.i32(i32 %a, i32 %b)
  %floored = call i32 @llvm.smax.i32(i32 %c, i32 %min_ab)
  ret i32 %floored
}
80
; Signed median-of-three written as smax(smin(x, y), smin(smax(x, y), z)).
; The checks expect it to stay a single v_med3_i32 (not a minmax/maxmin
; instruction), with the result stored through %arg.
define void @test_smed3_i32(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) {
; GFX11-LABEL: test_smed3_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_med3_i32 v2, v2, v3, v4
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %min_xy = call i32 @llvm.smin.i32(i32 %x, i32 %y)
  %max_xy = call i32 @llvm.smax.i32(i32 %x, i32 %y)
  %max_xy_capped = call i32 @llvm.smin.i32(i32 %max_xy, i32 %z)
  %median = call i32 @llvm.smax.i32(i32 %min_xy, i32 %max_xy_capped)
  store i32 %median, i32 addrspace(1)* %arg
  ret void
}
97
; umin(umax(a, b), c) on i32: both selectors are expected to emit a single
; v_maxmin_u32.
define i32 @test_minmax_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_minmax_u32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_maxmin_u32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %umax_ab = call i32 @llvm.umax.i32(i32 %a, i32 %b)
  %ucapped = call i32 @llvm.umin.i32(i32 %umax_ab, i32 %c)
  ret i32 %ucapped
}
109
; umin(umax(a, b), c) with inreg arguments in an amdgpu_ps function. The checks
; expect separate s_max_u32 / s_min_u32 instructions rather than a fused
; v_maxmin_u32, followed by a store of the result to %out.
;
; The value names carry a 'u' prefix because the operations are unsigned; they
; do not affect the generated code.
define amdgpu_ps void @s_test_minmax_u32(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 addrspace(1)* inreg %out) {
; SDAG-LABEL: s_test_minmax_u32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_max_u32 s0, s0, s1
; SDAG-NEXT:    s_mov_b32 s5, s4
; SDAG-NEXT:    s_min_u32 s0, s0, s2
; SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; SDAG-NEXT:    s_mov_b32 s4, s3
; SDAG-NEXT:    global_store_b32 v0, v1, s[4:5]
; SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: s_test_minmax_u32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_max_u32 s0, s0, s1
; GISEL-NEXT:    s_mov_b32 s6, s3
; GISEL-NEXT:    s_min_u32 s0, s0, s2
; GISEL-NEXT:    s_mov_b32 s7, s4
; GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
; GISEL-NEXT:    global_store_b32 v1, v0, s[6:7]
; GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT:    s_endpgm
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
  %uminmax = call i32 @llvm.umin.i32(i32 %umax, i32 %c)
  store i32 %uminmax, i32 addrspace(1)* %out
  ret void
}
137
; Commuted form umin(c, umax(a, b)): the expected output is the same
; v_maxmin_u32 as for the non-commuted pattern.
define i32 @test_minmax_commuted_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_minmax_commuted_u32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_maxmin_u32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %umax_ab = call i32 @llvm.umax.i32(i32 %a, i32 %b)
  %ucapped = call i32 @llvm.umin.i32(i32 %c, i32 %umax_ab)
  ret i32 %ucapped
}
149
; umax(umin(a, b), c) on i32: both selectors are expected to emit a single
; v_minmax_u32.
define i32 @test_maxmin_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_maxmin_u32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_minmax_u32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %umin_ab = call i32 @llvm.umin.i32(i32 %a, i32 %b)
  %ufloored = call i32 @llvm.umax.i32(i32 %umin_ab, i32 %c)
  ret i32 %ufloored
}
161
; Commuted form umax(c, umin(a, b)): the expected output is the same
; v_minmax_u32 as for the non-commuted pattern.
define i32 @test_maxmin_commuted_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_maxmin_commuted_u32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_minmax_u32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %umin_ab = call i32 @llvm.umin.i32(i32 %a, i32 %b)
  %ufloored = call i32 @llvm.umax.i32(i32 %c, i32 %umin_ab)
  ret i32 %ufloored
}
173
; Unsigned median-of-three written as umax(umin(x, y), umin(umax(x, y), z)).
; The checks expect it to stay a single v_med3_u32 (not a minmax/maxmin
; instruction), with the result stored through %arg.
define void @test_umed3_i32(i32 addrspace(1)* %arg, i32 %x, i32 %y, i32 %z) {
; GFX11-LABEL: test_umed3_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_med3_u32 v2, v2, v3, v4
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %umin_xy = call i32 @llvm.umin.i32(i32 %x, i32 %y)
  %umax_xy = call i32 @llvm.umax.i32(i32 %x, i32 %y)
  %umax_xy_capped = call i32 @llvm.umin.i32(i32 %umax_xy, i32 %z)
  %umedian = call i32 @llvm.umax.i32(i32 %umin_xy, i32 %umax_xy_capped)
  store i32 %umedian, i32 addrspace(1)* %arg
  ret void
}
190
; minnum(maxnum(a, b), c) on float with the default calling convention (the
; "ieee_true" case per the test name). The checks expect each operand to be
; passed through v_max_f32 x, x before the fused v_maxmin_f32; SDAG and
; GlobalISel differ only in the order of those operand pre-ops.
define float @test_minmax_f32_ieee_true(float %a, float %b, float %c) {
; SDAG-LABEL: test_minmax_f32_ieee_true:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; SDAG-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
; SDAG-NEXT:    v_max_f32_e32 v2, v2, v2
; SDAG-NEXT:    v_maxmin_f32 v0, v0, v1, v2
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_minmax_f32_ieee_true:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GISEL-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
; GISEL-NEXT:    v_maxmin_f32 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %fmax_ab = call float @llvm.maxnum.f32(float %a, float %b)
  %fcapped = call float @llvm.minnum.f32(float %fmax_ab, float %c)
  ret float %fcapped
}
213
; minnum(maxnum(a, b), c) on float with inreg arguments in an amdgpu_ps
; function (the "ieee_false" case per the test name). The checks expect a
; single v_maxmin_f32 with %a/%b read from s0/s1 and %c first copied into a
; VGPR, then a store of the result to %out.
;
; The value names match the sibling f32 tests (%max / %minmax); they do not
; affect the generated code.
define amdgpu_ps void @s_test_minmax_f32_ieee_false(float inreg %a, float inreg %b, float inreg %c, float addrspace(1)* inreg %out) {
; SDAG-LABEL: s_test_minmax_f32_ieee_false:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; SDAG-NEXT:    s_mov_b32 s5, s4
; SDAG-NEXT:    s_mov_b32 s4, s3
; SDAG-NEXT:    v_maxmin_f32 v0, s0, s1, v0
; SDAG-NEXT:    global_store_b32 v1, v0, s[4:5]
; SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: s_test_minmax_f32_ieee_false:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GISEL-NEXT:    s_mov_b32 s6, s3
; GISEL-NEXT:    s_mov_b32 s7, s4
; GISEL-NEXT:    v_maxmin_f32 v0, s0, s1, v0
; GISEL-NEXT:    global_store_b32 v1, v0, s[6:7]
; GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT:    s_endpgm
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %minmax = call float @llvm.minnum.f32(float %max, float %c)
  store float %minmax, float addrspace(1)* %out
  ret void
}
239
; Commuted form minnum(c, maxnum(a, b)) in an amdgpu_ps function (the
; "ieee_false" case per the test name): the checks expect v_maxmin_f32 alone,
; with no per-operand pre-ops.
define amdgpu_ps float @test_minmax_commuted_f32_ieee_false(float %a, float %b, float %c) {
; GFX11-LABEL: test_minmax_commuted_f32_ieee_false:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_maxmin_f32 v0, v0, v1, v2
; GFX11-NEXT:    ; return to shader part epilog
  %fmax_ab = call float @llvm.maxnum.f32(float %a, float %b)
  %fcapped = call float @llvm.minnum.f32(float %c, float %fmax_ab)
  ret float %fcapped
}
249
; maxnum(minnum(a, b), c) on float with the default calling convention (the
; "ieee_true" case per the test name). The checks expect each operand to be
; passed through v_max_f32 x, x before the fused v_minmax_f32; SDAG and
; GlobalISel differ only in the order of those operand pre-ops.
define float @test_maxmin_f32_ieee_true(float %a, float %b, float %c) {
; SDAG-LABEL: test_maxmin_f32_ieee_true:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; SDAG-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
; SDAG-NEXT:    v_max_f32_e32 v2, v2, v2
; SDAG-NEXT:    v_minmax_f32 v0, v0, v1, v2
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_maxmin_f32_ieee_true:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GISEL-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
; GISEL-NEXT:    v_minmax_f32 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %fmin_ab = call float @llvm.minnum.f32(float %a, float %b)
  %ffloored = call float @llvm.maxnum.f32(float %fmin_ab, float %c)
  ret float %ffloored
}
272
; Commuted form maxnum(c, minnum(a, b)) in an amdgpu_ps function (the
; "ieee_false" case per the test name): the checks expect v_minmax_f32 alone,
; with no per-operand pre-ops.
define amdgpu_ps float @test_maxmin_commuted_f32_ieee_false(float %a, float %b, float %c) {
; GFX11-LABEL: test_maxmin_commuted_f32_ieee_false:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_minmax_f32 v0, v0, v1, v2
; GFX11-NEXT:    ; return to shader part epilog
  %fmin_ab = call float @llvm.minnum.f32(float %a, float %b)
  %ffloored = call float @llvm.maxnum.f32(float %c, float %fmin_ab)
  ret float %ffloored
}
282
; Float median-of-three written as
; maxnum(minnum(x, y), minnum(maxnum(x, y), z)), compiled with attribute
; group #0. The checks expect a single v_med3_f32 (not a minmax/maxmin
; instruction), with the result stored through %arg.
define void @test_med3_f32(float addrspace(1)* %arg, float %x, float %y, float %z) #0 {
; GFX11-LABEL: test_med3_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_med3_f32 v2, v2, v3, v4
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fmin_xy = call float @llvm.minnum.f32(float %x, float %y)
  %fmax_xy = call float @llvm.maxnum.f32(float %x, float %y)
  %fmax_xy_capped = call float @llvm.minnum.f32(float %fmax_xy, float %z)
  %fmedian = call float @llvm.maxnum.f32(float %fmin_xy, float %fmax_xy_capped)
  store float %fmedian, float addrspace(1)* %arg
  ret void
}
299
; minnum(maxnum(a, b), c) on half in an amdgpu_ps function (the "ieee_false"
; case per the test name): the checks expect v_maxmin_f16 alone.
define amdgpu_ps half @test_minmax_f16_ieee_false(half %a, half %b, half %c) {
; GFX11-LABEL: test_minmax_f16_ieee_false:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_maxmin_f16 v0, v0, v1, v2
; GFX11-NEXT:    ; return to shader part epilog
  %hmax_ab = call half @llvm.maxnum.f16(half %a, half %b)
  %hcapped = call half @llvm.minnum.f16(half %hmax_ab, half %c)
  ret half %hcapped
}
309
; minnum(maxnum(a, b), c) on half with inreg arguments in an amdgpu_ps
; function (the "ieee_false" case per the test name). The checks expect a
; single v_maxmin_f16 with %a/%b read from s0/s1 and %c first copied into a
; VGPR, then a 16-bit store of the result to %out.
;
; The value names match the sibling f16 tests (%max / %minmax); they do not
; affect the generated code.
define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b, half inreg %c, half addrspace(1)* inreg %out) {
; SDAG-LABEL: s_test_minmax_f16_ieee_false:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; SDAG-NEXT:    s_mov_b32 s5, s4
; SDAG-NEXT:    s_mov_b32 s4, s3
; SDAG-NEXT:    v_maxmin_f16 v0, s0, s1, v0
; SDAG-NEXT:    global_store_b16 v1, v0, s[4:5]
; SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: s_test_minmax_f16_ieee_false:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GISEL-NEXT:    s_mov_b32 s6, s3
; GISEL-NEXT:    s_mov_b32 s7, s4
; GISEL-NEXT:    v_maxmin_f16 v0, s0, s1, v0
; GISEL-NEXT:    global_store_b16 v1, v0, s[6:7]
; GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT:    s_endpgm
  %max = call half @llvm.maxnum.f16(half %a, half %b)
  %minmax = call half @llvm.minnum.f16(half %max, half %c)
  store half %minmax, half addrspace(1)* %out
  ret void
}
335
; Commuted form minnum(c, maxnum(a, b)) on half with the default calling
; convention (the "ieee_true" case per the test name). The checks expect each
; operand to be passed through v_max_f16 x, x before the fused v_maxmin_f16;
; SDAG and GlobalISel differ only in the order of those operand pre-ops.
define half @test_minmax_commuted_f16_ieee_true(half %a, half %b, half %c) {
; SDAG-LABEL: test_minmax_commuted_f16_ieee_true:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
; SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
; SDAG-NEXT:    v_max_f16_e32 v2, v2, v2
; SDAG-NEXT:    v_maxmin_f16 v0, v0, v1, v2
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_minmax_commuted_f16_ieee_true:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GISEL-NEXT:    v_max_f16_e32 v0, v0, v0
; GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
; GISEL-NEXT:    v_max_f16_e32 v2, v2, v2
; GISEL-NEXT:    v_maxmin_f16 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %hmax_ab = call half @llvm.maxnum.f16(half %a, half %b)
  %hcapped = call half @llvm.minnum.f16(half %c, half %hmax_ab)
  ret half %hcapped
}
360
; maxnum(minnum(a, b), c) on half in an amdgpu_ps function (the "ieee_false"
; case per the test name): the checks expect v_minmax_f16 alone.
define amdgpu_ps half @test_maxmin_f16_ieee_false(half %a, half %b, half %c) {
; GFX11-LABEL: test_maxmin_f16_ieee_false:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_minmax_f16 v0, v0, v1, v2
; GFX11-NEXT:    ; return to shader part epilog
  %hmin_ab = call half @llvm.minnum.f16(half %a, half %b)
  %hfloored = call half @llvm.maxnum.f16(half %hmin_ab, half %c)
  ret half %hfloored
}
370
; Commuted form maxnum(c, minnum(a, b)) on half with the default calling
; convention (the "ieee_true" case per the test name). The checks expect each
; operand to be passed through v_max_f16 x, x before the fused v_minmax_f16;
; SDAG and GlobalISel differ only in the order of those operand pre-ops.
define half @test_maxmin_commuted_f16_ieee_true(half %a, half %b, half %c) {
; SDAG-LABEL: test_maxmin_commuted_f16_ieee_true:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
; SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
; SDAG-NEXT:    v_max_f16_e32 v2, v2, v2
; SDAG-NEXT:    v_minmax_f16 v0, v0, v1, v2
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_maxmin_commuted_f16_ieee_true:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GISEL-NEXT:    v_max_f16_e32 v0, v0, v0
; GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
; GISEL-NEXT:    v_max_f16_e32 v2, v2, v2
; GISEL-NEXT:    v_minmax_f16 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %hmin_ab = call half @llvm.minnum.f16(half %a, half %b)
  %hfloored = call half @llvm.maxnum.f16(half %c, half %hmin_ab)
  ret half %hfloored
}
395
; Half median-of-three written as
; maxnum(minnum(x, y), minnum(maxnum(x, y), z)), compiled with attribute
; group #0. The checks expect a single v_med3_f16 (not a minmax/maxmin
; instruction), with a 16-bit store of the result through %arg.
define void @test_med3_f16(half addrspace(1)* %arg, half %x, half %y, half %z) #0 {
; GFX11-LABEL: test_med3_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_med3_f16 v2, v2, v3, v4
; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %hmin_xy = call half @llvm.minnum.f16(half %x, half %y)
  %hmax_xy = call half @llvm.maxnum.f16(half %x, half %y)
  %hmax_xy_capped = call half @llvm.minnum.f16(half %hmax_xy, half %z)
  %hmedian = call half @llvm.maxnum.f16(half %hmin_xy, half %hmax_xy_capped)
  store half %hmedian, half addrspace(1)* %arg
  ret void
}
412
; Integer min/max intrinsics used by the signed and unsigned i32 tests.
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)

; Floating-point maxnum/minnum intrinsics used by the f32 and f16 tests.
declare float @llvm.maxnum.f32(float, float)
declare float @llvm.minnum.f32(float, float)
declare half @llvm.maxnum.f16(half, half)
declare half @llvm.minnum.f16(half, half)

; Attribute group #0 is attached to test_med3_f32 and test_med3_f16; it
; enables no-NaNs FP math and leaves unsafe FP math disabled.
attributes #0 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
422
423