; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,SDAG %s
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GISEL %s

; Checks how nested min/max intrinsic calls are selected for gfx1100 by both
; instruction selectors (SelectionDAG and GlobalISel):
;   * min(max(a, b), c) is expected to become a single v_maxmin_* instruction,
;   * max(min(a, b), c) is expected to become a single v_minmax_* instruction,
;   * the max(min(x, y), min(max(x, y), z)) idiom is expected to become v_med3_*.
; Signed i32, unsigned i32, f32 and f16 variants are covered, with the inner
; result used as either the first or the second ("commuted") outer operand.
; Pointers use the opaque `ptr` spelling so the file parses with LLVM releases
; that no longer accept typed-pointer syntax.

;---------------------------------------------------------------------------
; Signed 32-bit integer
;---------------------------------------------------------------------------

; smin(smax(a, b), c) in VGPRs: one v_maxmin_i32 is expected.
define i32 @test_minmax_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_minmax_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_maxmin_i32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %sminmax = call i32 @llvm.smin.i32(i32 %smax, i32 %c)
  ret i32 %sminmax
}

; Same pattern with all operands marked inreg in an amdgpu_ps function: the
; expected code keeps separate scalar s_max_i32 / s_min_i32 instructions and
; stores the result through %out.
define amdgpu_ps void @s_test_minmax_i32(i32 inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) inreg %out) {
; SDAG-LABEL: s_test_minmax_i32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_max_i32 s0, s0, s1
; SDAG-NEXT:    s_mov_b32 s5, s4
; SDAG-NEXT:    s_min_i32 s0, s0, s2
; SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; SDAG-NEXT:    s_mov_b32 s4, s3
; SDAG-NEXT:    global_store_b32 v0, v1, s[4:5]
; SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: s_test_minmax_i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_max_i32 s0, s0, s1
; GISEL-NEXT:    s_mov_b32 s6, s3
; GISEL-NEXT:    s_min_i32 s0, s0, s2
; GISEL-NEXT:    s_mov_b32 s7, s4
; GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
; GISEL-NEXT:    global_store_b32 v1, v0, s[6:7]
; GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT:    s_endpgm
  %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %sminmax = call i32 @llvm.smin.i32(i32 %smax, i32 %c)
  store i32 %sminmax, ptr addrspace(1) %out
  ret void
}

; smin(c, smax(a, b)): the inner result is the second operand of the outer
; call; the same v_maxmin_i32 is expected.
define i32 @test_minmax_commuted_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_minmax_commuted_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_maxmin_i32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %smax = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %sminmax = call i32 @llvm.smin.i32(i32 %c, i32 %smax)
  ret i32 %sminmax
}

; smax(smin(a, b), c): one v_minmax_i32 is expected.
define i32 @test_maxmin_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_maxmin_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_minmax_i32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
  %smaxmin = call i32 @llvm.smax.i32(i32 %smin, i32 %c)
  ret i32 %smaxmin
}

; smax(c, smin(a, b)): commuted outer operands; the same v_minmax_i32 is
; expected.
define i32 @test_maxmin_commuted_i32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_maxmin_commuted_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_minmax_i32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %smin = call i32 @llvm.smin.i32(i32 %a, i32 %b)
  %smaxmin = call i32 @llvm.smax.i32(i32 %c, i32 %smin)
  ret i32 %smaxmin
}

; smax(smin(x, y), smin(smax(x, y), z)): expected to be selected as
; v_med3_i32 rather than as a minmax/maxmin pair.
define void @test_smed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
; GFX11-LABEL: test_smed3_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_med3_i32 v2, v2, v3, v4
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %tmp0 = call i32 @llvm.smin.i32(i32 %x, i32 %y)
  %tmp1 = call i32 @llvm.smax.i32(i32 %x, i32 %y)
  %tmp2 = call i32 @llvm.smin.i32(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @llvm.smax.i32(i32 %tmp0, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

;---------------------------------------------------------------------------
; Unsigned 32-bit integer
;---------------------------------------------------------------------------

; umin(umax(a, b), c) in VGPRs: one v_maxmin_u32 is expected.
define i32 @test_minmax_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_minmax_u32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_maxmin_u32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
  %uminmax = call i32 @llvm.umin.i32(i32 %umax, i32 %c)
  ret i32 %uminmax
}

; Unsigned pattern with inreg operands in an amdgpu_ps function: separate
; scalar s_max_u32 / s_min_u32 instructions are expected.
define amdgpu_ps void @s_test_minmax_u32(i32 inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) inreg %out) {
; SDAG-LABEL: s_test_minmax_u32:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_max_u32 s0, s0, s1
; SDAG-NEXT:    s_mov_b32 s5, s4
; SDAG-NEXT:    s_min_u32 s0, s0, s2
; SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; SDAG-NEXT:    s_mov_b32 s4, s3
; SDAG-NEXT:    global_store_b32 v0, v1, s[4:5]
; SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: s_test_minmax_u32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_max_u32 s0, s0, s1
; GISEL-NEXT:    s_mov_b32 s6, s3
; GISEL-NEXT:    s_min_u32 s0, s0, s2
; GISEL-NEXT:    s_mov_b32 s7, s4
; GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
; GISEL-NEXT:    global_store_b32 v1, v0, s[6:7]
; GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT:    s_endpgm
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
  %uminmax = call i32 @llvm.umin.i32(i32 %umax, i32 %c)
  store i32 %uminmax, ptr addrspace(1) %out
  ret void
}

; umin(c, umax(a, b)): commuted outer operands; the same v_maxmin_u32 is
; expected.
define i32 @test_minmax_commuted_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_minmax_commuted_u32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_maxmin_u32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 %b)
  %uminmax = call i32 @llvm.umin.i32(i32 %c, i32 %umax)
  ret i32 %uminmax
}

; umax(umin(a, b), c): one v_minmax_u32 is expected.
define i32 @test_maxmin_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_maxmin_u32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_minmax_u32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
  %umaxmin = call i32 @llvm.umax.i32(i32 %umin, i32 %c)
  ret i32 %umaxmin
}

; umax(c, umin(a, b)): commuted outer operands; the same v_minmax_u32 is
; expected.
define i32 @test_maxmin_commuted_u32(i32 %a, i32 %b, i32 %c) {
; GFX11-LABEL: test_maxmin_commuted_u32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_minmax_u32 v0, v0, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 %a, i32 %b)
  %umaxmin = call i32 @llvm.umax.i32(i32 %c, i32 %umin)
  ret i32 %umaxmin
}

; umax(umin(x, y), umin(umax(x, y), z)): expected to be selected as
; v_med3_u32.
define void @test_umed3_i32(ptr addrspace(1) %arg, i32 %x, i32 %y, i32 %z) {
; GFX11-LABEL: test_umed3_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_med3_u32 v2, v2, v3, v4
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %tmp0 = call i32 @llvm.umin.i32(i32 %x, i32 %y)
  %tmp1 = call i32 @llvm.umax.i32(i32 %x, i32 %y)
  %tmp2 = call i32 @llvm.umin.i32(i32 %tmp1, i32 %z)
  %tmp3 = call i32 @llvm.umax.i32(i32 %tmp0, i32 %tmp2)
  store i32 %tmp3, ptr addrspace(1) %arg
  ret void
}

;---------------------------------------------------------------------------
; 32-bit float
;---------------------------------------------------------------------------

; minnum(maxnum(a, b), c) in a default-calling-convention function. The
; expected code applies v_max_f32 x, x to each of the three inputs and then
; emits one v_maxmin_f32; the two selectors differ only in the operand order
; of the dual instruction.
; NOTE(review): the self-max instructions look like the input quieting needed
; when IEEE mode is on, as the function name suggests - confirm.
define float @test_minmax_f32_ieee_true(float %a, float %b, float %c) {
; SDAG-LABEL: test_minmax_f32_ieee_true:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; SDAG-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
; SDAG-NEXT:    v_max_f32_e32 v2, v2, v2
; SDAG-NEXT:    v_maxmin_f32 v0, v0, v1, v2
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_minmax_f32_ieee_true:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GISEL-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
; GISEL-NEXT:    v_maxmin_f32 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %minmax = call float @llvm.minnum.f32(float %max, float %c)
  ret float %minmax
}

; Float pattern with inreg operands in an amdgpu_ps function: unlike the
; integer inreg tests, a single v_maxmin_f32 reading s0/s1 directly is
; expected, with only %c copied to a VGPR first, and no self-max instructions.
define amdgpu_ps void @s_test_minmax_f32_ieee_false(float inreg %a, float inreg %b, float inreg %c, ptr addrspace(1) inreg %out) {
; SDAG-LABEL: s_test_minmax_f32_ieee_false:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; SDAG-NEXT:    s_mov_b32 s5, s4
; SDAG-NEXT:    s_mov_b32 s4, s3
; SDAG-NEXT:    v_maxmin_f32 v0, s0, s1, v0
; SDAG-NEXT:    global_store_b32 v1, v0, s[4:5]
; SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: s_test_minmax_f32_ieee_false:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GISEL-NEXT:    s_mov_b32 s6, s3
; GISEL-NEXT:    s_mov_b32 s7, s4
; GISEL-NEXT:    v_maxmin_f32 v0, s0, s1, v0
; GISEL-NEXT:    global_store_b32 v1, v0, s[6:7]
; GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT:    s_endpgm
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %minmax = call float @llvm.minnum.f32(float %max, float %c)
  store float %minmax, ptr addrspace(1) %out
  ret void
}

; minnum(c, maxnum(a, b)) in an amdgpu_ps function: a bare v_maxmin_f32 with
; no preceding self-max instructions is expected.
define amdgpu_ps float @test_minmax_commuted_f32_ieee_false(float %a, float %b, float %c) {
; GFX11-LABEL: test_minmax_commuted_f32_ieee_false:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_maxmin_f32 v0, v0, v1, v2
; GFX11-NEXT:    ; return to shader part epilog
  %max = call float @llvm.maxnum.f32(float %a, float %b)
  %minmax = call float @llvm.minnum.f32(float %c, float %max)
  ret float %minmax
}

; maxnum(minnum(a, b), c) in a default-calling-convention function: the three
; inputs are expected to go through v_max_f32 x, x before one v_minmax_f32.
define float @test_maxmin_f32_ieee_true(float %a, float %b, float %c) {
; SDAG-LABEL: test_maxmin_f32_ieee_true:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; SDAG-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v0, v0, v0
; SDAG-NEXT:    v_max_f32_e32 v2, v2, v2
; SDAG-NEXT:    v_minmax_f32 v0, v0, v1, v2
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_maxmin_f32_ieee_true:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GISEL-NEXT:    v_dual_max_f32 v0, v0, v0 :: v_dual_max_f32 v1, v1, v1
; GISEL-NEXT:    v_max_f32_e32 v2, v2, v2
; GISEL-NEXT:    v_minmax_f32 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %maxmin = call float @llvm.maxnum.f32(float %min, float %c)
  ret float %maxmin
}

; maxnum(c, minnum(a, b)) in an amdgpu_ps function: a bare v_minmax_f32 is
; expected.
define amdgpu_ps float @test_maxmin_commuted_f32_ieee_false(float %a, float %b, float %c) {
; GFX11-LABEL: test_maxmin_commuted_f32_ieee_false:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_minmax_f32 v0, v0, v1, v2
; GFX11-NEXT:    ; return to shader part epilog
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %maxmin = call float @llvm.maxnum.f32(float %c, float %min)
  ret float %maxmin
}

; maxnum(minnum(x, y), minnum(maxnum(x, y), z)) with attribute group #0
; ("no-nans-fp-math"="true"): expected to be selected as v_med3_f32.
define void @test_med3_f32(ptr addrspace(1) %arg, float %x, float %y, float %z) #0 {
; GFX11-LABEL: test_med3_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_med3_f32 v2, v2, v3, v4
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %tmp0 = call float @llvm.minnum.f32(float %x, float %y)
  %tmp1 = call float @llvm.maxnum.f32(float %x, float %y)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %z)
  %tmp3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %tmp3, ptr addrspace(1) %arg
  ret void
}

;---------------------------------------------------------------------------
; 16-bit float
;---------------------------------------------------------------------------

; minnum(maxnum(a, b), c) on half in an amdgpu_ps function: a bare
; v_maxmin_f16 is expected.
define amdgpu_ps half @test_minmax_f16_ieee_false(half %a, half %b, half %c) {
; GFX11-LABEL: test_minmax_f16_ieee_false:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_maxmin_f16 v0, v0, v1, v2
; GFX11-NEXT:    ; return to shader part epilog
  %max = call half @llvm.maxnum.f16(half %a, half %b)
  %minmax = call half @llvm.minnum.f16(half %max, half %c)
  ret half %minmax
}

; Half pattern with inreg operands in an amdgpu_ps function: a single
; v_maxmin_f16 reading s0/s1 directly is expected, followed by a 16-bit store.
define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b, half inreg %c, ptr addrspace(1) inreg %out) {
; SDAG-LABEL: s_test_minmax_f16_ieee_false:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; SDAG-NEXT:    s_mov_b32 s5, s4
; SDAG-NEXT:    s_mov_b32 s4, s3
; SDAG-NEXT:    v_maxmin_f16 v0, s0, s1, v0
; SDAG-NEXT:    global_store_b16 v1, v0, s[4:5]
; SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: s_test_minmax_f16_ieee_false:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GISEL-NEXT:    s_mov_b32 s6, s3
; GISEL-NEXT:    s_mov_b32 s7, s4
; GISEL-NEXT:    v_maxmin_f16 v0, s0, s1, v0
; GISEL-NEXT:    global_store_b16 v1, v0, s[6:7]
; GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-NEXT:    s_endpgm
  %max = call half @llvm.maxnum.f16(half %a, half %b)
  %minmax = call half @llvm.minnum.f16(half %max, half %c)
  store half %minmax, ptr addrspace(1) %out
  ret void
}

; minnum(c, maxnum(a, b)) on half in a default-calling-convention function:
; each input is expected to go through v_max_f16 x, x before one v_maxmin_f16.
define half @test_minmax_commuted_f16_ieee_true(half %a, half %b, half %c) {
; SDAG-LABEL: test_minmax_commuted_f16_ieee_true:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
; SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
; SDAG-NEXT:    v_max_f16_e32 v2, v2, v2
; SDAG-NEXT:    v_maxmin_f16 v0, v0, v1, v2
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_minmax_commuted_f16_ieee_true:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GISEL-NEXT:    v_max_f16_e32 v0, v0, v0
; GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
; GISEL-NEXT:    v_max_f16_e32 v2, v2, v2
; GISEL-NEXT:    v_maxmin_f16 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %max = call half @llvm.maxnum.f16(half %a, half %b)
  %minmax = call half @llvm.minnum.f16(half %c, half %max)
  ret half %minmax
}

; maxnum(minnum(a, b), c) on half in an amdgpu_ps function: a bare
; v_minmax_f16 is expected.
define amdgpu_ps half @test_maxmin_f16_ieee_false(half %a, half %b, half %c) {
; GFX11-LABEL: test_maxmin_f16_ieee_false:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_minmax_f16 v0, v0, v1, v2
; GFX11-NEXT:    ; return to shader part epilog
  %min = call half @llvm.minnum.f16(half %a, half %b)
  %maxmin = call half @llvm.maxnum.f16(half %min, half %c)
  ret half %maxmin
}

; maxnum(c, minnum(a, b)) on half in a default-calling-convention function:
; each input is expected to go through v_max_f16 x, x before one v_minmax_f16.
define half @test_maxmin_commuted_f16_ieee_true(half %a, half %b, half %c) {
; SDAG-LABEL: test_maxmin_commuted_f16_ieee_true:
; SDAG:       ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; SDAG-NEXT:    v_max_f16_e32 v1, v1, v1
; SDAG-NEXT:    v_max_f16_e32 v0, v0, v0
; SDAG-NEXT:    v_max_f16_e32 v2, v2, v2
; SDAG-NEXT:    v_minmax_f16 v0, v0, v1, v2
; SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: test_maxmin_commuted_f16_ieee_true:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GISEL-NEXT:    v_max_f16_e32 v0, v0, v0
; GISEL-NEXT:    v_max_f16_e32 v1, v1, v1
; GISEL-NEXT:    v_max_f16_e32 v2, v2, v2
; GISEL-NEXT:    v_minmax_f16 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %min = call half @llvm.minnum.f16(half %a, half %b)
  %maxmin = call half @llvm.maxnum.f16(half %c, half %min)
  ret half %maxmin
}

; maxnum(minnum(x, y), minnum(maxnum(x, y), z)) on half with attribute group
; #0 ("no-nans-fp-math"="true"): expected to be selected as v_med3_f16.
define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0 {
; GFX11-LABEL: test_med3_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_med3_f16 v2, v2, v3, v4
; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %tmp0 = call half @llvm.minnum.f16(half %x, half %y)
  %tmp1 = call half @llvm.maxnum.f16(half %x, half %y)
  %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %z)
  %tmp3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2)
  store half %tmp3, ptr addrspace(1) %arg
  ret void
}

; Intrinsics exercised by the tests above.
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare half @llvm.minnum.f16(half, half)
declare half @llvm.maxnum.f16(half, half)
declare float @llvm.minnum.f32(float, float)
declare float @llvm.maxnum.f32(float, float)

; Attribute group used only by the two floating-point med3 tests.
attributes #0 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }