; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Unsigned clamp patterns built from llvm.umin/llvm.umax with the constants
; K0 = 12 and K1 = 17. The first eight tests cover every operand order of
; umin(umax(Val, K0), K1) and umax(umin(Val, K1), K0); in all of them the
; checks expect a single v_med3_u32 v0, v0, 12, 17.

; umin(umax(Val, K0), K1)
define i32 @test_min_max_ValK0_K1_u32(i32 %a) {
; GFX89-LABEL: test_min_max_ValK0_K1_u32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_max_ValK0_K1_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
  ret i32 %umed
}

; umin(umax(K0, Val), K1)
; NOTE(review): this name lacks the test_ prefix and says ValK0/i32, while the
; body is umax(12, %a) on the unsigned intrinsics. Going by the sibling tests it
; looks like it was meant to be test_min_max_K0Val_K1_u32 - confirm before
; renaming (the LABEL checks below must change with it).
define i32 @min_max_ValK0_K1_i32(i32 %a) {
; GFX89-LABEL: min_max_ValK0_K1_i32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: min_max_ValK0_K1_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
  ret i32 %umed
}

; umin(K1, umax(Val, K0))
define i32 @test_min_K1max_ValK0__u32(i32 %a) {
; GFX89-LABEL: test_min_K1max_ValK0__u32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_K1max_ValK0__u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
  ret i32 %umed
}

; umin(K1, umax(K0, Val))
define i32 @test_min_K1max_K0Val__u32(i32 %a) {
; GFX89-LABEL: test_min_K1max_K0Val__u32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_min_K1max_K0Val__u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 12, i32 %a)
  %umed = call i32 @llvm.umin.i32(i32 17, i32 %umax)
  ret i32 %umed
}

; umax(umin(Val, K1), K0)
define i32 @test_max_min_ValK1_K0_u32(i32 %a) {
; GFX89-LABEL: test_max_min_ValK1_K0_u32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_ValK1_K0_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
  %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
  ret i32 %umed
}

; umax(umin(K1, Val), K0)
define i32 @test_max_min_K1Val_K0_u32(i32 %a) {
; GFX89-LABEL: test_max_min_K1Val_K0_u32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_min_K1Val_K0_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
  %umed = call i32 @llvm.umax.i32(i32 %umin, i32 12)
  ret i32 %umed
}

; umax(K0, umin(Val, K1))
define i32 @test_max_K0min_ValK1__u32(i32 %a) {
; GFX89-LABEL: test_max_K0min_ValK1__u32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_ValK1__u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 %a, i32 17)
  %umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
  ret i32 %umed
}

; umax(K0, umin(K1, Val))
define i32 @test_max_K0min_K1Val__u32(i32 %a) {
; GFX89-LABEL: test_max_K0min_K1Val__u32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_K1Val__u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 17
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %umin = call i32 @llvm.umin.i32(i32 17, i32 %a)
  %umed = call i32 @llvm.umax.i32(i32 12, i32 %umin)
  ret i32 %umed
}

; Same shape on <2 x i16> with splat constants. No med3 is expected here: the
; gfx802 checks show per-half 16-bit min/max (plain and SDWA forms) recombined
; with v_or_b32, and the gfx900/gfx1010 checks show a v_pk_min_u16 followed by
; a v_pk_max_u16.
define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) {
; GFX8-LABEL: test_max_K0min_K1Val__v2u16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v2, 17
; GFX8-NEXT:    v_min_u16_e32 v1, 17, v0
; GFX8-NEXT:    v_min_u16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT:    v_mov_b32_e32 v2, 12
; GFX8-NEXT:    v_max_u16_e32 v1, 12, v1
; GFX8-NEXT:    v_max_u16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_max_K0min_K1Val__v2u16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
; GFX9-NEXT:    v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_max_K0min_K1Val__v2u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_min_u16 v0, 17, v0 op_sel_hi:[0,1]
; GFX10-NEXT:    v_pk_max_u16 v0, 12, v0 op_sel_hi:[0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %umin = call <2 x i16> @llvm.umin.v2i16(<2 x i16> <i16 17, i16 17>, <2 x i16> %a)
  %umed = call <2 x i16> @llvm.umax.v2i16(<2 x i16> <i16 12, i16 12>, <2 x i16> %umin)
  ret <2 x i16> %umed
}

; amdgpu_ps function with an inreg argument. The checks expect the scalar pair
; s_max_u32 / s_min_u32 on SGPRs rather than a med3 instruction.
define amdgpu_ps i32 @test_uniform_min_max(i32 inreg %a) {
; GFX89-LABEL: test_uniform_min_max:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_max_u32 s0, s2, 12
; GFX89-NEXT:    s_min_u32 s0, s0, 17
; GFX89-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: test_uniform_min_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_max_u32 s0, s2, 12
; GFX10-NEXT:    s_min_u32 s0, s0, 17
; GFX10-NEXT:    ; return to shader part epilog
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 17)
  ret i32 %umed
}

; Upper bound 65 (0x41) instead of 17. The gfx802/gfx900 checks first move 0x41
; into v1 with v_mov_b32 and pass the register to v_med3_u32; the gfx1010 checks
; pass 0x41 directly as an operand of v_med3_u32.
define i32 @test_non_inline_constant_u32(i32 %a) {
; GFX89-LABEL: test_non_inline_constant_u32:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_mov_b32_e32 v1, 0x41
; GFX89-NEXT:    v_med3_u32 v0, v0, 12, v1
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_non_inline_constant_u32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_med3_u32 v0, v0, 12, 0x41
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %umax = call i32 @llvm.umax.i32(i32 %a, i32 12)
  %umed = call i32 @llvm.umin.i32(i32 %umax, i32 65)
  ret i32 %umed
}

declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>)