; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=pitcairn < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s

; Checks that "icmp {sge,sgt,uge,ugt}" followed by a select of the same two
; operands is selected to an integer max instruction:
;   - amdgcn/pitcairn (SI checks): v_max_{i,u}32 / s_max_{i,u}32
;   - r600/cypress    (EG checks): MAX_INT / MAX_UINT
; Kernels prefixed with v_ load their operands from global memory; kernels
; prefixed with s_ take them as kernel arguments.
; Every function label is anchored as {{^}}name: so that it can only match the
; function's own label line.

;------------------------------------------------------------------------------
; Signed max, "sge" predicate
;------------------------------------------------------------------------------

; FUNC-LABEL: {{^}}v_test_imax_sge_i32:
; SI: v_max_i32_e32

; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %bptr, align 4
  %cmp = icmp sge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; One max per vector element is expected on both targets.
; FUNC-LABEL: {{^}}v_test_imax_sge_v4i32:
; SI: v_max_i32_e32
; SI: v_max_i32_e32
; SI: v_max_i32_e32
; SI: v_max_i32_e32

; These could be merged into one
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind {
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %aptr, align 4
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %bptr, align 4
  %cmp = icmp sge <4 x i32> %a, %b
  %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %val, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imax_sge_i32:
; SI: s_max_i32

; EG: MAX_INT
define amdgpu_kernel void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp sge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; The constant 9 is expected as an immediate operand of the max on SI and as a
; literal operand on EG.
; FUNC-LABEL: {{^}}s_test_imax_sge_imm_i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9

; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
  %cmp = icmp sge i32 %a, 9
  %val = select i1 %cmp, i32 %a, i32 9
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 operands: SI is expected to use sign-extending byte loads followed by a
; 32-bit signed max.
; FUNC-LABEL: {{^}}v_test_imax_sge_i8:
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: v_max_i32_e32

; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
  %a = load i8, i8 addrspace(1)* %aptr, align 1
  %b = load i8, i8 addrspace(1)* %bptr, align 1
  %cmp = icmp sge i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out, align 1
  ret void
}

;------------------------------------------------------------------------------
; Signed max, "sgt" predicate
;------------------------------------------------------------------------------

; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9

; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
  %cmp = icmp sgt i32 %a, 9
  %val = select i1 %cmp, i32 %a, i32 9
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_v2i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9

; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
  %cmp = icmp sgt <2 x i32> %a, <i32 9, i32 9>
  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 9, i32 9>
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v_test_imax_sgt_i32:
; SI: v_max_i32_e32

; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %bptr, align 4
  %cmp = icmp sgt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imax_sgt_i32:
; SI: s_max_i32

; EG: MAX_INT
define amdgpu_kernel void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp sgt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

;------------------------------------------------------------------------------
; Unsigned max, "uge" predicate
;------------------------------------------------------------------------------

; FUNC-LABEL: {{^}}v_test_umax_uge_i32:
; SI: v_max_u32_e32

; EG: MAX_UINT
define amdgpu_kernel void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %bptr, align 4
  %cmp = icmp uge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_umax_uge_i32:
; SI: s_max_u32

; EG: MAX_UINT
define amdgpu_kernel void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp uge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; <3 x i32>: exactly three max instructions are expected; the -NOT checks
; reject a fourth one.
; FUNC-LABEL: {{^}}s_test_umax_uge_v3i32:
; SI: s_max_u32
; SI: s_max_u32
; SI: s_max_u32
; SI-NOT: s_max_u32
; SI: s_endpgm

; EG: MAX_UINT
; EG: MAX_UINT
; EG: MAX_UINT
; EG-NOT: MAX_UINT
define amdgpu_kernel void @s_test_umax_uge_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, <3 x i32> %b) nounwind {
  %cmp = icmp uge <3 x i32> %a, %b
  %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
  store <3 x i32> %val, <3 x i32> addrspace(1)* %out, align 4
  ret void
}

; i8 operands: SI is expected to use zero-extending byte loads followed by a
; 32-bit unsigned max.
; FUNC-LABEL: {{^}}v_test_umax_uge_i8:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: v_max_u32_e32

; EG: MAX_UINT
define amdgpu_kernel void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
  %a = load i8, i8 addrspace(1)* %aptr, align 1
  %b = load i8, i8 addrspace(1)* %bptr, align 1
  %cmp = icmp uge i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out, align 1
  ret void
}

;------------------------------------------------------------------------------
; Unsigned max, "ugt" predicate
;------------------------------------------------------------------------------

; FUNC-LABEL: {{^}}v_test_umax_ugt_i32:
; SI: v_max_u32_e32

; EG: MAX_UINT
define amdgpu_kernel void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %bptr, align 4
  %cmp = icmp ugt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_umax_ugt_i32:
; SI: s_max_u32

; EG: MAX_UINT
define amdgpu_kernel void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp ugt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Different constant per element (15 and 23); the SI checks use -DAG so the two
; max instructions may appear in either order.
; FUNC-LABEL: {{^}}s_test_umax_ugt_imm_v2i32:
; SI-DAG: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15
; SI-DAG: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23

; EG: MAX_UINT {{.*}}literal.{{[xyzw]}}
; EG: MAX_UINT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_umax_ugt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
  %cmp = icmp ugt <2 x i32> %a, <i32 15, i32 23>
  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 15, i32 23>
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

;------------------------------------------------------------------------------
; Demanded-bits simplification through max
;------------------------------------------------------------------------------

; Make sure the redundant 'and' is removed.
; Both inputs are zero-extended from i16, so the "and 65535" applied to the max
; result is a no-op; the SI checks expect the max result to be moved and stored
; directly.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umax_ugt_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
; SI: s_max_u32 [[MAX:s[0-9]+]], [[A]], [[B]]
; SI: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
; SI: buffer_store_dword [[VMAX]]

; EG: MAX_UINT
define amdgpu_kernel void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, [8 x i32], i16 zeroext %a, [8 x i32], i16 zeroext %b) nounwind {
  %a.ext = zext i16 %a to i32
  %b.ext = zext i16 %b to i32
  %cmp = icmp ugt i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %mask = and i32 %val, 65535
  store i32 %mask, i32 addrspace(1)* %out
  ret void
}

; Make sure the redundant sign_extend_inreg is removed.
; Both inputs are sign-extended from i16, so the shl/ashr-by-16 pair applied to
; the max result is a no-op; the SI checks expect the max result to be moved
; and stored directly.
; NOTE(review): the function name says "slt" but the compare below is
; "icmp sgt" (a signed max). The name is kept unchanged so the label stays
; stable.

; FUNC-LABEL: {{^}}simplify_demanded_bits_test_max_slt_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
; SI-DAG: s_sext_i32_i16 [[EXT_A:s[0-9]+]], [[A]]
; SI-DAG: s_sext_i32_i16 [[EXT_B:s[0-9]+]], [[B]]

; SI: s_max_i32 [[MAX:s[0-9]+]], [[EXT_A]], [[EXT_B]]
; SI: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
; SI: buffer_store_dword [[VMAX]]

; EG: MAX_INT
define amdgpu_kernel void @simplify_demanded_bits_test_max_slt_i16(i32 addrspace(1)* %out, [8 x i32], i16 signext %a, [8 x i32], i16 signext %b) nounwind {
  %a.ext = sext i16 %a to i32
  %b.ext = sext i16 %b to i32
  %cmp = icmp sgt i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %shl = shl i32 %val, 16
  %sextinreg = ashr i32 %shl, 16
  store i32 %sextinreg, i32 addrspace(1)* %out
  ret void
}

; i16 kernel arguments without an extension attribute: SI is expected to
; sign-extend both operands explicitly before a 32-bit signed max.
; FUNC-LABEL: {{^}}s_test_imax_sge_i16:
; SI: s_load_dword
; SI: s_load_dword
; SI: s_sext_i32_i16
; SI: s_sext_i32_i16
; SI: s_max_i32

; EG: MAX_INT
define amdgpu_kernel void @s_test_imax_sge_i16(i16 addrspace(1)* %out, [8 x i32], i16 %a, [8 x i32], i16 %b) nounwind {
  %cmp = icmp sge i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out
  ret void
}

;------------------------------------------------------------------------------
; 64 bit
; For the SI run these tests only check that a kernel end (s_endpgm) is
; emitted; no particular max instruction is required. For the EG run the
; unsigned tests expect two MAX_UINT and the signed tests expect one MAX_UINT
; and one MAX_INT, in either order.
;------------------------------------------------------------------------------

; FUNC-LABEL: {{^}}test_umax_ugt_i64:
; SI: s_endpgm

; EG: MAX_UINT
; EG: MAX_UINT
define amdgpu_kernel void @test_umax_ugt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %tmp = icmp ugt i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_umax_uge_i64:
; SI: s_endpgm

; EG: MAX_UINT
; EG: MAX_UINT
define amdgpu_kernel void @test_umax_uge_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %tmp = icmp uge i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_imax_sgt_i64:
; SI: s_endpgm

; EG-DAG: MAX_UINT
; EG-DAG: MAX_INT
define amdgpu_kernel void @test_imax_sgt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %tmp = icmp sgt i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_imax_sge_i64:
; SI: s_endpgm

; EG-DAG: MAX_UINT
; EG-DAG: MAX_INT
define amdgpu_kernel void @test_imax_sge_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %tmp = icmp sge i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}