; RUN: llc -march=amdgcn -mcpu=pitcairn < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s

; The kernels in this file spell an integer max as icmp + select. The check
; lines list the max instructions expected from the amdgcn (pitcairn) run and
; from the r600 (cypress) run.

; Signed max (sge) of two i32 values loaded through addrspace(1) pointers;
; the second operand is indexed by the workitem id.
; FUNC-LABEL: {{^}}v_test_imax_sge_i32:
; SI: v_max_i32_e32

; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %gep.in, align 4
  %cmp = icmp sge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Same pattern on <4 x i32>: four max instructions are expected on each run.
; FUNC-LABEL: {{^}}v_test_imax_sge_v4i32:
; SI: v_max_i32_e32
; SI: v_max_i32_e32
; SI: v_max_i32_e32
; SI: v_max_i32_e32

; These could be merged into one
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %aptr, align 4
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in, align 4
  %cmp = icmp sge <4 x i32> %a, %b
  %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %val, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; Signed max (sge) of two i32 kernel arguments; the amdgcn run expects the
; scalar s_max_i32 form.
; FUNC-LABEL: {{^}}s_test_imax_sge_i32:
; SI: s_max_i32

; EG: MAX_INT
define amdgpu_kernel void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp sge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}s_test_imax_sge_imm_i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9

; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
  %cmp = icmp sge i32 %a, 9
  %val = select i1 %cmp, i32 %a, i32 9
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Signed max (sge) of two i8 values; the amdgcn run expects two
; buffer_load_sbyte loads feeding a 32-bit v_max_i32_e32.
; FUNC-LABEL: {{^}}v_test_imax_sge_i8:
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: v_max_i32_e32

; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
  %a = load i8, i8 addrspace(1)* %aptr, align 1
  %b = load i8, i8 addrspace(1)* %bptr, align 1
  %cmp = icmp sge i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out, align 1
  ret void
}

; Signed max (sgt) against the constant 9, which is expected as the last
; operand of s_max_i32.
; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9

; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
  %cmp = icmp sgt i32 %a, 9
  %val = select i1 %cmp, i32 %a, i32 9
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> variant of the sgt-against-9 test: one max per element.
; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_v2i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9

; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
; EG: MAX_INT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
  %cmp = icmp sgt <2 x i32> %a, <i32 9, i32 9>
  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 9, i32 9>
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; Signed max (sgt) of two loaded i32 values.
; FUNC-LABEL: {{^}}v_test_imax_sgt_i32:
; SI: v_max_i32_e32

; EG: MAX_INT
define amdgpu_kernel void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %gep.in, align 4
  %cmp = icmp sgt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Signed max (sgt) of two i32 kernel arguments.
; FUNC-LABEL: {{^}}s_test_imax_sgt_i32:
; SI: s_max_i32

; EG: MAX_INT
define amdgpu_kernel void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp sgt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Unsigned max (uge) of two loaded i32 values.
; FUNC-LABEL: {{^}}v_test_umax_uge_i32:
; SI: v_max_u32_e32

; EG: MAX_UINT
define amdgpu_kernel void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %aptr, align 4
  %b = load i32, i32 addrspace(1)* %gep.in, align 4
  %cmp = icmp uge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Unsigned max (uge) of two i32 kernel arguments.
; FUNC-LABEL: {{^}}s_test_umax_uge_i32:
; SI: s_max_u32

; EG: MAX_UINT
define amdgpu_kernel void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp uge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; <3 x i32> unsigned max: exactly three max instructions are expected, so a
; fourth one is rejected by the NOT checks.
; FUNC-LABEL: {{^}}s_test_umax_uge_v3i32:
; SI: s_max_u32
; SI: s_max_u32
; SI: s_max_u32
; SI-NOT: s_max_u32
; SI: s_endpgm

; EG: MAX_UINT
; EG: MAX_UINT
; EG: MAX_UINT
; EG-NOT: MAX_UINT
define amdgpu_kernel void @s_test_umax_uge_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, <3 x i32> %b) nounwind {
  %cmp = icmp uge <3 x i32> %a, %b
  %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
  store <3 x i32> %val, <3 x i32> addrspace(1)* %out, align 4
  ret void
}

; Unsigned max (uge) of two i8 values; the amdgcn run expects two
; buffer_load_ubyte loads feeding a 32-bit v_max_u32_e32.
; FUNC-LABEL: {{^}}v_test_umax_uge_i8:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: v_max_u32_e32

; EG: MAX_UINT
define amdgpu_kernel void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
  %a = load i8, i8 addrspace(1)* %aptr, align 1
  %b = load i8, i8 addrspace(1)* %bptr, align 1
  %cmp = icmp uge i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %out, align 1
  ret void
}

; Unsigned max (ugt) of two loaded i32 values.
; NOTE(review): unlike the sibling v_test_* kernels, %aptr is never used here;
; both loads go through %bptr (one indexed by the workitem id, one not).
; Confirm whether that is intentional before changing it.
; FUNC-LABEL: {{^}}v_test_umax_ugt_i32:
; SI: v_max_u32_e32

; EG: MAX_UINT
define amdgpu_kernel void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep.in, align 4
  %b = load i32, i32 addrspace(1)* %bptr, align 4
  %cmp = icmp ugt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Unsigned max (ugt) of two i32 kernel arguments.
; FUNC-LABEL: {{^}}s_test_umax_ugt_i32:
; SI: s_max_u32

; EG: MAX_UINT
define amdgpu_kernel void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp ugt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> unsigned max against the distinct constants 15 and 23; the two
; amdgcn max instructions may appear in either order (DAG checks).
; FUNC-LABEL: {{^}}s_test_umax_ugt_imm_v2i32:
; SI-DAG: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15
; SI-DAG: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23

; EG: MAX_UINT {{.*}}literal.{{[xyzw]}}
; EG: MAX_UINT {{.*}}literal.{{[xyzw]}}
define amdgpu_kernel void @s_test_umax_ugt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
  %cmp = icmp ugt <2 x i32> %a, <i32 15, i32 23>
  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 15, i32 23>
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; Make sure the redundant 'and' of the max result is removed: the checks
; expect the s_max_u32 result to be moved and stored directly.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umax_ugt_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
; SI-DAG: s_and_b32 [[A16:s[0-9]+]], [[A]], 0xffff
; SI-DAG: s_and_b32 [[B16:s[0-9]+]], [[B]], 0xffff
; SI: s_max_u32 [[MAX:s[0-9]+]], [[A16]], [[B16]]
; SI: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
; SI: buffer_store_dword [[VMAX]]

; EG: MAX_UINT
define amdgpu_kernel void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, [8 x i32], i16 zeroext %a, [8 x i32], i16 zeroext %b) nounwind {
  %a.ext = zext i16 %a to i32
  %b.ext = zext i16 %b to i32
  %cmp = icmp ugt i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %mask = and i32 %val, 65535
  store i32 %mask, i32 addrspace(1)* %out
  ret void
}

; Make sure redundant sign_extend_inreg removed.
; The shl/ashr pair below re-sign-extends the low 16 bits of the max result;
; the checks expect the s_max_i32 result to be moved and stored directly.
; NOTE(review): the kernel name says slt, but the IR compares with sgt.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_max_slt_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
; SI-DAG: s_sext_i32_i16 [[EXT_A:s[0-9]+]], [[A]]
; SI-DAG: s_sext_i32_i16 [[EXT_B:s[0-9]+]], [[B]]

; SI: s_max_i32 [[MAX:s[0-9]+]], [[EXT_A]], [[EXT_B]]
; SI: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
; SI: buffer_store_dword [[VMAX]]

; EG: MAX_INT
define amdgpu_kernel void @simplify_demanded_bits_test_max_slt_i16(i32 addrspace(1)* %out, [8 x i32], i16 signext %a, [8 x i32], i16 signext %b) nounwind {
  %a.ext = sext i16 %a to i32
  %b.ext = sext i16 %b to i32
  %cmp = icmp sgt i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %shl = shl i32 %val, 16
  %sextinreg = ashr i32 %shl, 16
  store i32 %sextinreg, i32 addrspace(1)* %out
  ret void
}

; Signed max (sge) of two i16 kernel arguments; the amdgcn run expects both
; operands to go through s_sext_i32_i16 before a 32-bit s_max_i32.
; FUNC-LABEL: {{^}}s_test_imax_sge_i16:
; SI: s_load_dword
; SI: s_load_dword
; SI: s_sext_i32_i16
; SI: s_sext_i32_i16
; SI: s_max_i32

; EG: MAX_INT
define amdgpu_kernel void @s_test_imax_sge_i16(i16 addrspace(1)* %out, [8 x i32], i16 %a, [8 x i32], i16 %b) nounwind {
  %cmp = icmp sge i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out
  ret void
}

; 64 bit
; For the i64 kernels the amdgcn run only checks that s_endpgm is emitted;
; the r600 run expects two max instructions per kernel.
; FUNC-LABEL: {{^}}test_umax_ugt_i64:
; SI: s_endpgm

; EG: MAX_UINT
; EG: MAX_UINT
define amdgpu_kernel void @test_umax_ugt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %tmp = icmp ugt i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_umax_uge_i64:
; SI: s_endpgm

; EG: MAX_UINT
; EG: MAX_UINT
define amdgpu_kernel void @test_umax_uge_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %tmp = icmp uge i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; The signed i64 cases expect one unsigned and one signed max on r600, in
; either order (DAG checks).
; FUNC-LABEL: {{^}}test_imax_sgt_i64:
; SI: s_endpgm

; EG-DAG: MAX_UINT
; EG-DAG: MAX_INT
define amdgpu_kernel void @test_imax_sgt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %tmp = icmp sgt i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}test_imax_sge_i64:
; SI: s_endpgm

; EG-DAG: MAX_UINT
; EG-DAG: MAX_INT
define amdgpu_kernel void @test_imax_sge_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %tmp = icmp sge i64 %a, %b
  %val = select i1 %tmp, i64 %a, i64 %b
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}


declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }