; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Integer max selection tests.
;
; Every function below expresses max(a, b) as an icmp (sge/sgt/uge/ugt)
; feeding a select, and the checks expect a single max instruction per
; element: v_max_* when the operands are loaded from addrspace(1) memory
; indexed by the tidig.x intrinsic, s_max_* when the operands are passed
; directly as function arguments.

declare i32 @llvm.r600.read.tidig.x() nounwind readnone

; ---------------------------------------------------------------------------
; Signed max, sge predicate
; ---------------------------------------------------------------------------

; i32 operands loaded from memory.
; FUNC-LABEL: {{^}}v_test_imax_sge_i32:
; SI: v_max_i32_e32
define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0, align 4
  %b = load i32, i32 addrspace(1)* %gep1, align 4
  %cmp = icmp sge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %outgep, align 4
  ret void
}

; <4 x i32> operands loaded from memory: one max is expected per element.
; FUNC-LABEL: {{^}}v_test_imax_sge_v4i32:
; SI: v_max_i32_e32
; SI: v_max_i32_e32
; SI: v_max_i32_e32
; SI: v_max_i32_e32
define void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %out, i32 %tid
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %gep0, align 4
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %gep1, align 4
  %cmp = icmp sge <4 x i32> %a, %b
  %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %val, <4 x i32> addrspace(1)* %outgep, align 4
  ret void
}

; i32 operands passed as arguments.
; FUNC-LABEL: {{^}}s_test_imax_sge_i32:
; SI: s_max_i32
define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp sge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; One argument operand and the constant 9, which is expected to appear as
; the last operand of the max.
; FUNC-LABEL: {{^}}s_test_imax_sge_imm_i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
define void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
  %cmp = icmp sge i32 %a, 9
  %val = select i1 %cmp, i32 %a, i32 9
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 operands loaded from memory: the checks expect two buffer_load_sbyte
; loads followed by a 32-bit v_max_i32.
; FUNC-LABEL: {{^}}v_test_imax_sge_i8:
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: v_max_i32_e32
define void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
  %a = load i8, i8 addrspace(1)* %gep0, align 1
  %b = load i8, i8 addrspace(1)* %gep1, align 1
  %cmp = icmp sge i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %outgep, align 1
  ret void
}

; ---------------------------------------------------------------------------
; Signed max, sgt predicate
; ---------------------------------------------------------------------------

; One argument operand and the constant 9.
; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
  %cmp = icmp sgt i32 %a, 9
  %val = select i1 %cmp, i32 %a, i32 9
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> argument against a splat of 9: one max per element.
; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_v2i32:
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
define void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
  %cmp = icmp sgt <2 x i32> %a, <i32 9, i32 9>
  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 9, i32 9>
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; i32 operands loaded from memory.
; FUNC-LABEL: {{^}}v_test_imax_sgt_i32:
; SI: v_max_i32_e32
define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0, align 4
  %b = load i32, i32 addrspace(1)* %gep1, align 4
  %cmp = icmp sgt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %outgep, align 4
  ret void
}

; i32 operands passed as arguments.
; FUNC-LABEL: {{^}}s_test_imax_sgt_i32:
; SI: s_max_i32
define void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp sgt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; ---------------------------------------------------------------------------
; Unsigned max, uge predicate
; ---------------------------------------------------------------------------

; i32 operands loaded from memory.
; FUNC-LABEL: {{^}}v_test_umax_uge_i32:
; SI: v_max_u32_e32
define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0, align 4
  %b = load i32, i32 addrspace(1)* %gep1, align 4
  %cmp = icmp uge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %outgep, align 4
  ret void
}

; i32 operands passed as arguments.
; FUNC-LABEL: {{^}}s_test_umax_uge_i32:
; SI: s_max_u32
define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp uge i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; <3 x i32> arguments: exactly three max instructions are expected, and no
; fourth one may appear before the end of the program.
; FUNC-LABEL: {{^}}s_test_umax_uge_v3i32:
; SI: s_max_u32
; SI: s_max_u32
; SI: s_max_u32
; SI-NOT: s_max_u32
; SI: s_endpgm
define void @s_test_umax_uge_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, <3 x i32> %b) nounwind {
  %cmp = icmp uge <3 x i32> %a, %b
  %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b
  store <3 x i32> %val, <3 x i32> addrspace(1)* %out, align 4
  ret void
}

; i8 operands loaded from memory: the checks expect two buffer_load_ubyte
; loads followed by a 32-bit v_max_u32.
; FUNC-LABEL: {{^}}v_test_umax_uge_i8:
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: v_max_u32_e32
define void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
  %a = load i8, i8 addrspace(1)* %gep0, align 1
  %b = load i8, i8 addrspace(1)* %gep1, align 1
  %cmp = icmp uge i8 %a, %b
  %val = select i1 %cmp, i8 %a, i8 %b
  store i8 %val, i8 addrspace(1)* %outgep, align 1
  ret void
}

; ---------------------------------------------------------------------------
; Unsigned max, ugt predicate
; ---------------------------------------------------------------------------

; i32 operands loaded from memory.
; FUNC-LABEL: {{^}}v_test_umax_ugt_i32:
; SI: v_max_u32_e32
define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
  %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0, align 4
  %b = load i32, i32 addrspace(1)* %gep1, align 4
  %cmp = icmp ugt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %outgep, align 4
  ret void
}

; i32 operands passed as arguments.
; FUNC-LABEL: {{^}}s_test_umax_ugt_i32:
; SI: s_max_u32
define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %cmp = icmp ugt i32 %a, %b
  %val = select i1 %cmp, i32 %a, i32 %b
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; <2 x i32> argument against the non-splat constant <15, 23>: each element's
; max is expected to carry its own immediate.
; FUNC-LABEL: {{^}}s_test_umax_ugt_imm_v2i32:
; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15
; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23
define void @s_test_umax_ugt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind {
  %cmp = icmp ugt <2 x i32> %a, <i32 15, i32 23>
  %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 15, i32 23>
  store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; ---------------------------------------------------------------------------
; Redundant extension removal after max
; ---------------------------------------------------------------------------

; Make sure the redundant `and` (mask with 65535) is removed: both inputs are
; zero-extended from i16, and the checks require the max result to be moved
; and stored with no instruction in between.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umax_ugt_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
; SI: s_max_u32 [[MAX:s[0-9]+]], [[A]], [[B]]
; SI-NEXT: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
; SI-NEXT: buffer_store_dword [[VMAX]]
define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind {
  %a.ext = zext i16 %a to i32
  %b.ext = zext i16 %b to i32
  %cmp = icmp ugt i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %mask = and i32 %val, 65535
  store i32 %mask, i32 addrspace(1)* %out
  ret void
}

; Make sure the redundant sign_extend_inreg (shl 16 followed by ashr 16) is
; removed: both inputs are sign-extended from i16, and the checks require the
; max result to be moved and stored with no instruction in between.
; NOTE: despite "slt" in the function name, the comparison below is sgt.
; FUNC-LABEL: {{^}}simplify_demanded_bits_test_max_slt_i16:
; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
; SI: s_max_i32 [[MAX:s[0-9]+]], [[A]], [[B]]
; SI-NEXT: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]]
; SI-NEXT: buffer_store_dword [[VMAX]]
define void @simplify_demanded_bits_test_max_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind {
  %a.ext = sext i16 %a to i32
  %b.ext = sext i16 %b to i32
  %cmp = icmp sgt i32 %a.ext, %b.ext
  %val = select i1 %cmp, i32 %a.ext, i32 %b.ext
  %shl = shl i32 %val, 16
  %sextinreg = ashr i32 %shl, 16
  store i32 %sextinreg, i32 addrspace(1)* %out
  ret void
}

; i16 arguments with no extension attribute: the checks expect both operands
; to be loaded, each sign-extended with s_sext_i32_i16, and then a 32-bit
; signed max.
; FUNC-LABEL: {{^}}s_test_imax_sge_i16:
; SI: s_load_dword
; SI: s_load_dword
; SI: s_sext_i32_i16
; SI: s_sext_i32_i16
; SI: s_max_i32
define void @s_test_imax_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind {
  %cmp = icmp sge i16 %a, %b
  %val = select i1 %cmp, i16 %a, i16 %b
  store i16 %val, i16 addrspace(1)* %out
  ret void
}