; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
; GCN-NOT: v_cmp
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
define amdgpu_kernel void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
; GCN-NOT: v_cmp
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
define amdgpu_kernel void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1:
; GCN-NOT: v_cmp
; GCN: s_cmp_eq_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1:
; GCN-NOT: v_cmp
; GCN: s_cmp_eq_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
; GCN-NOT: v_cmp
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
; GCN-NOT: v_cmp
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
; GCN-NOT: v_cmp
; GCN: s_cmp_eq_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
; GCN-NOT: v_cmp
; GCN: s_cmp_eq_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; Reduces to false:
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; Reduces to true:
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; SI-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], 0xff
; SI: s_cmpk_lg_i32 [[B]], 0xff
; SI: s_cselect_b64 [[CC:[^,]+]], -1, 0

; VI: v_mov_b32_e32 [[VK255:v[0-9]+]], 0xff
; VI: s_movk_i32 [[K255:s[0-9]+]], 0xff
; VI: v_and_b32_e32 [[B:v[0-9]+]], [[VALUE]], [[VK255]]
; VI: v_cmp_ne_u16_e32 vcc, [[K255]], [[B]]

; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; VI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, 255
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
; GCN: buffer_load_sbyte [[B:v[0-9]+]]
; GCN: v_cmp_ne_u32_e32 vcc, -1, [[B]]{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
  %b = load i8, i8 addrspace(1)* %b.ptr
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_cmp_sext_k_neg1_i8_sext_arg:
; GCN: v_cmp_ne_u32_e32 vcc, -1, v0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[SELECT]]
define void @v_cmp_sext_k_neg1_i8_sext_arg(i8 signext %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* undef
  ret void
}

; FIXME: This ends up doing a buffer_load_ubyte, an and, and a compare with
; 255. Seems to be because of ordering problems when not allowing load widths
; to be reduced. Should do a buffer_load_sbyte and compare with -1.

; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
; SI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VAL]], 0xff
; GCN: s_cmpk_lg_i32 [[B]], 0xff{{$}}
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FIXME: These cases should really be able to fold to true/false in
; DAGCombiner.

; This really folds away to false
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}