; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; The functions in this part of the file compare two i32 arguments,
; sign-extend the i1 result to i32, compare that value against a constant and
; store the final i1. The checks expect a single v_cmp feeding a v_cndmask
; rather than two separate compares.

; sext(a == b) == 0 is true exactly when a != b, hence the v_cmp_ne check.
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+\.[XYZW]}}, PS, 1
define void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; sext(a != b) != 0 is true exactly when a != b, so v_cmp_ne is expected again.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+\.[XYZW]}}, PS, 1
define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; sext(a == b) == -1 is true exactly when a == b, hence the v_cmp_eq check.
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1:
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define void @sext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}
; sext(a != b) != -1 is true exactly when a == b, hence the v_cmp_eq check.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1:
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define void @sext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; zext(a == b) == 0 is true exactly when a != b, hence the v_cmp_ne check.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; zext(a != b) != 0 is true exactly when a != b, so v_cmp_ne is expected.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
; GCN-NOT: v_cmp
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; zext(a == b) == 1 is true exactly when a == b, hence the v_cmp_eq check.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; zext(a != b) != 1 is true exactly when a == b, hence the v_cmp_eq check.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
; GCN-NOT: v_cmp
; GCN: v_cmp_eq_u32_e32 vcc,
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN-NEXT: buffer_store_byte [[RESULT]]
define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A zero-extended i1 is 0 or 1 and can never equal -1, so the stored value is
; expected to be the constant 0.
; Reduces to false:
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_eq_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; Same reasoning with the predicate inverted, so the constant 1 is expected.
; Reduces to true:
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_ne_neg1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; An i8 kernel argument is zero-extended and compared (ne) with 255. The checks
; expect the loaded dword to be masked with 0xff and compared against that same
; constant, using a 32-bit compare for the SI prefix and a 16-bit compare for
; the VI prefix.
; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff
; GCN-DAG: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]]
; SI-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]]
; SI: v_cmp_ne_u32_e32 vcc, [[B]], [[VK255]]

; VI-DAG: v_and_b32_e32 [[B:v[0-9]+]], [[VALUE]], [[VK255]]
; VI: v_cmp_ne_u16_e32 vcc, [[K255]], [[B]]

; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, 255
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; An i8 loaded from memory is sign-extended and compared (ne) with -1. The
; checks expect a signed byte load compared directly against -1.
; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
; GCN: buffer_load_sbyte [[B:v[0-9]+]]
; GCN: v_cmp_ne_u32_e32 vcc, -1, [[B]]{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
  %b = load i8, i8 addrspace(1)* %b.ptr
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; Same compare, but the i8 arrives as a signext kernel argument. The checks
; expect the loaded dword to be compared with -1 directly.
; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg:
; GCN: s_load_dword [[B:s[0-9]+]]
; GCN: v_cmp_ne_u32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; FIXME: This ends up doing a buffer_load_ubyte, and compare to
; 255. Seems to be because of ordering problems when not allowing load widths to be reduced.
; Should do a buffer_load_sbyte and compare with -1

; An i8 kernel argument without signext is sign-extended and compared (ne)
; with -1. The checks currently expect the loaded dword to be masked with 0xff
; and compared against 0xff.
; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN: s_movk_i32 [[K:s[0-9]+]], 0xff
; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]]
; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
; GCN: v_cmp_ne_u32_e32 vcc, [[B]], [[VK]]{{$}}
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; A zero-extended i8 lies in [0, 255] and can never equal -1 as an i32, so the
; ne compare is always true and the constant 1 is expected to be stored.
; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, i1 addrspace(1)* %out
  ret void
}

; A zero-extended i1 is 0 or 1 and therefore never 2, so ne is always true.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A zero-extended i1 is 0 or 1 and therefore never 2, so eq is always false.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; FIXME: These cases should really be able to fold to true/false in
; DAGCombiner

; A sign-extended i1 is 0 or -1 and therefore never 1, so eq is always false.
; This really folds away to false
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[K]]
define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A sign-extended i1 is 0 or -1 and therefore never 1, so ne is always true.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}

; A sign-extended i1 is 0 or -1 and therefore never 2, so ne is always true.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, i1 addrspace(1)* %out
  ret void
}