; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s

; A select with an undef operand folds to the other operand, so no
; compare or conditional mask should be emitted.
; GCN-LABEL: {{^}}select_undef_lhs:
; GCN: s_waitcnt
; GCN-NOT: v_cmp
; GCN-NOT: v_cndmask
; GCN-NEXT: s_setpc_b64
define float @select_undef_lhs(float %val, i1 %cond) {
  %undef = call float @llvm.amdgcn.rcp.f32(float undef)
  %sel = select i1 %cond, float %undef, float %val
  ret float %sel
}

; GCN-LABEL: {{^}}select_undef_rhs:
; GCN: s_waitcnt
; GCN-NOT: v_cmp
; GCN-NOT: v_cndmask
; GCN-NEXT: s_setpc_b64
define float @select_undef_rhs(float %val, i1 %cond) {
  %undef = call float @llvm.amdgcn.rcp.f32(float undef)
  %sel = select i1 %cond, float %val, float %undef
  ret float %sel
}

; GCN-LABEL: {{^}}select_undef_n1:
; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 1.0
; GCN: store_dword {{[^,]+}}, [[RES]]
define void @select_undef_n1(float addrspace(1)* %a, i32 %c) {
  %cc = icmp eq i32 %c, 0
  %sel = select i1 %cc, float 1.000000e+00, float undef
  store float %sel, float addrspace(1)* %a
  ret void
}

; GCN-LABEL: {{^}}select_undef_n2:
; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 1.0
; GCN: store_dword {{[^,]+}}, [[RES]]
define void @select_undef_n2(float addrspace(1)* %a, i32 %c) {
  %cc = icmp eq i32 %c, 0
  %sel = select i1 %cc, float undef, float 1.000000e+00
  store float %sel, float addrspace(1)* %a
  ret void
}

declare float @llvm.amdgcn.rcp.f32(float)


; Make sure the vector undef isn't lowered into 0s.
; GCN-LABEL: {{^}}undef_v6f32:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v6f32(<6 x float> addrspace(3)* %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <6 x float> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <6 x float>, <6 x float> addrspace(3)* undef
  %add = fadd <6 x float> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <6 x float> %add, <6 x float> addrspace(3)* undef
  ret void
}

; GCN-LABEL: {{^}}undef_v6i32:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v6i32(<6 x i32> addrspace(3)* %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <6 x i32> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <6 x i32>, <6 x i32> addrspace(3)* undef
  %add = add <6 x i32> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <6 x i32> %add, <6 x i32> addrspace(3)* undef
  ret void
}

; Make sure the vector undef isn't lowered into 0s.
; GCN-LABEL: {{^}}undef_v5f32:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v5f32(<5 x float> addrspace(3)* %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <5 x float> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <5 x float>, <5 x float> addrspace(3)* undef
  %add = fadd <5 x float> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <5 x float> %add, <5 x float> addrspace(3)* undef
  ret void
}

; GCN-LABEL: {{^}}undef_v5i32:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v5i32(<5 x i32> addrspace(3)* %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <5 x i32> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <5 x i32>, <5 x i32> addrspace(3)* undef
  %add = add <5 x i32> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <5 x i32> %add, <5 x i32> addrspace(3)* undef
  ret void
}

; Make sure the vector undef isn't lowered into 0s.
; GCN-LABEL: {{^}}undef_v3f64:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v3f64(<3 x double> addrspace(3)* %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <3 x double> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <3 x double>, <3 x double> addrspace(3)* %ptr
  %add = fadd <3 x double> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <3 x double> %add, <3 x double> addrspace(3)* %ptr
  ret void
}

; GCN-LABEL: {{^}}undef_v3i64:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v3i64(<3 x i64> addrspace(3)* %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <3 x i64> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <3 x i64>, <3 x i64> addrspace(3)* %ptr
  %add = add <3 x i64> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <3 x i64> %add, <3 x i64> addrspace(3)* %ptr
  ret void
}

; Make sure the vector undef isn't lowered into 0s.
; GCN-LABEL: {{^}}undef_v4f16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v4f16(<4 x half> addrspace(3)* %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <4 x half> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <4 x half>, <4 x half> addrspace(3)* %ptr
  %add = fadd <4 x half> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <4 x half> %add, <4 x half> addrspace(3)* %ptr
  ret void
}

; GCN-LABEL: {{^}}undef_v4i16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v4i16(<4 x i16> addrspace(3)* %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <4 x i16> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <4 x i16>, <4 x i16> addrspace(3)* %ptr
  %add = add <4 x i16> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <4 x i16> %add, <4 x i16> addrspace(3)* %ptr
  ret void
}

; Make sure the vector undef isn't lowered into 0s.
; GCN-LABEL: {{^}}undef_v2f16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v2f16(<2 x half> addrspace(3)* %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <2 x half> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <2 x half>, <2 x half> addrspace(3)* %ptr
  %add = fadd <2 x half> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <2 x half> %add, <2 x half> addrspace(3)* %ptr
  ret void
}

; GCN-LABEL: {{^}}undef_v2i16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v2i16(<2 x i16> addrspace(3)* %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <2 x i16> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <2 x i16>, <2 x i16> addrspace(3)* %ptr
  %add = add <2 x i16> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <2 x i16> %add, <2 x i16> addrspace(3)* %ptr
  ret void
}

; We were expanding undef vectors into zero vectors. Optimizations
; would then see that no elements of the vector were used, and reform
; the undef vector, resulting in a combiner loop.
; GCN-LABEL: {{^}}inf_loop_undef_vector:
; GCN: s_waitcnt
; GCN-NEXT: v_mad_u64_u32
; GCN-NEXT: v_mul_lo_u32
; GCN-NEXT: v_mul_lo_u32
; GCN-NEXT: v_add3_u32
; GCN-NEXT: global_store_dwordx2
define void @inf_loop_undef_vector(<6 x float> %arg, float %arg1, i64 %arg2) {
  %i = insertelement <6 x float> %arg, float %arg1, i64 2
  %i3 = bitcast <6 x float> %i to <3 x i64>
  %i4 = extractelement <3 x i64> %i3, i64 0
  %i5 = extractelement <3 x i64> %i3, i64 1
  %i6 = mul i64 %i5, %arg2
  %i7 = add i64 %i6, %i4
  store volatile i64 %i7, i64 addrspace(1)* undef, align 4
  ret void
}