; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s

; Codegen tests for the llvm.AMDGPU.kill intrinsic on amdgcn: checks the
; emitted exec-mask updates (s_mov_b64 exec / v_cmpx_le_f32) and the
; skip-if-all-lanes-dead branches around killed regions.

; CHECK-LABEL: {{^}}test_kill_depth_0_imm_pos:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
  call void @llvm.AMDGPU.kill(float 0.0)
  ret void
}

; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
  call void @llvm.AMDGPU.kill(float -0.0)
  ret void
}

; FIXME: Ideally only one would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg_x2:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
  call void @llvm.AMDGPU.kill(float -0.0)
  call void @llvm.AMDGPU.kill(float -1.0)
  ret void
}

; CHECK-LABEL: {{^}}test_kill_depth_var:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  ret void
}

; FIXME: Ideally only one would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_var_x2_same:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  call void @llvm.AMDGPU.kill(float %x)
  ret void
}

; CHECK-LABEL: {{^}}test_kill_depth_var_x2:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v1
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  call void @llvm.AMDGPU.kill(float %y)
  ret void
}

; CHECK-LABEL: {{^}}test_kill_depth_var_x2_instructions:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#1:
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={VGPR7}"()
  call void @llvm.AMDGPU.kill(float %y)
  ret void
}

; FIXME: why does the skip depend on the asm length in the same block?

; CHECK-LABEL: {{^}}test_kill_control_flow:
; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
; CHECK: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; BB#1:
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64

; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm

; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_control_flow(i32 inreg %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={VGPR7}"()
  call void @llvm.AMDGPU.kill(float %var)
  br label %exit

exit:
  ret void
}

; CHECK-LABEL: {{^}}test_kill_control_flow_remainder:
; CHECK: s_cmp_lg_u32 s{{[0-9]+}}, 0
; CHECK-NEXT: v_mov_b32_e32 v{{[0-9]+}}, 0
; CHECK-NEXT: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; BB#1: ; %bb
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: ;;#ASMEND
; CHECK: v_mov_b32_e64 v8, -1
; CHECK: ;;#ASMEND
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm

; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
; CHECK: buffer_store_dword v8
; CHECK: v_mov_b32_e64 v9, -2

; CHECK: {{^}}BB{{[0-9]+_[0-9]+}}:
; CHECK: buffer_store_dword v9
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={VGPR7}"()
  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={VGPR8}"()
  call void @llvm.AMDGPU.kill(float %var)
  store volatile float %live.across, float addrspace(1)* undef
  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={VGPR9}"()
  br label %exit

exit:
  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
  store float %phi, float addrspace(1)* undef
  ret void
}

; CHECK-LABEL: {{^}}test_kill_divergent_loop:
; CHECK: v_cmp_eq_u32_e32 vcc, 0, v0
; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
; CHECK-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: s_cbranch_execz [[EXIT]]

; CHECK: {{BB[0-9]+_[0-9]+}}: ; %bb.preheader
; CHECK: s_mov_b32

; CHECK: [[LOOP_BB:BB[0-9]+_[0-9]+]]:

; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7

; CHECK-NEXT: ; BB#3:
; CHECK: buffer_load_dword [[LOAD:v[0-9]+]]
; CHECK: v_cmp_eq_u32_e32 vcc, 0, [[LOAD]]
; CHECK-NEXT: s_and_b64 vcc, exec, vcc
; CHECK-NEXT: s_cbranch_vccnz [[LOOP_BB]]

; CHECK-NEXT: {{^}}[[EXIT]]:
; CHECK: s_or_b64 exec, exec, [[SAVEEXEC]]
; CHECK: buffer_store_dword
; CHECK: s_endpgm
define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={VGPR7}"()
  call void @llvm.AMDGPU.kill(float %var)
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  %loop.cond = icmp eq i32 %vgpr, 0
  br i1 %loop.cond, label %bb, label %exit

exit:
  store volatile i32 8, i32 addrspace(1)* undef
  ret void
}

; bug 28550
; CHECK-LABEL: {{^}}phi_use_def_before_kill:
; CHECK: v_cndmask_b32_e64 [[PHIREG:v[0-9]+]], 0, -1.0,
; CHECK: v_cmpx_le_f32_e32 vcc, 0,
; CHECK-NEXT: s_cbranch_execnz [[BB4:BB[0-9]+_[0-9]+]]

; CHECK: exp
; CHECK-NEXT: s_endpgm

; CHECK: [[KILLBB:BB[0-9]+_[0-9]+]]:
; CHECK-NEXT: s_cbranch_scc0 [[PHIBB:BB[0-9]+_[0-9]+]]

; CHECK: [[PHIBB]]:
; CHECK: v_cmp_eq_f32_e32 vcc, 0, [[PHIREG]]
; CHECK-NEXT: s_cbranch_vccz [[ENDBB:BB[0-9]+_[0-9]+]]

; CHECK: ; %bb10
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 9
; CHECK: buffer_store_dword

; CHECK: [[ENDBB]]:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @phi_use_def_before_kill() #0 {
bb:
  %tmp = fadd float undef, 1.000000e+00
  %tmp1 = fcmp olt float 0.000000e+00, %tmp
  %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
  call void @llvm.AMDGPU.kill(float %tmp2)
  br i1 undef, label %phibb, label %bb8

phibb:
  %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
  %tmp6 = fcmp oeq float %tmp5, 0.000000e+00
  br i1 %tmp6, label %bb10, label %end

bb8:
  store volatile i32 8, i32 addrspace(1)* undef
  br label %phibb

bb10:
  store volatile i32 9, i32 addrspace(1)* undef
  br label %end

end:
  ret void
}

; CHECK-LABEL: {{^}}no_skip_no_successors:
; CHECK: v_cmp_nge_f32
; CHECK-NEXT: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]]

; CHECK: ; %bb6
; CHECK: s_mov_b64 exec, 0

; CHECK: [[SKIPKILL]]:
; CHECK: v_cmp_nge_f32_e32 vcc
; CHECK-NEXT: BB#3: ; %bb5
; CHECK-NEXT: .Lfunc_end{{[0-9]+}}
define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
bb:
  %tmp = fcmp ult float %arg1, 0.000000e+00
  %tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000
  br i1 %tmp, label %bb6, label %bb3

bb3:                                              ; preds = %bb
  br i1 %tmp2, label %bb5, label %bb4

bb4:                                              ; preds = %bb3
  br i1 true, label %bb5, label %bb7

bb5:                                              ; preds = %bb4, %bb3
  unreachable

bb6:                                              ; preds = %bb
  call void @llvm.AMDGPU.kill(float -1.000000e+00)
  unreachable

bb7:                                              ; preds = %bb4
  ret void
}

; CHECK-LABEL: {{^}}if_after_kill_block:
; CHECK: ; BB#0:
; CHECK: s_and_saveexec_b64
; CHECK: s_xor_b64
; CHECK-NEXT: mask branch [[BB4:BB[0-9]+_[0-9]+]]

; CHECK: v_cmpx_le_f32_e32 vcc, 0,
; CHECK: [[BB4]]:
; CHECK: s_or_b64 exec, exec
; CHECK: image_sample_c

; CHECK: v_cmp_neq_f32_e32 vcc, 0,
; CHECK: s_and_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc
; CHECK: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, exec
; CHECK: mask branch [[END:BB[0-9]+_[0-9]+]]
; CHECK-NOT: branch

; CHECK: BB{{[0-9]+_[0-9]+}}: ; %bb8
; CHECK: buffer_store_dword

; CHECK: [[END]]:
; CHECK: s_or_b64 exec, exec
; CHECK: s_endpgm
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x i32> %arg2) #0 {
bb:
  %tmp = fcmp ult float %arg1, 0.000000e+00
  br i1 %tmp, label %bb3, label %bb4

bb3:                                              ; preds = %bb
  call void @llvm.AMDGPU.kill(float %arg)
  br label %bb4

bb4:                                              ; preds = %bb3, %bb
  %tmp5 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %arg2, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %tmp6 = extractelement <4 x float> %tmp5, i32 0
  %tmp7 = fcmp une float %tmp6, 0.000000e+00
  br i1 %tmp7, label %bb8, label %bb9

bb8:                                              ; preds = %bb9, %bb4
  store volatile i32 9, i32 addrspace(1)* undef
  ret void

bb9:                                              ; preds = %bb4
  ret void
}

declare void @llvm.AMDGPU.kill(float) #0
declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) nounwind

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }