; RUN: llc -march=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope %s

; Although it's modeled without any control flow in order to get better code
; out of the structurizer, @llvm.amdgcn.kill actually ends the thread that calls
; it with "false". In case it's called in a provably infinite loop, we still
; need to successfully exit and export something, even if we can't know where
; to jump to in the LLVM IR. Therefore we insert a null export ourselves in
; this case right before the s_endpgm to avoid GPU hangs, which is what this
; test checks.

; CHECK-LABEL: return_void
; Make sure that we remove the done bit from the original export
; CHECK: exp mrt0 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} vm
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @return_void(float %0) #0 {
main_body:
  ; Inputs below 10.0 take the exporting exit; everything else enters a loop
  ; whose only way out is the kill.
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

end:
  ; The two trailing "i1 true" operands are what the "done" and "vm" checks
  ; above refer to.
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 0., float 0., float 0., float 1., i1 true, i1 true) #0
  ret void
}

; Check that we also remove the done bit from compressed exports correctly.
; CHECK-LABEL: return_void_compr
; CHECK: exp mrt0 v{{[0-9]+}}, off, v{{[0-9]+}}, off compr vm
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @return_void_compr(float %0) #0 {
main_body:
  ; Same shape as @return_void: one exporting exit, one kill-only infinite loop.
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

end:
  call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 5, <2 x i16> < i16 0, i16 0 >, <2 x i16> < i16 0, i16 0 >, i1 true, i1 true) #0
  ret void
}

; Test the case where there's only a kill in an infinite loop.
; CHECK-LABEL: only_kill
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; SILateBranchLowering inserts an extra null export here, but it should be harmless.
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @only_kill() #0 {
main_body:
  br label %loop

loop:
  ; No block in this function returns; the kill is the only exit.
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop
}

; Check that the epilog is the final block
; CHECK-LABEL: return_nonvoid
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB{{[0-9]+}}_{{[0-9]+}}:
define amdgpu_ps float @return_nonvoid(float %0) #0 {
main_body:
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

end:
  ; Returns a value instead of exporting, so the IR itself contains no export.
  ret float 0.
}

declare void @llvm.amdgcn.kill(i1) #0
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
declare void @llvm.amdgcn.exp.compr.v2i16(i32 immarg, i32 immarg, <2 x i16>, <2 x i16>, i1 immarg, i1 immarg) #0

attributes #0 = { nounwind }