; Checks which callees the O3 pipeline inlines into AMDGPU kernels, with both
; the legacy (-O3) and new (-passes='default<O3>') pass-manager spellings.
; Check prefixes used below:
;   GCN          - common to every invocation
;   GCN-INL1     - invocations passing -inline-threshold=1
;   GCN-INLDEF   - invocations using the default inline threshold
;   GCN-MAXBBDEF - invocations that do not pass -amdgpu-inline-max-bb
;   GCN-MAXBB1   - the invocation passing -amdgpu-inline-max-bb=1
; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S -inline-threshold=1 < %s | FileCheck -check-prefixes=GCN,GCN-INL1,GCN-MAXBBDEF %s
; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S < %s | FileCheck -check-prefixes=GCN,GCN-INLDEF,GCN-MAXBBDEF %s
; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default<O3>' -S -inline-threshold=1 < %s | FileCheck -check-prefixes=GCN,GCN-INL1,GCN-MAXBBDEF %s
; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default<O3>' -S < %s | FileCheck -check-prefixes=GCN,GCN-INLDEF,GCN-MAXBBDEF %s
; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default<O3>' -S -amdgpu-inline-max-bb=1 < %s | FileCheck -check-prefixes=GCN,GCN-MAXBB1 %s

; Single-block cold callee with no pointer arguments: returns y/x when x > 0,
; otherwise x*y.
define coldcc float @foo(float %x, float %y) {
entry:
  %cmp = fcmp ogt float %x, 0.000000e+00
  %div = fdiv float %y, %x
  %mul = fmul float %x, %y
  %cond = select i1 %cmp, float %div, float %mul
  ret float %cond
}

; Multi-block cold callee taking one private (addrspace(5)) pointer: replaces
; *p with 1.0 / *p when *p > 1.0.
define coldcc void @foo_private_ptr(float addrspace(5)* nocapture %p) {
entry:
  %tmp1 = load float, float addrspace(5)* %p, align 4
  %cmp = fcmp ogt float %tmp1, 1.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %div = fdiv float 1.000000e+00, %tmp1
  store float %div, float addrspace(5)* %p, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  ret void
}

; Multi-block cold callee taking two private pointers: stores 2.0 / *p1 to *p2
; when *p1 > 1.0.
define coldcc void @foo_private_ptr2(float addrspace(5)* nocapture %p1, float addrspace(5)* nocapture %p2) {
entry:
  %tmp1 = load float, float addrspace(5)* %p1, align 4
  %cmp = fcmp ogt float %tmp1, 1.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:
  %div = fdiv float 2.000000e+00, %tmp1
  store float %div, float addrspace(5)* %p2, align 4
  br label %if.end

if.end:
  ret void
}

; Single-block wrapper around the external @_Z3sinf declaration.
define float @sin_wrapper(float %x) {
bb:
  %call = tail call float @_Z3sinf(float %x)
  ret float %call
}

; Carries attribute group #0 (noinline); the kernel below expects the call to
; this function to remain in every configuration.
define void @foo_noinline(float addrspace(5)* nocapture %p) #0 {
entry:
  %tmp1 = load float, float addrspace(5)* %p, align 4
  %mul = fmul float %tmp1, 2.000000e+00
  store float %mul, float addrspace(5)* %p, align 4
  ret void
}

; Kernel calling every callee above with pointers into one private array.
; GCN: define amdgpu_kernel void @test_inliner(
; GCN-INL1: %c1 = tail call coldcc float @foo(
; GCN-INLDEF: %cmp.i = fcmp ogt float %tmp2, 0.000000e+00
; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 1.000000e+00, %c
; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i
; GCN-MAXBB1: call coldcc void @foo_private_ptr
; GCN-MAXBB1: call coldcc void @foo_private_ptr2
; GCN: call void @foo_noinline(
; GCN: tail call float @_Z3sinf(
define amdgpu_kernel void @test_inliner(float addrspace(1)* nocapture %a, i32 %n) {
entry:
  %pvt_arr = alloca [64 x float], align 4, addrspace(5)
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i32 %tid
  %tmp2 = load float, float addrspace(1)* %arrayidx, align 4
  %add = add i32 %tid, 1
  %arrayidx2 = getelementptr inbounds float, float addrspace(1)* %a, i32 %add
  %tmp5 = load float, float addrspace(1)* %arrayidx2, align 4
  %c1 = tail call coldcc float @foo(float %tmp2, float %tmp5)
  %or = or i32 %tid, %n
  %arrayidx5 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %or
  store float %c1, float addrspace(5)* %arrayidx5, align 4
  %arrayidx7 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %or
  call coldcc void @foo_private_ptr(float addrspace(5)* %arrayidx7)
  %arrayidx8 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 1
  %arrayidx9 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 2
  call coldcc void @foo_private_ptr2(float addrspace(5)* %arrayidx8, float addrspace(5)* %arrayidx9)
  call void @foo_noinline(float addrspace(5)* %arrayidx7)
  %and = and i32 %tid, %n
  %arrayidx11 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %and
  %tmp12 = load float, float addrspace(5)* %arrayidx11, align 4
  %c2 = call float @sin_wrapper(float %tmp12)
  store float %c2, float addrspace(5)* %arrayidx7, align 4
  %xor = xor i32 %tid, %n
  %arrayidx16 = getelementptr inbounds [64 x float], [64 x float] addrspace(5)* %pvt_arr, i32 0, i32 %xor
  %tmp16 = load float, float addrspace(5)* %arrayidx16, align 4
  store float %tmp16, float addrspace(1)* %arrayidx, align 4
  ret void
}

; Passes pointers into two distinct 32-element private arrays to
; @foo_private_ptr2.
; GCN: define amdgpu_kernel void @test_inliner_multi_pvt_ptr(
; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i
; GCN-MAXBB1: call coldcc void @foo_private_ptr2
define amdgpu_kernel void @test_inliner_multi_pvt_ptr(float addrspace(1)* nocapture %a, i32 %n, float %v) {
entry:
  %pvt_arr1 = alloca [32 x float], align 4, addrspace(5)
  %pvt_arr2 = alloca [32 x float], align 4, addrspace(5)
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i32 %tid
  %or = or i32 %tid, %n
  %arrayidx4 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %or
  %arrayidx5 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr2, i32 0, i32 %or
  store float %v, float addrspace(5)* %arrayidx4, align 4
  store float %v, float addrspace(5)* %arrayidx5, align 4
  %arrayidx8 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 1
  %arrayidx9 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr2, i32 0, i32 2
  call coldcc void @foo_private_ptr2(float addrspace(5)* %arrayidx8, float addrspace(5)* %arrayidx9)
  %xor = xor i32 %tid, %n
  %arrayidx15 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %xor
  %arrayidx16 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr2, i32 0, i32 %xor
  %tmp15 = load float, float addrspace(5)* %arrayidx15, align 4
  %tmp16 = load float, float addrspace(5)* %arrayidx16, align 4
  %tmp17 = fadd float %tmp15, %tmp16
  store float %tmp17, float addrspace(1)* %arrayidx, align 4
  ret void
}

; Same shape as @test_inliner_multi_pvt_ptr except that %pvt_arr2 has 33
; elements instead of 32.
; NOTE(review): presumably the extra element pushes the combined private-array
; size past a budget in the AMDGPU inline heuristic - confirm against the
; target's inline-cost code.
; GCN: define amdgpu_kernel void @test_inliner_multi_pvt_ptr_cutoff(
; GCN-INL1: call coldcc void @foo_private_ptr2
; GCN-INLDEF: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i
define amdgpu_kernel void @test_inliner_multi_pvt_ptr_cutoff(float addrspace(1)* nocapture %a, i32 %n, float %v) {
entry:
  %pvt_arr1 = alloca [32 x float], align 4, addrspace(5)
  %pvt_arr2 = alloca [33 x float], align 4, addrspace(5)
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i32 %tid
  %or = or i32 %tid, %n
  %arrayidx4 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %or
  %arrayidx5 = getelementptr inbounds [33 x float], [33 x float] addrspace(5)* %pvt_arr2, i32 0, i32 %or
  store float %v, float addrspace(5)* %arrayidx4, align 4
  store float %v, float addrspace(5)* %arrayidx5, align 4
  %arrayidx8 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 1
  %arrayidx9 = getelementptr inbounds [33 x float], [33 x float] addrspace(5)* %pvt_arr2, i32 0, i32 2
  call coldcc void @foo_private_ptr2(float addrspace(5)* %arrayidx8, float addrspace(5)* %arrayidx9)
  %xor = xor i32 %tid, %n
  %arrayidx15 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %pvt_arr1, i32 0, i32 %xor
  %arrayidx16 = getelementptr inbounds [33 x float], [33 x float] addrspace(5)* %pvt_arr2, i32 0, i32 %xor
  %tmp15 = load float, float addrspace(5)* %arrayidx15, align 4
  %tmp16 = load float, float addrspace(5)* %arrayidx16, align 4
  %tmp17 = fadd float %tmp15, %tmp16
  store float %tmp17, float addrspace(1)* %arrayidx, align 4
  ret void
}

; Multi-block kernel calling the single-block @sin_wrapper; the wrapper is
; expected to be inlined in every configuration, including the one passing
; -amdgpu-inline-max-bb=1.
; GCN: define amdgpu_kernel void @test_inliner_maxbb_singlebb(
; GCN: tail call float @_Z3sinf
define amdgpu_kernel void @test_inliner_maxbb_singlebb(float addrspace(1)* nocapture %a, i32 %n) {
entry:
  %cmp = icmp eq i32 %n, 1
  br i1 %cmp, label %bb.1, label %bb.2

bb.1:
  ; NOTE(review): a store through an undef pointer is undefined behaviour, so
  ; the optimizer may fold this block away before inlining is considered -
  ; confirm the caller is still multi-block at that point if the test relies
  ; on it.
  store float 1.0, float* undef
  br label %bb.2

bb.2:
  %c = call float @sin_wrapper(float 1.0)
  store float %c, float addrspace(1)* %a
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @_Z3sinf(float) #1

attributes #0 = { noinline }
attributes #1 = { nounwind readnone }