; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s

; Verifies the per-function entries that llc emits under .shader_functions in
; the amdpal.pipelines metadata for amdgpu_gfx functions: .lds_size,
; .sgpr_count, .stack_frame_size_in_bytes and .vgpr_count.
;
; The three invocations above cover the default subtarget and gfx900 with the
; default instruction selector, plus gfx900 with -global-isel. Expected values
; that differ between those configurations are checked with the narrower
; prefixes (SDAG / GISEL, GFX8 / GFX9).

; External callees: one with a single scalar argument and one taking a
; <64 x float> argument.
declare amdgpu_gfx float @extern_func(float) #0
declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0

; Function pointer loaded by the *_indirect_call tests.
@funcptr = external hidden unnamed_addr addrspace(4) constant void()*, align 4

; No alloca and no call.
define amdgpu_gfx float @no_stack(float %arg0) #0 {
  %add = fadd float %arg0, 1.0
  ret float %add
}

; A single alloca of 4 floats in the entry block. The volatile store/load keep
; the stack object from being optimized away.
define amdgpu_gfx float @simple_stack(float %arg0) #0 {
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %stack
  %val = load volatile float, float addrspace(5)* %stack
  %add = fadd float %arg0, %val
  ret float %add
}

; Two separate allocas of 4 floats each.
define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %stack
  %val = load volatile float, float addrspace(5)* %stack
  %add = fadd float %arg0, %val
  %stack2 = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %stack2
  %val2 = load volatile float, float addrspace(5)* %stack2
  %add2 = fadd float %add, %val2
  ret float %add2
}

; The alloca lives in bb1, which is only reached when %arg0 > 0.0, i.e. it is
; not in the entry block.
define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
bb0:
  %cmp = fcmp ogt float %arg0, 0.0
  br i1 %cmp, label %bb1, label %bb2

bb1:
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %stack
  %val = load volatile float, float addrspace(5)* %stack
  %add = fadd float %arg0, %val
  br label %bb2

bb2:
  %res = phi float [ 0.0, %bb0 ], [ %add, %bb1 ]
  ret float %res
}

; The alloca lives in bb1, which branches back to itself, so it sits inside a
; loop body.
define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
bb0:
  br label %bb1

bb1:
  %ctr = phi i32 [ 0, %bb0 ], [ %newctr, %bb1 ]
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %stack
  %val = load volatile float, float addrspace(5)* %stack
  %add = fadd float %arg0, %val
  %cmp = icmp sgt i32 %ctr, 0
  %newctr = sub i32 %ctr, 1
  br i1 %cmp, label %bb1, label %bb2

bb2:
  ret float %add
}

; No alloca of its own; calls @simple_stack, which is defined in this file.
define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
  %res = call amdgpu_gfx float @simple_stack(float %arg0)
  ret float %res
}

; Own alloca plus a call to @simple_stack.
define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %stack
  %val = load volatile float, float addrspace(5)* %stack
  %res = call amdgpu_gfx float @simple_stack(float %arg0)
  %add = fadd float %res, %val
  ret float %add
}

; No alloca of its own; calls a function that is only declared here.
define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
  %res = call amdgpu_gfx float @extern_func(float %arg0)
  ret float %res
}

; Own alloca plus a call to a function that is only declared here.
define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %stack
  %val = load volatile float, float addrspace(5)* %stack
  %res = call amdgpu_gfx float @extern_func(float %arg0)
  %add = fadd float %res, %val
  ret float %add
}

; No alloca of its own; forwards a <64 x float> argument to an external callee.
define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
  %res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
  ret float %res
}

; No alloca of its own; calls through the pointer loaded from @funcptr.
define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
  %fptr = load void()*, void()* addrspace(4)* @funcptr
  call amdgpu_gfx void %fptr()
  ret float %arg0
}

; Own alloca plus a call through the pointer loaded from @funcptr.
define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %stack
  %val = load volatile float, float addrspace(5)* %stack
  %fptr = load void()*, void()* addrspace(4)* @funcptr
  call amdgpu_gfx void %fptr()
  %add = fadd float %arg0, %val
  ret float %add
}

; Own alloca plus a direct call to itself.
define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %stack
  %val = load volatile float, float addrspace(5)* %stack
  %res = call amdgpu_gfx float @simple_stack_recurse(float %arg0)
  %add = fadd float %res, %val
  ret float %add
}

; 64 floats = 256 bytes, which is the 0x100 .lds_size expected below for the
; two functions that reference this global.
@lds = internal addrspace(3) global [64 x float] undef

; Loads the first element of @lds; %arg0 is unused.
define amdgpu_gfx float @simple_lds(float %arg0) #0 {
  %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
  %val = load float, float addrspace(3)* %lds_ptr
  ret float %val
}

; Loads the first element of @lds and passes it to a direct call to itself.
define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
  %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
  %val = load float, float addrspace(3)* %lds_ptr
  %res = call amdgpu_gfx float @simple_lds_recurse(float %val)
  ret float %res
}

attributes #0 = { nounwind }

; Expected metadata. The function entries are checked in alphabetical order by
; name, not in definition order, and every value is anchored with {{$}} so a
; longer number cannot match by prefix.
; GCN: amdpal.pipelines:
; GCN-NEXT:  - .registers:
; GCN-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
; GCN-NEXT:      0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GCN-NEXT:    .shader_functions:
; GCN-NEXT:      dynamic_stack:
; GCN-NEXT:        .lds_size: 0{{$}}
; GCN-NEXT:        .sgpr_count: 0x28{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
; SDAG-NEXT:        .vgpr_count: 0x2{{$}}
; GISEL-NEXT:        .vgpr_count: 0x3{{$}}
; GCN-NEXT:      dynamic_stack_loop:
; GCN-NEXT:        .lds_size: 0{{$}}
; SDAG-NEXT:        .sgpr_count: 0x25{{$}}
; GISEL-NEXT:        .sgpr_count: 0x26{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
; SDAG-NEXT:        .vgpr_count: 0x3{{$}}
; GISEL-NEXT:        .vgpr_count: 0x4{{$}}
; GCN-NEXT:      multiple_stack:
; GCN-NEXT:        .lds_size: 0{{$}}
; GCN-NEXT:        .sgpr_count: 0x21{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0x24{{$}}
; GCN-NEXT:        .vgpr_count: 0x3{{$}}
; GCN-NEXT:      no_stack:
; GCN-NEXT:        .lds_size: 0{{$}}
; GCN-NEXT:        .sgpr_count: 0x20{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT:        .vgpr_count: 0x1{{$}}
; GCN-NEXT:      no_stack_call:
; GCN-NEXT:        .lds_size: 0{{$}}
; GCN-NEXT:        .sgpr_count: 0x24{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT:        .vgpr_count: 0x3{{$}}
; GCN-NEXT:      no_stack_extern_call:
; GCN-NEXT:        .lds_size: 0{{$}}
; GFX8-NEXT:        .sgpr_count: 0x28{{$}}
; GFX9-NEXT:        .sgpr_count: 0x2c{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT:        .vgpr_count: 0x2b{{$}}
; GCN-NEXT:      no_stack_extern_call_many_args:
; GCN-NEXT:        .lds_size: 0{{$}}
; GFX8-NEXT:        .sgpr_count: 0x28{{$}}
; GFX9-NEXT:        .sgpr_count: 0x2c{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0x90{{$}}
; GCN-NEXT:        .vgpr_count: 0x2b{{$}}
; GCN-NEXT:      no_stack_indirect_call:
; GCN-NEXT:        .lds_size: 0{{$}}
; GFX8-NEXT:        .sgpr_count: 0x28{{$}}
; GFX9-NEXT:        .sgpr_count: 0x2c{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT:        .vgpr_count: 0x2b{{$}}
; GCN-NEXT:      simple_lds:
; GCN-NEXT:        .lds_size: 0x100{{$}}
; GCN-NEXT:        .sgpr_count: 0x20{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT:        .vgpr_count: 0x1{{$}}
; GCN-NEXT:      simple_lds_recurse:
; GCN-NEXT:        .lds_size: 0x100{{$}}
; GCN-NEXT:        .sgpr_count: 0x26{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT:        .vgpr_count: 0x29{{$}}
; GCN-NEXT:      simple_stack:
; GCN-NEXT:        .lds_size: 0{{$}}
; GCN-NEXT:        .sgpr_count: 0x21{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0x14{{$}}
; GCN-NEXT:        .vgpr_count: 0x2{{$}}
; GCN-NEXT:      simple_stack_call:
; GCN-NEXT:        .lds_size: 0{{$}}
; GCN-NEXT:        .sgpr_count: 0x24{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT:        .vgpr_count: 0x4{{$}}
; GCN-NEXT:      simple_stack_extern_call:
; GCN-NEXT:        .lds_size: 0{{$}}
; GFX8-NEXT:        .sgpr_count: 0x28{{$}}
; GFX9-NEXT:        .sgpr_count: 0x2c{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT:        .vgpr_count: 0x2b{{$}}
; GCN-NEXT:      simple_stack_indirect_call:
; GCN-NEXT:        .lds_size: 0{{$}}
; GFX8-NEXT:        .sgpr_count: 0x28{{$}}
; GFX9-NEXT:        .sgpr_count: 0x2c{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT:        .vgpr_count: 0x2b{{$}}
; GCN-NEXT:      simple_stack_recurse:
; GCN-NEXT:        .lds_size: 0{{$}}
; GCN-NEXT:        .sgpr_count: 0x26{{$}}
; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT:        .vgpr_count: 0x2a{{$}}
; GCN-NEXT: ...