; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN %s

; The two llc invocations above differ only in -enable-ipra and share one
; check prefix, so every expectation in this file has to hold both with and
; without that flag.

; Kernels are not called, so there is no call preserved mask.
; GCN-LABEL: {{^}}kernel:
; GCN: flat_store_dword
define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) #0 {
entry:
  store i32 0, i32 addrspace(1)* %out
  ret void
}

; Callee shared by the call tests below. Its empty inline asm lists v0-v7 as
; clobbered, and the check expects the function to report 8 VGPRs.
; GCN-LABEL: {{^}}func:
; GCN: ; NumVgprs: 8
define hidden void @func() #1 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
  ret void
}

; A volatile load whose result is stored again after calling @func. The checks
; expect the value to be loaded into v8 and stored from v8 (the first VGPR
; outside @func's v0-v7 clobber list), with no buffer_store, buffer_load,
; readlane or writelane before the load or between the call and the store.
; GCN-LABEL: {{^}}kernel_call:
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8

; GCN: ; NumSgprs: 37
; GCN: ; NumVgprs: 9
define amdgpu_kernel void @kernel_call() #0 {
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  tail call void @func()
  store volatile i32 %vgpr, i32 addrspace(1)* undef
  ret void
}

; Same body as @kernel_call, but in an ordinary (non-kernel) function. The
; instruction expectations are identical; only the expected SGPR count differs.
; GCN-LABEL: {{^}}func_regular_call:
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8

; GCN: ; NumSgprs: 32
; GCN: ; NumVgprs: 9
define void @func_regular_call() #1 {
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  tail call void @func()
  store volatile i32 %vgpr, i32 addrspace(1)* undef
  ret void
}

; A function that only tail-calls @func. The checks expect the address
; computation and s_setpc_b64 on consecutive output lines right after the
; first s_waitcnt, and the same VGPR count as @func itself.
; GCN-LABEL: {{^}}func_tail_call:
; GCN: s_waitcnt
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4,
; GCN-NEXT: s_addc_u32 s5,
; GCN-NEXT: s_setpc_b64 s[4:5]

; GCN: ; NumSgprs: 32
; GCN: ; NumVgprs: 8
define void @func_tail_call() #1 {
  tail call void @func()
  ret void
}

; A regular call followed by a tail call to the same callee: s_swappc_b64 is
; expected first, with the loaded value kept in v8 across it, and s_setpc_b64
; afterwards.
; GCN-LABEL: {{^}}func_call_tail_call:
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8
; GCN: s_setpc_b64

; GCN: ; NumSgprs: 32
; GCN: ; NumVgprs: 9
define void @func_call_tail_call() #1 {
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  tail call void @func()
  store volatile i32 %vgpr, i32 addrspace(1)* undef
  tail call void @func()
  ret void
}

; Empty noinline callee used by @test_funcx2; only its label is checked.
; GCN-LABEL: {{^}}void_func_void:
define void @void_func_void() noinline {
  ret void
}

; Make sure we don't get save/restore of FP between calls.
; GCN-LABEL: {{^}}test_funcx2:
; GCN-NOT: s5
; GCN-NOT: s32
define void @test_funcx2() #0 {
  call void @void_func_void()
  call void @void_func_void()
  ret void
}

; Weak kernel that calls @hoge; nothing beyond its label is checked here.
; GCN-LABEL: {{^}}wombat:
define weak amdgpu_kernel void @wombat(i32* %arg, i32* %arg2) {
bb:
  call void @hoge() #0
  ret void
}

; Make sure we save/restore the return address around the call.
; Function Attrs: norecurse
define internal void @hoge() #2 {
bb:
; The expectations for this function sit inside its body: s30 and s31 are
; written to lanes of a single VGPR before the call, read back into s4 and s5
; afterwards, and the function returns through s[4:5].
; GCN-LABEL: {{^}}hoge:
; GCN-DAG: v_writelane_b32 [[CSR_VGPR:v[0-9]+]], s30,
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,
; GCN: s_swappc_b64 s[30:31]
; GCN-DAG: v_readlane_b32 s4, [[CSR_VGPR]],
; GCN-DAG: v_readlane_b32 s5, [[CSR_VGPR]],
; GCN: s_waitcnt vmcnt(0)
; GCN: s_setpc_b64 s[4:5]
  call void @eggs()
  ret void
}

; External callee of @hoge; only a declaration is provided.
declare dso_local void @eggs()


; Attribute groups: #0 is plain nounwind; #1 adds noinline and the three
; "amdgpu-no-workitem-id-*" markers; #2 is norecurse.
attributes #0 = { nounwind }
attributes #1 = { nounwind noinline "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #2 = { norecurse }