; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=iceland -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s

; Make sure to run on a GPU with the SGPR allocation bug.

; GCN-LABEL: {{^}}use_vcc:
; GCN: ; NumSgprs: 34
; GCN: ; NumVgprs: 0
define void @use_vcc() #1 {
  call void asm sideeffect "", "~{vcc}" () #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_vcc:
; GCN: v_writelane_b32 v40, s33, 2
; GCN: v_writelane_b32 v40, s30, 0
; GCN: v_writelane_b32 v40, s31, 1
; GCN: s_swappc_b64
; GCN: v_readlane_b32 s31, v40, 1
; GCN: v_readlane_b32 s30, v40, 0
; GCN: v_readlane_b32 s33, v40, 2
; GCN: s_setpc_b64 s[30:31]
; GCN: ; NumSgprs: 36
; GCN: ; NumVgprs: 41
define void @indirect_use_vcc() #1 {
  call void @use_vcc()
  ret void
}

; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 38
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2level_use_vcc_kernel(i32 addrspace(1)* %out) #0 {
  call void @indirect_use_vcc()
  ret void
}

; GCN-LABEL: {{^}}use_flat_scratch:
; CI: ; NumSgprs: 36
; VI: ; NumSgprs: 38
; GCN: ; NumVgprs: 0
define void @use_flat_scratch() #1 {
  call void asm sideeffect "", "~{flat_scratch}" () #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_flat_scratch:
; CI: ; NumSgprs: 38
; VI: ; NumSgprs: 40
; GCN: ; NumVgprs: 41
define void @indirect_use_flat_scratch() #1 {
  call void @use_flat_scratch()
  ret void
}

; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 38
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(i32 addrspace(1)* %out) #0 {
  call void @indirect_use_flat_scratch()
  ret void
}

; GCN-LABEL: {{^}}use_10_vgpr:
; GCN: ; NumVgprs: 10
define void @use_10_vgpr() #1 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}"() #0
  call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}"() #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_10_vgpr:
; GCN: ; NumVgprs: 41
define void @indirect_use_10_vgpr() #0 {
  call void @use_10_vgpr()
  ret void
}

; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
; GCN: is_dynamic_callstack = 0
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
  call void @indirect_use_10_vgpr()
  ret void
}

; GCN-LABEL: {{^}}use_50_vgpr:
; GCN: ; NumVgprs: 50
define void @use_50_vgpr() #1 {
  call void asm sideeffect "", "~{v49}"() #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_50_vgpr:
; GCN: ; NumVgprs: 50
define void @indirect_use_50_vgpr() #0 {
  call void @use_50_vgpr()
  ret void
}

; GCN-LABEL: {{^}}use_80_sgpr:
; GCN: ; NumSgprs: 80
define void @use_80_sgpr() #1 {
  call void asm sideeffect "", "~{s79}"() #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_80_sgpr:
; GCN: ; NumSgprs: 82
define void @indirect_use_80_sgpr() #1 {
  call void @use_80_sgpr()
  ret void
}

; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 84
; VI-NOBUG: ; NumSgprs: 86
; VI-BUG: ; NumSgprs: 96
define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
  call void @indirect_use_80_sgpr()
  ret void
}


; GCN-LABEL: {{^}}use_stack0:
; GCN: ScratchSize: 2052
define void @use_stack0() #1 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
  ret void
}

; GCN-LABEL: {{^}}use_stack1:
; GCN: ScratchSize: 404
define void @use_stack1() #1 {
  %alloca = alloca [100 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([100 x i32] addrspace(5)* %alloca) #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_stack:
; GCN: ScratchSize: 2132
define void @indirect_use_stack() #1 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([16 x i32] addrspace(5)* %alloca) #0
  call void @use_stack0()
  ret void
}

; GCN-LABEL: {{^}}indirect_2_level_use_stack:
; GCN: is_dynamic_callstack = 0
; GCN: ScratchSize: 2132
define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
  call void @indirect_use_stack()
  ret void
}


; Should be the maximum of the callees' stack usage.
; GCN-LABEL: {{^}}multi_call_use_use_stack:
; GCN: is_dynamic_callstack = 0
; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_use_use_stack() #0 {
  call void @use_stack0()
  call void @use_stack1()
  ret void
}


declare void @external() #0

; GCN-LABEL: {{^}}usage_external:
; GCN: is_dynamic_callstack = 1
; NumSgprs: 48
; NumVgprs: 24
; GCN: ScratchSize: 16384
define amdgpu_kernel void @usage_external() #0 {
  call void @external()
  ret void
}

declare void @external_recurse() #2

; GCN-LABEL: {{^}}usage_external_recurse:
; GCN: is_dynamic_callstack = 1
; NumSgprs: 48
; NumVgprs: 24
; GCN: ScratchSize: 16384
define amdgpu_kernel void @usage_external_recurse() #0 {
  call void @external_recurse()
  ret void
}

; GCN-LABEL: {{^}}direct_recursion_use_stack:
; GCN: ScratchSize: 18448{{$}}
define void @direct_recursion_use_stack(i32 %val) #2 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %alloca) #0
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %ret, label %call

call:
  %val.sub1 = sub i32 %val, 1
  call void @direct_recursion_use_stack(i32 %val.sub1)
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}usage_direct_recursion:
; GCN: is_ptr64 = 1
; GCN: is_dynamic_callstack = 1
; GCN: workitem_private_segment_byte_size = 18448{{$}}
define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
  call void @direct_recursion_use_stack(i32 %n)
  ret void
}

; Make sure there's no assert when an sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call
; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
; CI: NumSgprs: 84
; VI-NOBUG: NumSgprs: 86
; VI-BUG: NumSgprs: 96
; GCN: NumVgprs: 50
define amdgpu_kernel void @count_use_sgpr96_external_call() {
entry:
  tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
  call void @external()
  ret void
}

; Make sure there's no assert when an sgpr160 is used.
; GCN-LABEL: {{^}}count_use_sgpr160_external_call
; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
; CI: NumSgprs: 84
; VI-NOBUG: NumSgprs: 86
; VI-BUG: NumSgprs: 96
; GCN: NumVgprs: 50
define amdgpu_kernel void @count_use_sgpr160_external_call() {
entry:
  tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}

; Make sure there's no assert when a vgpr160 is used.
; GCN-LABEL: {{^}}count_use_vgpr160_external_call
; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
; CI: NumSgprs: 84
; VI-NOBUG: NumSgprs: 86
; VI-BUG: NumSgprs: 96
; GCN: NumVgprs: 50
define amdgpu_kernel void @count_use_vgpr160_external_call() {
entry:
  tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}

attributes #0 = { nounwind noinline norecurse }
attributes #1 = { nounwind noinline norecurse }
attributes #2 = { nounwind noinline }