; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX9 %s

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=CHECK,GFX9 %s

; Checks the hidden entries of each kernel's `.args` metadata (hidden_queue_ptr,
; hidden_private_base, hidden_shared_base, hidden_multigrid_sync_arg) when
; compiling with --amdhsa-code-object-version=5. The same directives are run
; against the notes of the emitted object file and against the textual
; assembly output.
;
; NOTE(review): none of the RUN lines above passes the PRE-GFX9 prefix to
; FileCheck, so every PRE-GFX9 / PRE-GFX9-NEXT directive in this file is
; currently inert. Confirm the expected offsets against real gfx700/gfx803
; output before enabling that prefix.

; On gfx8, the queue ptr is required for this addrspacecast.
; CHECK: - .args:
; PRE-GFX9:             .offset:         208
; PRE-GFX9-NEXT:        .size:           8
; PRE-GFX9-NEXT:        .value_kind:     hidden_queue_ptr
; GFX9-NOT:             .value_kind:     hidden_queue_ptr
; CHECK:                .name:           addrspacecast_requires_queue_ptr
; CHECK:                .symbol:         addrspacecast_requires_queue_ptr.kd
define amdgpu_kernel void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) {
  %flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32*
  %flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32*
  store volatile i32 1, i32* %flat.private
  store volatile i32 2, i32* %flat.local
  ret void
}

; llvm.amdgcn.is.shared: hidden_queue_ptr must not be listed for any target,
; and gfx900 must list neither hidden_private_base nor hidden_shared_base.
; CHECK: - .args:
; CHECK:                .value_kind:     hidden_multigrid_sync_arg
; PRE-GFX9:             .offset:         200
; PRE-GFX9-NEXT:        .size:           4
; PRE-GFX9-NEXT:        .value_kind:     hidden_private_base
; PRE-GFX9-NEXT:        .offset:         204
; PRE-GFX9-NEXT:        .size:           4
; PRE-GFX9-NEXT:        .value_kind:     hidden_shared_base
; GFX9-NOT:             .value_kind:     hidden_private_base
; GFX9-NOT:             .value_kind:     hidden_shared_base
; CHECK-NOT:            .value_kind:     hidden_queue_ptr
; CHECK:                .name:           is_shared_requires_queue_ptr
; CHECK:                .symbol:         is_shared_requires_queue_ptr.kd
define amdgpu_kernel void @is_shared_requires_queue_ptr(i8* %ptr) {
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %zext = zext i1 %is.shared to i32
  store volatile i32 %zext, i32 addrspace(1)* undef
  ret void
}

; llvm.amdgcn.is.private: same expectations as the is.shared kernel above.
; CHECK: - .args:
; CHECK:                .value_kind:     hidden_multigrid_sync_arg
; PRE-GFX9:             .offset:         200
; PRE-GFX9-NEXT:        .size:           4
; PRE-GFX9-NEXT:        .value_kind:     hidden_private_base
; PRE-GFX9-NEXT:        .offset:         204
; PRE-GFX9-NEXT:        .size:           4
; PRE-GFX9-NEXT:        .value_kind:     hidden_shared_base
; GFX9-NOT:             .value_kind:     hidden_private_base
; GFX9-NOT:             .value_kind:     hidden_shared_base
; CHECK-NOT:            .value_kind:     hidden_queue_ptr
; CHECK:                .name:           is_private_requires_queue_ptr
; CHECK:                .symbol:         is_private_requires_queue_ptr.kd
define amdgpu_kernel void @is_private_requires_queue_ptr(i8* %ptr) {
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %zext = zext i1 %is.private to i32
  store volatile i32 %zext, i32 addrspace(1)* undef
  ret void
}

; llvm.trap: gfx900 must list none of hidden_private_base, hidden_shared_base
; or hidden_queue_ptr.
; CHECK: - .args:
; CHECK:                .value_kind:     hidden_multigrid_sync_arg
; PRE-GFX9:             .offset:         192
; PRE-GFX9-NEXT:        .size:           4
; PRE-GFX9-NEXT:        .value_kind:     hidden_private_base
; PRE-GFX9-NEXT:        .offset:         196
; PRE-GFX9-NEXT:        .size:           4
; PRE-GFX9-NEXT:        .value_kind:     hidden_shared_base
; PRE-GFX9-NEXT:        .address_space:  global
; PRE-GFX9-NEXT:        .offset:         200
; PRE-GFX9-NEXT:        .size:           8
; PRE-GFX9-NEXT:        .value_kind:     hidden_queue_ptr
; GFX9-NOT:             .value_kind:     hidden_private_base
; GFX9-NOT:             .value_kind:     hidden_shared_base
; GFX9-NOT:             .value_kind:     hidden_queue_ptr
; CHECK:                .name:           trap_requires_queue_ptr
; CHECK:                .symbol:         trap_requires_queue_ptr.kd
define amdgpu_kernel void @trap_requires_queue_ptr() {
  call void @llvm.trap()
  unreachable
}

; Direct use of llvm.amdgcn.queue.ptr: hidden_queue_ptr is expected at offset
; 208 with size 8 on every tested target.
; CHECK: - .args:
; CHECK:                .offset:         208
; CHECK-NEXT:           .size:           8
; CHECK-NEXT:           .value_kind:     hidden_queue_ptr
; CHECK:                .name:           amdgcn_queue_ptr_requires_queue_ptr
; CHECK:                .symbol:         amdgcn_queue_ptr_requires_queue_ptr.kd
define amdgpu_kernel void @amdgcn_queue_ptr_requires_queue_ptr(i64 addrspace(1)* %ptr) {
  %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
  %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
  store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
  ret void
}


declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
declare i64 @llvm.amdgcn.dispatch.id()
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
declare i1 @llvm.amdgcn.is.shared(i8*)
declare i1 @llvm.amdgcn.is.private(i8*)
declare void @llvm.trap()
declare void @llvm.debugtrap()