; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK  %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefix=CHECK  %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 -filetype=obj -o - < %s | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX9  %s

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefix=CHECK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=CHECK,GFX9 %s

; NOTE(review): no RUN line above enables the PRE-GFX9 prefix, so the PRE-GFX9
; directives in this file are currently never evaluated. Confirm the expected
; offsets against llc output before adding that prefix to the gfx700/gfx803
; invocations.

; Kernel under test: casts its addrspace(5) and addrspace(3) pointer arguments
; to flat pointers and performs a volatile store through each result.
; On gfx8, the queue ptr is required for this addrspacecast.
; CHECK: - .args:
; PRE-GFX9:          .offset:         208
; PRE-GFX9-NEXT:     .size:           8
; PRE-GFX9-NEXT:     .value_kind:     hidden_queue_ptr
; GFX9-NOT:          .value_kind:     hidden_queue_ptr
; CHECK:             .name:           addrspacecast_requires_queue_ptr
; CHECK:             .symbol:         addrspacecast_requires_queue_ptr.kd
define amdgpu_kernel void @addrspacecast_requires_queue_ptr(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) {
  %flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32*
  %flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32*
  ; Volatile stores keep both casts alive through optimization.
  store volatile i32 1, i32* %flat.private
  store volatile i32 2, i32* %flat.local
  ret void
}

; Kernel under test: calls llvm.amdgcn.is.shared on its flat pointer argument
; and stores the zero-extended result with a volatile store.
; The gfx900 run expects neither aperture-base argument after the multigrid
; sync argument, and every run expects no hidden_queue_ptr entry.
; CHECK: - .args:
; CHECK:             .value_kind:     hidden_multigrid_sync_arg
; PRE-GFX9:          .offset:         200
; PRE-GFX9-NEXT:     .size:           4
; PRE-GFX9-NEXT:     .value_kind:     hidden_private_base
; PRE-GFX9-NEXT:     .offset:         204
; PRE-GFX9-NEXT:     .size:           4
; PRE-GFX9-NEXT:     .value_kind:     hidden_shared_base
; GFX9-NOT:          .value_kind:     hidden_private_base
; GFX9-NOT:          .value_kind:     hidden_shared_base
; CHECK-NOT:         .value_kind:     hidden_queue_ptr
; CHECK:             .name:           is_shared_requires_queue_ptr
; CHECK:             .symbol:         is_shared_requires_queue_ptr.kd
define amdgpu_kernel void @is_shared_requires_queue_ptr(i8* %ptr) {
  %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr)
  %zext = zext i1 %is.shared to i32
  ; The volatile store keeps the intrinsic result live.
  store volatile i32 %zext, i32 addrspace(1)* undef
  ret void
}

; Kernel under test: calls llvm.amdgcn.is.private on its flat pointer argument
; and stores the zero-extended result with a volatile store.
; The gfx900 run expects neither aperture-base argument after the multigrid
; sync argument, and every run expects no hidden_queue_ptr entry.
; CHECK: - .args:
; CHECK:             .value_kind:     hidden_multigrid_sync_arg
; PRE-GFX9:          .offset:         200
; PRE-GFX9-NEXT:     .size:           4
; PRE-GFX9-NEXT:     .value_kind:     hidden_private_base
; PRE-GFX9-NEXT:     .offset:         204
; PRE-GFX9-NEXT:     .size:           4
; PRE-GFX9-NEXT:     .value_kind:     hidden_shared_base
; GFX9-NOT:          .value_kind:     hidden_private_base
; GFX9-NOT:          .value_kind:     hidden_shared_base
; CHECK-NOT:         .value_kind:     hidden_queue_ptr
; CHECK:             .name:           is_private_requires_queue_ptr
; CHECK:             .symbol:         is_private_requires_queue_ptr.kd
define amdgpu_kernel void @is_private_requires_queue_ptr(i8* %ptr) {
  %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr)
  %zext = zext i1 %is.private to i32
  ; The volatile store keeps the intrinsic result live.
  store volatile i32 %zext, i32 addrspace(1)* undef
  ret void
}

; Kernel under test: takes no explicit arguments, calls llvm.trap and ends in
; unreachable.
; CHECK: - .args:
; CHECK:             .value_kind:     hidden_multigrid_sync_arg
; PRE-GFX9:          .offset:         192
; PRE-GFX9-NEXT:     .size:           4
; PRE-GFX9-NEXT:     .value_kind:     hidden_private_base
; PRE-GFX9-NEXT:     .offset:         196
; PRE-GFX9-NEXT:     .size:           4
; PRE-GFX9-NEXT:     .value_kind:     hidden_shared_base
; PRE-GFX9-NEXT:     .address_space:  global
; PRE-GFX9-NEXT:     .offset:         200
; PRE-GFX9-NEXT:     .size:           8
; PRE-GFX9-NEXT:     .value_kind:     hidden_queue_ptr
; GFX9-NOT:          .value_kind:     hidden_private_base
; GFX9-NOT:          .value_kind:     hidden_shared_base
; GFX9-NOT:          .value_kind:     hidden_queue_ptr
; CHECK:             .name:           trap_requires_queue_ptr
; CHECK:             .symbol:         trap_requires_queue_ptr.kd
define amdgpu_kernel void @trap_requires_queue_ptr() {
  call void @llvm.trap()
  unreachable
}

; Kernel under test: calls the queue.ptr, implicitarg.ptr, dispatch.ptr and
; dispatch.id intrinsics, does a volatile i8 load through each returned
; pointer, and stores the dispatch id to its addrspace(1) pointer argument.
; Every run expects a hidden_queue_ptr argument at offset 208 for this kernel.
; CHECK: - .args:
; CHECK:             .offset:         208
; CHECK-NEXT:        .size:           8
; CHECK-NEXT:        .value_kind:     hidden_queue_ptr
; CHECK:             .name:           amdgcn_queue_ptr_requires_queue_ptr
; CHECK:             .symbol:         amdgcn_queue_ptr_requires_queue_ptr.kd
define amdgpu_kernel void @amdgcn_queue_ptr_requires_queue_ptr(i64 addrspace(1)* %ptr) {
  %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
  %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
  ; Volatile accesses keep every intrinsic result live.
  %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr
  %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr
  %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr
  store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr
  ret void
}


; Declarations of the intrinsics referenced by the kernels in this file.
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr()
declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
declare i64 @llvm.amdgcn.dispatch.id()
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
declare i1 @llvm.amdgcn.is.shared(i8*)
declare i1 @llvm.amdgcn.is.private(i8*)
declare void @llvm.trap()
declare void @llvm.debugtrap()
