1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
3; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s
4
5declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0
6
7@lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
8@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
9
10@global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4
11@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
12
13;.
14; HSA: @[[LDS_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global i32 undef, align 4
15; HSA: @[[LDS_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
16; HSA: @[[GLOBAL_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global i32 undef, align 4
17; HSA: @[[GLOBAL_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
18;.
19define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
20; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast
21; HSA-SAME: () #[[ATTR1:[0-9]+]] {
22; HSA-NEXT:    store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), align 4
23; HSA-NEXT:    ret void
24;
25  store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
26  ret void
27}
28
29define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 {
30; HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
31; HSA-SAME: () #[[ATTR2:[0-9]+]] {
32; HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), align 4
33; HSA-NEXT:    ret void
34;
35  store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*)
36  ret void
37}
38
39define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 {
40; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
41; HSA-SAME: () #[[ATTR2]] {
42; HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*), align 4
43; HSA-NEXT:    ret void
44;
45  store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*)
46  ret void
47}
48
49define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 {
50; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
51; HSA-SAME: () #[[ATTR2]] {
52; HSA-NEXT:    store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
53; HSA-NEXT:    ret void
54;
55  store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
56  ret void
57}
58
59define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 {
60; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_to_flat
61; HSA-SAME: () #[[ATTR1]] {
62; HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*), align 4
63; HSA-NEXT:    ret void
64;
65  store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*)
66  ret void
67}
68
69define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 {
70; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_gep_to_flat
71; HSA-SAME: () #[[ATTR1]] {
72; HSA-NEXT:    store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
73; HSA-NEXT:    ret void
74;
75  store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
76  ret void
77}
78
79define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
80; HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
81; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
82; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
83; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
84; HSA-NEXT:    ret void
85;
86  %val = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
87  store i32 %val, i32 addrspace(1)* %out
88  ret void
89}
90
91define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
92; HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
93; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
94; HSA-NEXT:    [[VAL:%.*]] = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst, align 4
95; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
96; HSA-NEXT:    ret void
97;
98  %val = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst
99  store i32 %val, i32 addrspace(1)* %out
100  ret void
101}
102
103define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
104; HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
105; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
106; HSA-NEXT:    [[VAL:%.*]] = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
107; HSA-NEXT:    [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
108; HSA-NEXT:    store i32 [[VAL0]], i32 addrspace(1)* [[OUT]], align 4
109; HSA-NEXT:    ret void
110;
111  %val = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst
112  %val0 = extractvalue { i32, i1 } %val, 0
113  store i32 %val0, i32 addrspace(1)* %out
114  ret void
115}
116
117define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
118; HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
119; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
120; HSA-NEXT:    call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 [[OUT]], i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
121; HSA-NEXT:    ret void
122;
123  call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
124  ret void
125}
126
127; Can't just search the pointer value
128define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #1 {
129; HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
130; HSA-SAME: (i32 addrspace(4)* addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
131; HSA-NEXT:    store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* [[OUT]], align 8
132; HSA-NEXT:    ret void
133;
134  store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* %out
135  ret void
136}
137
138; Can't just search pointer types
139define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #1 {
140; HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
141; HSA-SAME: (i64 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
142; HSA-NEXT:    store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* [[OUT]], align 4
143; HSA-NEXT:    ret void
144;
145  store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* %out
146  ret void
147}
148
149; Cast group to flat, do GEP, cast back to group
150define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 {
151; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
152; HSA-SAME: () #[[ATTR2]] {
153; HSA-NEXT:    store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*), align 4
154; HSA-NEXT:    ret void
155;
156  store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
157  ret void
158}
159
160define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
161; HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
162; HSA-SAME: () #[[ATTR2]] {
163; HSA-NEXT:    ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
164;
165  ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
166}
167
168attributes #0 = { argmemonly nounwind }
169attributes #1 = { nounwind }
170;.
171; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn }
172; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
173; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-queue-ptr" }
174;.
175; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn }
176; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
177; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
178;.
179