1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
3
4declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0
5
6@lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
7@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
8
9@global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4
10@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
11
12;.
13; HSA: @[[LDS_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global i32 undef, align 4
14; HSA: @[[LDS_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
15; HSA: @[[GLOBAL_I32:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global i32 undef, align 4
16; HSA: @[[GLOBAL_ARR:[a-zA-Z0-9_$"\\.-]+]] = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
17;.
18define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
19; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast
20; HSA-SAME: () #[[ATTR1:[0-9]+]] {
21; HSA-NEXT:    store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), align 4
22; HSA-NEXT:    ret void
23;
24  store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
25  ret void
26}
27
28define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 {
29; HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
30; HSA-SAME: () #[[ATTR2:[0-9]+]] {
31; HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), align 4
32; HSA-NEXT:    ret void
33;
34  store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*)
35  ret void
36}
37
38define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 {
39; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
40; HSA-SAME: () #[[ATTR2]] {
41; HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*), align 4
42; HSA-NEXT:    ret void
43;
44  store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds.i32 to i32 addrspace(4)*)
45  ret void
46}
47
48define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 {
49; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
50; HSA-SAME: () #[[ATTR2]] {
51; HSA-NEXT:    store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
52; HSA-NEXT:    ret void
53;
54  store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
55  ret void
56}
57
58define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 {
59; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_to_flat
60; HSA-SAME: () #[[ATTR1]] {
61; HSA-NEXT:    store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*), align 4
62; HSA-NEXT:    ret void
63;
64  store i32 7, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global.i32 to i32 addrspace(4)*)
65  ret void
66}
67
68define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 {
69; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_gep_to_flat
70; HSA-SAME: () #[[ATTR1]] {
71; HSA-NEXT:    store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
72; HSA-NEXT:    ret void
73;
74  store i32 7, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
75  ret void
76}
77
78define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
79; HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
80; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
81; HSA-NEXT:    [[VAL:%.*]] = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4
82; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
83; HSA-NEXT:    ret void
84;
85  %val = load i32, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8)
86  store i32 %val, i32 addrspace(1)* %out
87  ret void
88}
89
90define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
91; HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
92; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
93; HSA-NEXT:    [[VAL:%.*]] = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst, align 4
94; HSA-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[OUT]], align 4
95; HSA-NEXT:    ret void
96;
97  %val = atomicrmw add i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 1 seq_cst
98  store i32 %val, i32 addrspace(1)* %out
99  ret void
100}
101
102define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
103; HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
104; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
105; HSA-NEXT:    [[VAL:%.*]] = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
106; HSA-NEXT:    [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
107; HSA-NEXT:    store i32 [[VAL0]], i32 addrspace(1)* [[OUT]], align 4
108; HSA-NEXT:    ret void
109;
110  %val = cmpxchg i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst
111  %val0 = extractvalue { i32, i1 } %val, 0
112  store i32 %val0, i32 addrspace(1)* %out
113  ret void
114}
115
116define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 {
117; HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
118; HSA-SAME: (i32 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
119; HSA-NEXT:    call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 [[OUT]], i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
120; HSA-NEXT:    ret void
121;
122  call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false)
123  ret void
124}
125
126; Can't just search the pointer value
127define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(i32 addrspace(4)* addrspace(1)* %out) #1 {
128; HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
129; HSA-SAME: (i32 addrspace(4)* addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
130; HSA-NEXT:    store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* [[OUT]], align 8
131; HSA-NEXT:    ret void
132;
133  store i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 addrspace(4)* addrspace(1)* %out
134  ret void
135}
136
137; Can't just search pointer types
138define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(i64 addrspace(1)* %out) #1 {
139; HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
140; HSA-SAME: (i64 addrspace(1)* [[OUT:%.*]]) #[[ATTR2]] {
141; HSA-NEXT:    store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* [[OUT]], align 4
142; HSA-NEXT:    ret void
143;
144  store i64 ptrtoint (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i64), i64 addrspace(1)* %out
145  ret void
146}
147
148; Cast group to flat, do GEP, cast back to group
149define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 {
150; HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
151; HSA-SAME: () #[[ATTR2]] {
152; HSA-NEXT:    store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*), align 4
153; HSA-NEXT:    ret void
154;
155  store i32 7, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
156  ret void
157}
158
159define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
160; HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
161; HSA-SAME: () #[[ATTR2]] {
162; HSA-NEXT:    ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
163;
164  ret i32 addrspace(3)* addrspacecast (i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8) to i32 addrspace(3)*)
165}
166
167attributes #0 = { argmemonly nounwind }
168attributes #1 = { nounwind }
169;.
170; HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn }
171; HSA: attributes #[[ATTR1]] = { nounwind }
172; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-queue-ptr" }
173;.
174