1; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck  %s
2; RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga < %s | FileCheck  %s
3; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=CHECK,HSA %s
4
5; Disable optimizations in case there are optimizations added that
6; specialize away generic pointer accesses.
7
8
9; These testcases might become useless when there are optimizations to
10; remove generic pointers.
11
12; CHECK-LABEL: {{^}}store_flat_i32:
13; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]],
14; CHECK-DAG: s_load_dword s[[SDATA:[0-9]+]],
15; CHECK: s_waitcnt lgkmcnt(0)
16; CHECK-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]]
17; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
18; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
19; CHECK: flat_store_dword v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}, v[[DATA]]
; The addrspace(1) argument is reinterpreted as an addrspace(4) pointer and the
; i32 argument %x is written through it; the checks above expect the address
; and data to be copied from SGPRs into VGPRs before a flat_store_dword.
20define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
21  %flat.ptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
22  store i32 %x, i32 addrspace(4)* %flat.ptr, align 4
23  ret void
24}
25
26; CHECK-LABEL: {{^}}store_flat_i64:
27; CHECK: flat_store_dwordx2
; 64-bit variant: %x is stored through the addrspace(4) view of %gptr with
; 8-byte alignment; a single flat_store_dwordx2 is expected.
28define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
29  %flat.ptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
30  store i64 %x, i64 addrspace(4)* %flat.ptr, align 8
31  ret void
32}
33
34; CHECK-LABEL: {{^}}store_flat_v4i32:
35; CHECK: flat_store_dwordx4
; Vector variant: the <4 x i32> argument is stored through the addrspace(4)
; view of %gptr with 16-byte alignment; a flat_store_dwordx4 is expected.
36define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
37  %flat.ptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
38  store <4 x i32> %x, <4 x i32> addrspace(4)* %flat.ptr, align 16
39  ret void
40}
41
42; CHECK-LABEL: {{^}}store_flat_trunc_i16:
43; CHECK: flat_store_short
; Truncating store: the i32 argument is narrowed to i16 before being written
; through the addrspace(4) pointer; a flat_store_short is expected.
44define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
45  %flat.ptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
46  %narrow = trunc i32 %x to i16
47  store i16 %narrow, i16 addrspace(4)* %flat.ptr, align 2
48  ret void
49}
50
51; CHECK-LABEL: {{^}}store_flat_trunc_i8:
52; CHECK: flat_store_byte
; Truncating store: the i32 argument is narrowed to i8 before being written
; through the addrspace(4) pointer; a flat_store_byte is expected.
; Note the store is annotated align 2 even though the element is one byte.
53define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
54  %flat.ptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
55  %narrow = trunc i32 %x to i8
56  store i8 %narrow, i8 addrspace(4)* %flat.ptr, align 2
57  ret void
58}
59
60
61
62; CHECK-LABEL: {{^}}load_flat_i32:
63; CHECK: flat_load_dword
; Loads an i32 through the addrspace(4) view of %gptr and writes the value to
; %out in addrspace(1). The label is anchored to line start, matching the
; store tests in this file.
64define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
65  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
66  %fload = load i32, i32 addrspace(4)* %fptr, align 4
67  store i32 %fload, i32 addrspace(1)* %out, align 4
68  ret void
69}
70
71; CHECK-LABEL: {{^}}load_flat_i64:
72; CHECK: flat_load_dwordx2
; Loads an i64 through the addrspace(4) view of %gptr and writes the value to
; %out in addrspace(1). The label is anchored to line start, matching the
; store tests in this file.
73define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
74  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
75  %fload = load i64, i64 addrspace(4)* %fptr, align 8
76  store i64 %fload, i64 addrspace(1)* %out, align 8
77  ret void
78}
79
80; CHECK-LABEL: {{^}}load_flat_v4i32:
81; CHECK: flat_load_dwordx4
; Loads a <4 x i32> through the addrspace(4) view of %gptr (annotated align 32)
; and writes it to %out in addrspace(1) (annotated align 8). The label is
; anchored to line start, matching the store tests in this file.
82define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
83  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
84  %fload = load <4 x i32>, <4 x i32> addrspace(4)* %fptr, align 32
85  store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
86  ret void
87}
88
89; CHECK-LABEL: {{^}}sextload_flat_i8:
90; CHECK: flat_load_sbyte
; Loads an i8 through the addrspace(4) view of %gptr, sign-extends it to i32
; and stores the result to %out; a signed byte load is expected. The label is
; anchored to line start, matching the store tests in this file.
91define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
92  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
93  %fload = load i8, i8 addrspace(4)* %fptr, align 4
94  %ext = sext i8 %fload to i32
95  store i32 %ext, i32 addrspace(1)* %out, align 4
96  ret void
97}
98
99; CHECK-LABEL: {{^}}zextload_flat_i8:
100; CHECK: flat_load_ubyte
; Loads an i8 through the addrspace(4) view of %gptr, zero-extends it to i32
; and stores the result to %out; an unsigned byte load is expected. The label
; is anchored to line start, matching the store tests in this file.
101define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
102  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
103  %fload = load i8, i8 addrspace(4)* %fptr, align 4
104  %ext = zext i8 %fload to i32
105  store i32 %ext, i32 addrspace(1)* %out, align 4
106  ret void
107}
108
109; CHECK-LABEL: {{^}}sextload_flat_i16:
110; CHECK: flat_load_sshort
; Loads an i16 through the addrspace(4) view of %gptr, sign-extends it to i32
; and stores the result to %out; a signed short load is expected. The label is
; anchored to line start, matching the store tests in this file.
111define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
112  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
113  %fload = load i16, i16 addrspace(4)* %fptr, align 4
114  %ext = sext i16 %fload to i32
115  store i32 %ext, i32 addrspace(1)* %out, align 4
116  ret void
117}
118
119; CHECK-LABEL: {{^}}zextload_flat_i16:
120; CHECK: flat_load_ushort
; Loads an i16 through the addrspace(4) view of %gptr, zero-extends it to i32
; and stores the result to %out; an unsigned short load is expected. The label
; is anchored to line start, matching the store tests in this file.
121define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
122  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
123  %fload = load i16, i16 addrspace(4)* %fptr, align 4
124  %ext = zext i16 %fload to i32
125  store i32 %ext, i32 addrspace(1)* %out, align 4
126  ret void
127}
128
129; CHECK-LABEL: {{^}}flat_scratch_unaligned_load:
130; CHECK: flat_load_ubyte
131; CHECK: flat_load_ubyte
132; CHECK: flat_load_ubyte
133; CHECK: flat_load_ubyte
; An i32 stack slot is cast to addrspace(4) and read with a volatile, align 1
; load whose result is unused; the checks above expect the access to be
; emitted as four single-byte flat loads. The label is anchored to line start,
; matching the store tests in this file.
134define void @flat_scratch_unaligned_load() {
135  %scratch = alloca i32
136  %fptr = addrspacecast i32* %scratch to i32 addrspace(4)*
137  %ld = load volatile i32, i32 addrspace(4)* %fptr, align 1
138  ret void
139}
140
141; CHECK-LABEL: {{^}}flat_scratch_unaligned_store:
142; CHECK: flat_store_byte
143; CHECK: flat_store_byte
144; CHECK: flat_store_byte
145; CHECK: flat_store_byte
; An i32 stack slot is cast to addrspace(4) and written with a volatile,
; align 1 store of zero; the checks above expect the access to be emitted as
; four single-byte flat stores. The label is anchored to line start, matching
; the store tests in this file.
146define void @flat_scratch_unaligned_store() {
147  %scratch = alloca i32
148  %fptr = addrspacecast i32* %scratch to i32 addrspace(4)*
149  store volatile i32 0, i32 addrspace(4)* %fptr, align 1
150  ret void
151}
152
153; CHECK-LABEL: {{^}}flat_scratch_multidword_load:
154; HSA: flat_load_dword
155; HSA: flat_load_dword
156; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
; A <2 x i32> stack slot is cast to addrspace(4) and read with a volatile load
; (no explicit alignment); two dword flat loads are expected, verified only
; under the amdhsa run line. The label is anchored to line start, matching the
; store tests in this file.
157define void @flat_scratch_multidword_load() {
158  %scratch = alloca <2 x i32>
159  %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
160  %ld = load volatile <2 x i32>, <2 x i32> addrspace(4)* %fptr
161  ret void
162}
163
164; CHECK-LABEL: {{^}}flat_scratch_multidword_store:
165; HSA: flat_store_dword
166; HSA: flat_store_dword
167; FIXME: These tests are broken for os = mesa3d, because it doesn't initialize flat_scr
; A <2 x i32> stack slot is cast to addrspace(4) and zero-filled with a
; volatile store (no explicit alignment); two dword flat stores are expected,
; verified only under the amdhsa run line. The label is anchored to line
; start, matching the store tests in this file.
168define void @flat_scratch_multidword_store() {
169  %scratch = alloca <2 x i32>
170  %fptr = addrspacecast <2 x i32>* %scratch to <2 x i32> addrspace(4)*
171  store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(4)* %fptr
172  ret void
173}
174
; Attribute groups. Only #0 is referenced by the functions visible in this
; file (the flat_scratch_* functions carry no attribute group); #1 and #3 have
; no visible users here.
175attributes #0 = { nounwind }
176attributes #1 = { nounwind convergent }
177attributes #3 = { nounwind readnone }
178