1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG
3; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL
4
5; Test flat scratch SVS addressing mode with various combinations of alignment
6; of soffset, voffset and inst_offset.
7
8declare i32 @llvm.amdgcn.workitem.id.x()
9
10define amdgpu_kernel void @soff1_voff1(i32 %soff) {
11; GFX940-SDAG-LABEL: soff1_voff1:
12; GFX940-SDAG:       ; %bb.0: ; %bb
13; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
14; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
15; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
16; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
17; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
18; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
19; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
20; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
21; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
22; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
23; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
24; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
25; GFX940-SDAG-NEXT:    s_endpgm
26;
27; GFX940-GISEL-LABEL: soff1_voff1:
28; GFX940-GISEL:       ; %bb.0: ; %bb
29; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
30; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
31; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
32; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
33; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
34; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
35; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
36; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
37; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
38; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
39; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
40; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
41; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
42; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
43; GFX940-GISEL-NEXT:    s_endpgm
44bb:
  ; Case: scalar offset = %soff * 1, per-workitem offset = workitem.id.x * 1.
  ; Presumably the multipliers are how the alignment of the two offsets
  ; (see the file header) is varied between cases -- confirm against the
  ; backend change this test accompanies.
  ; Expected codegen above: the SelectionDAG run keeps the VGPR offset (v0)
  ; and s0 as separate store operands; the GlobalISel run adds both into v0
  ; and stores with `off`.
  ;
  ; Scalar part of the address: the kernel argument scaled by 1.
45  %soff1 = mul i32 %soff, 1
  ; 64 x i8 allocation, 4-byte aligned, in addrspace(5).
46  %a = alloca i8, i32 64, align 4, addrspace(5)
  ; Base pointer plus the scalar offset.
47  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  ; Per-workitem part of the address: workitem id in x scaled by 1.
48  %voff = call i32 @llvm.amdgcn.workitem.id.x()
49  %voff1 = mul i32 %voff, 1
  ; Pointer with both offsets applied.
50  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  ; Volatile byte stores at constant offsets 1, 2 and 4 from that pointer;
  ; each stores the value equal to its offset.
51  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
52  store volatile i8 1, i8 addrspace(5)* %p1
53  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
54  store volatile i8 2, i8 addrspace(5)* %p2
55  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
56  store volatile i8 4, i8 addrspace(5)* %p4
57  ret void
58}
59
60define amdgpu_kernel void @soff1_voff2(i32 %soff) {
61; GFX940-SDAG-LABEL: soff1_voff2:
62; GFX940-SDAG:       ; %bb.0: ; %bb
63; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
64; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
65; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
66; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
67; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
68; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
69; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
70; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
71; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
72; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
73; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
74; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
75; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
76; GFX940-SDAG-NEXT:    s_endpgm
77;
78; GFX940-GISEL-LABEL: soff1_voff2:
79; GFX940-GISEL:       ; %bb.0: ; %bb
80; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
81; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
82; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
83; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
84; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
85; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
86; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
87; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
88; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
89; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
90; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
91; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
92; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
93; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
94; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
95; GFX940-GISEL-NEXT:    s_endpgm
96bb:
  ; Case: scalar offset = %soff * 1, per-workitem offset = workitem.id.x * 2.
  ; Presumably the multipliers are how the alignment of the two offsets
  ; (see the file header) is varied between cases -- confirm against the
  ; backend change this test accompanies.
  ; Expected codegen above: the workitem id is shifted left by 1 in both
  ; runs; the SelectionDAG run keeps v0 and s0 as separate store operands,
  ; the GlobalISel run adds both into v0 and stores with `off`.
  ;
  ; Scalar part of the address: the kernel argument scaled by 1.
97  %soff1 = mul i32 %soff, 1
  ; 64 x i8 allocation, 4-byte aligned, in addrspace(5).
98  %a = alloca i8, i32 64, align 4, addrspace(5)
  ; Base pointer plus the scalar offset.
99  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  ; Per-workitem part of the address: workitem id in x scaled by 2.
100  %voff = call i32 @llvm.amdgcn.workitem.id.x()
101  %voff2 = mul i32 %voff, 2
  ; Pointer with both offsets applied.
102  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  ; Volatile byte stores at constant offsets 1, 2 and 4 from that pointer;
  ; each stores the value equal to its offset.
103  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
104  store volatile i8 1, i8 addrspace(5)* %p1
105  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
106  store volatile i8 2, i8 addrspace(5)* %p2
107  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
108  store volatile i8 4, i8 addrspace(5)* %p4
109  ret void
110}
111
112define amdgpu_kernel void @soff1_voff4(i32 %soff) {
113; GFX940-SDAG-LABEL: soff1_voff4:
114; GFX940-SDAG:       ; %bb.0: ; %bb
115; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
116; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
117; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
118; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
119; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
120; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
121; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
122; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
123; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
124; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
125; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
126; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
127; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
128; GFX940-SDAG-NEXT:    s_endpgm
129;
130; GFX940-GISEL-LABEL: soff1_voff4:
131; GFX940-GISEL:       ; %bb.0: ; %bb
132; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
133; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
134; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
135; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
136; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
137; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
138; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
139; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
140; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
141; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
142; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
143; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
144; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
145; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
146; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
147; GFX940-GISEL-NEXT:    s_endpgm
148bb:
  ; Case: scalar offset = %soff * 1, per-workitem offset = workitem.id.x * 4.
  ; Presumably the multipliers are how the alignment of the two offsets
  ; (see the file header) is varied between cases -- confirm against the
  ; backend change this test accompanies.
  ; Expected codegen above: the workitem id is shifted left by 2 in both
  ; runs; the SelectionDAG run keeps v0 and s0 as separate store operands,
  ; the GlobalISel run adds both into v0 and stores with `off`.
  ;
  ; Scalar part of the address: the kernel argument scaled by 1.
149  %soff1 = mul i32 %soff, 1
  ; 64 x i8 allocation, 4-byte aligned, in addrspace(5).
150  %a = alloca i8, i32 64, align 4, addrspace(5)
  ; Base pointer plus the scalar offset.
151  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  ; Per-workitem part of the address: workitem id in x scaled by 4.
152  %voff = call i32 @llvm.amdgcn.workitem.id.x()
153  %voff4 = mul i32 %voff, 4
  ; Pointer with both offsets applied.
154  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  ; Volatile byte stores at constant offsets 1, 2 and 4 from that pointer;
  ; each stores the value equal to its offset.
155  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
156  store volatile i8 1, i8 addrspace(5)* %p1
157  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
158  store volatile i8 2, i8 addrspace(5)* %p2
159  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
160  store volatile i8 4, i8 addrspace(5)* %p4
161  ret void
162}
163
164define amdgpu_kernel void @soff2_voff1(i32 %soff) {
165; GFX940-SDAG-LABEL: soff2_voff1:
166; GFX940-SDAG:       ; %bb.0: ; %bb
167; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
168; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
169; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
170; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
171; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
172; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
173; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
174; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
175; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
176; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
177; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
178; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
179; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
180; GFX940-SDAG-NEXT:    s_endpgm
181;
182; GFX940-GISEL-LABEL: soff2_voff1:
183; GFX940-GISEL:       ; %bb.0: ; %bb
184; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
185; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
186; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
187; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
188; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
189; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
190; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
191; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
192; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
193; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
194; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
195; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
196; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
197; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
198; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
199; GFX940-GISEL-NEXT:    s_endpgm
200bb:
  ; Case: scalar offset = %soff * 2, per-workitem offset = workitem.id.x * 1.
  ; Presumably the multipliers are how the alignment of the two offsets
  ; (see the file header) is varied between cases -- confirm against the
  ; backend change this test accompanies.
  ; Expected codegen above: s0 is shifted left by 1 in both runs; the
  ; SelectionDAG run keeps v0 and s0 as separate store operands, the
  ; GlobalISel run adds both into v0 and stores with `off`.
  ;
  ; Scalar part of the address: the kernel argument scaled by 2.
201  %soff2 = mul i32 %soff, 2
  ; 64 x i8 allocation, 4-byte aligned, in addrspace(5).
202  %a = alloca i8, i32 64, align 4, addrspace(5)
  ; Base pointer plus the scalar offset.
203  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  ; Per-workitem part of the address: workitem id in x scaled by 1.
204  %voff = call i32 @llvm.amdgcn.workitem.id.x()
205  %voff1 = mul i32 %voff, 1
  ; Pointer with both offsets applied.
206  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  ; Volatile byte stores at constant offsets 1, 2 and 4 from that pointer;
  ; each stores the value equal to its offset.
207  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
208  store volatile i8 1, i8 addrspace(5)* %p1
209  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
210  store volatile i8 2, i8 addrspace(5)* %p2
211  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
212  store volatile i8 4, i8 addrspace(5)* %p4
213  ret void
214}
215
216define amdgpu_kernel void @soff2_voff2(i32 %soff) {
217; GFX940-SDAG-LABEL: soff2_voff2:
218; GFX940-SDAG:       ; %bb.0: ; %bb
219; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
220; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
221; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
222; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
223; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
224; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
225; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
226; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
227; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
228; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
229; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
230; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
231; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
232; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
233; GFX940-SDAG-NEXT:    s_endpgm
234;
235; GFX940-GISEL-LABEL: soff2_voff2:
236; GFX940-GISEL:       ; %bb.0: ; %bb
237; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
238; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
239; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
240; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
241; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
242; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
243; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
244; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
245; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
246; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
247; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
248; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
249; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
250; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
251; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
252; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
253; GFX940-GISEL-NEXT:    s_endpgm
254bb:
  ; Case: scalar offset = %soff * 2, per-workitem offset = workitem.id.x * 2.
  ; Presumably the multipliers are how the alignment of the two offsets
  ; (see the file header) is varied between cases -- confirm against the
  ; backend change this test accompanies.
  ; Expected codegen above: s0 and the workitem id are each shifted left by
  ; 1 in both runs; the SelectionDAG run keeps v0 and s0 as separate store
  ; operands, the GlobalISel run adds both into v0 and stores with `off`.
  ;
  ; Scalar part of the address: the kernel argument scaled by 2.
255  %soff2 = mul i32 %soff, 2
  ; 64 x i8 allocation, 4-byte aligned, in addrspace(5).
256  %a = alloca i8, i32 64, align 4, addrspace(5)
  ; Base pointer plus the scalar offset.
257  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  ; Per-workitem part of the address: workitem id in x scaled by 2.
258  %voff = call i32 @llvm.amdgcn.workitem.id.x()
259  %voff2 = mul i32 %voff, 2
  ; Pointer with both offsets applied.
260  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  ; Volatile byte stores at constant offsets 1, 2 and 4 from that pointer;
  ; each stores the value equal to its offset.
261  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
262  store volatile i8 1, i8 addrspace(5)* %p1
263  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
264  store volatile i8 2, i8 addrspace(5)* %p2
265  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
266  store volatile i8 4, i8 addrspace(5)* %p4
267  ret void
268}
269
270define amdgpu_kernel void @soff2_voff4(i32 %soff) {
271; GFX940-SDAG-LABEL: soff2_voff4:
272; GFX940-SDAG:       ; %bb.0: ; %bb
273; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
274; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
275; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
276; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
277; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
278; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
279; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
280; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
281; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
282; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
283; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
284; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
285; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
286; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
287; GFX940-SDAG-NEXT:    s_endpgm
288;
289; GFX940-GISEL-LABEL: soff2_voff4:
290; GFX940-GISEL:       ; %bb.0: ; %bb
291; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
292; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
293; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
294; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
295; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
296; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
297; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
298; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
299; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
300; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
301; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
302; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
303; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
304; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
305; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
306; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
307; GFX940-GISEL-NEXT:    s_endpgm
308bb:
  ; Case: scalar offset = %soff * 2, per-workitem offset = workitem.id.x * 4.
  ; Presumably the multipliers are how the alignment of the two offsets
  ; (see the file header) is varied between cases -- confirm against the
  ; backend change this test accompanies.
  ; Expected codegen above: s0 is shifted left by 1 and the workitem id by 2
  ; in both runs; the SelectionDAG run keeps v0 and s0 as separate store
  ; operands, the GlobalISel run adds both into v0 and stores with `off`.
  ;
  ; Scalar part of the address: the kernel argument scaled by 2.
309  %soff2 = mul i32 %soff, 2
  ; 64 x i8 allocation, 4-byte aligned, in addrspace(5).
310  %a = alloca i8, i32 64, align 4, addrspace(5)
  ; Base pointer plus the scalar offset.
311  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  ; Per-workitem part of the address: workitem id in x scaled by 4.
312  %voff = call i32 @llvm.amdgcn.workitem.id.x()
313  %voff4 = mul i32 %voff, 4
  ; Pointer with both offsets applied.
314  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  ; Volatile byte stores at constant offsets 1, 2 and 4 from that pointer;
  ; each stores the value equal to its offset.
315  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
316  store volatile i8 1, i8 addrspace(5)* %p1
317  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
318  store volatile i8 2, i8 addrspace(5)* %p2
319  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
320  store volatile i8 4, i8 addrspace(5)* %p4
321  ret void
322}
323
324define amdgpu_kernel void @soff4_voff1(i32 %soff) {
325; GFX940-SDAG-LABEL: soff4_voff1:
326; GFX940-SDAG:       ; %bb.0: ; %bb
327; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
328; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
329; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
330; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
331; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
332; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
333; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
334; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
335; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
336; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
337; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
338; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
339; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
340; GFX940-SDAG-NEXT:    s_endpgm
341;
342; GFX940-GISEL-LABEL: soff4_voff1:
343; GFX940-GISEL:       ; %bb.0: ; %bb
344; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
345; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
346; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
347; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
348; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
349; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
350; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
351; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
352; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
353; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
354; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
355; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
356; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
357; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
358; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
359; GFX940-GISEL-NEXT:    s_endpgm
360bb:
  ; Case: scalar offset = %soff * 4, per-workitem offset = workitem.id.x * 1.
  ; Presumably the multipliers are how the alignment of the two offsets
  ; (see the file header) is varied between cases -- confirm against the
  ; backend change this test accompanies.
  ; Expected codegen above: s0 is shifted left by 2 in both runs; the
  ; SelectionDAG run keeps v0 and s0 as separate store operands, the
  ; GlobalISel run adds both into v0 and stores with `off`.
  ;
  ; Scalar part of the address: the kernel argument scaled by 4.
361  %soff4 = mul i32 %soff, 4
  ; 64 x i8 allocation, 4-byte aligned, in addrspace(5).
362  %a = alloca i8, i32 64, align 4, addrspace(5)
  ; Base pointer plus the scalar offset.
363  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; Per-workitem part of the address: workitem id in x scaled by 1.
364  %voff = call i32 @llvm.amdgcn.workitem.id.x()
365  %voff1 = mul i32 %voff, 1
  ; Pointer with both offsets applied.
366  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  ; Volatile byte stores at constant offsets 1, 2 and 4 from that pointer;
  ; each stores the value equal to its offset.
367  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
368  store volatile i8 1, i8 addrspace(5)* %p1
369  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
370  store volatile i8 2, i8 addrspace(5)* %p2
371  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
372  store volatile i8 4, i8 addrspace(5)* %p4
373  ret void
374}
375
376define amdgpu_kernel void @soff4_voff2(i32 %soff) {
377; GFX940-SDAG-LABEL: soff4_voff2:
378; GFX940-SDAG:       ; %bb.0: ; %bb
379; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
380; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
381; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
382; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
383; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
384; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
385; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
386; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
387; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
388; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
389; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
390; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
391; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
392; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
393; GFX940-SDAG-NEXT:    s_endpgm
394;
395; GFX940-GISEL-LABEL: soff4_voff2:
396; GFX940-GISEL:       ; %bb.0: ; %bb
397; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
398; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
399; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
400; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
401; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
402; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
403; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
404; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
405; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
406; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
407; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
408; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
409; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
410; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
411; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
412; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
413; GFX940-GISEL-NEXT:    s_endpgm
414bb:
  ; Case: scalar offset = %soff * 4, per-workitem offset = workitem.id.x * 2.
  ; Presumably the multipliers are how the alignment of the two offsets
  ; (see the file header) is varied between cases -- confirm against the
  ; backend change this test accompanies.
  ; Expected codegen above: s0 is shifted left by 2 and the workitem id by 1
  ; in both runs; the SelectionDAG run keeps v0 and s0 as separate store
  ; operands, the GlobalISel run adds both into v0 and stores with `off`.
  ;
  ; Scalar part of the address: the kernel argument scaled by 4.
415  %soff4 = mul i32 %soff, 4
  ; 64 x i8 allocation, 4-byte aligned, in addrspace(5).
416  %a = alloca i8, i32 64, align 4, addrspace(5)
  ; Base pointer plus the scalar offset.
417  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; Per-workitem part of the address: workitem id in x scaled by 2.
418  %voff = call i32 @llvm.amdgcn.workitem.id.x()
419  %voff2 = mul i32 %voff, 2
  ; Pointer with both offsets applied.
420  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  ; Volatile byte stores at constant offsets 1, 2 and 4 from that pointer;
  ; each stores the value equal to its offset.
421  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
422  store volatile i8 1, i8 addrspace(5)* %p1
423  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
424  store volatile i8 2, i8 addrspace(5)* %p2
425  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
426  store volatile i8 4, i8 addrspace(5)* %p4
427  ret void
428}
429
430define amdgpu_kernel void @soff4_voff4(i32 %soff) {
431; GFX940-SDAG-LABEL: soff4_voff4:
432; GFX940-SDAG:       ; %bb.0: ; %bb
433; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
434; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
435; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
436; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
437; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
438; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
439; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
440; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
441; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
442; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
443; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
444; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
445; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
446; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
447; GFX940-SDAG-NEXT:    s_endpgm
448;
449; GFX940-GISEL-LABEL: soff4_voff4:
450; GFX940-GISEL:       ; %bb.0: ; %bb
451; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
452; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
453; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
454; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
455; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
456; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
457; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
458; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
459; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
460; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
461; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
462; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
463; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
464; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
465; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
466; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
467; GFX940-GISEL-NEXT:    s_endpgm
468bb:
  ; Case: scalar offset = %soff * 4, per-workitem offset = workitem.id.x * 4.
  ; Presumably the multipliers are how the alignment of the two offsets
  ; (see the file header) is varied between cases -- confirm against the
  ; backend change this test accompanies.
  ; Expected codegen above: s0 and the workitem id are each shifted left by
  ; 2 in both runs; the SelectionDAG run keeps v0 and s0 as separate store
  ; operands, the GlobalISel run adds both into v0 and stores with `off`.
  ;
  ; Scalar part of the address: the kernel argument scaled by 4.
469  %soff4 = mul i32 %soff, 4
  ; 64 x i8 allocation, 4-byte aligned, in addrspace(5).
470  %a = alloca i8, i32 64, align 4, addrspace(5)
  ; Base pointer plus the scalar offset.
471  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; Per-workitem part of the address: workitem id in x scaled by 4.
472  %voff = call i32 @llvm.amdgcn.workitem.id.x()
473  %voff4 = mul i32 %voff, 4
  ; Pointer with both offsets applied.
474  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  ; Volatile byte stores at constant offsets 1, 2 and 4 from that pointer;
  ; each stores the value equal to its offset.
475  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
476  store volatile i8 1, i8 addrspace(5)* %p1
477  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
478  store volatile i8 2, i8 addrspace(5)* %p2
479  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
480  store volatile i8 4, i8 addrspace(5)* %p4
481  ret void
482}
483