1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG
3; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL
4; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-SDAG
5; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-GISEL
6
7; Test flat scratch SVS addressing mode with various combinations of alignment
8; of soffset, voffset and inst_offset.
9
10declare i32 @llvm.amdgcn.workitem.id.x()
11
; soff1_voff1 - the store address is built from a 64-byte addrspace(5) alloca
; plus (%soff * 1) plus (workitem.id.x * 1); three volatile byte stores then
; hit constant offsets 1, 2 and 4 from that address.
; In the SDAG check lines s0 stays an operand of every scratch store, while the
; GISEL check lines add s0 into v0 first and pass `off` to the store.
12define amdgpu_kernel void @soff1_voff1(i32 %soff) {
13; GFX940-SDAG-LABEL: soff1_voff1:
14; GFX940-SDAG:       ; %bb.0: ; %bb
15; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
16; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
17; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
18; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
19; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
20; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
21; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
22; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
23; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
24; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
25; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
26; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
27; GFX940-SDAG-NEXT:    s_endpgm
28;
29; GFX940-GISEL-LABEL: soff1_voff1:
30; GFX940-GISEL:       ; %bb.0: ; %bb
31; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
32; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
33; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
34; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
35; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
36; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
37; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
38; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
39; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
40; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
41; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
42; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
43; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
44; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
45; GFX940-GISEL-NEXT:    s_endpgm
46;
47; GFX11-SDAG-LABEL: soff1_voff1:
48; GFX11-SDAG:       ; %bb.0: ; %bb
49; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
50; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
51; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
52; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
53; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
54; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
55; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
56; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
57; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
58; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
59; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
60; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
61; GFX11-SDAG-NEXT:    s_endpgm
62;
63; GFX11-GISEL-LABEL: soff1_voff1:
64; GFX11-GISEL:       ; %bb.0: ; %bb
65; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
66; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
67; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
68; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
69; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
70; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
71; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
72; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
73; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
74; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
75; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
76; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
77; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
78; GFX11-GISEL-NEXT:    s_endpgm
79bb:
  ; Kernel-argument offset multiplied by 1 (no shift appears in the check lines).
80  %soff1 = mul i32 %soff, 1
  ; 64 x i8 alloca in addrspace(5), aligned to 4 bytes; base of all addresses.
81  %a = alloca i8, i32 64, align 4, addrspace(5)
82  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  ; Workitem-id offset multiplied by 1 (v0 is used unshifted in the check lines).
83  %voff = call i32 @llvm.amdgcn.workitem.id.x()
84  %voff1 = mul i32 %voff, 1
85  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  ; Volatile i8 stores of 1, 2 and 4 at %asv + 1, %asv + 2 and %asv + 4.
86  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
87  store volatile i8 1, i8 addrspace(5)* %p1
88  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
89  store volatile i8 2, i8 addrspace(5)* %p2
90  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
91  store volatile i8 4, i8 addrspace(5)* %p4
92  ret void
93}
94
; soff1_voff2 - the store address is built from a 64-byte addrspace(5) alloca
; plus (%soff * 1) plus (workitem.id.x * 2); three volatile byte stores then
; hit constant offsets 1, 2 and 4 from that address.
; In the SDAG check lines s0 stays an operand of every scratch store, while the
; GISEL check lines add s0 into v0 first and pass `off` to the store.
95define amdgpu_kernel void @soff1_voff2(i32 %soff) {
96; GFX940-SDAG-LABEL: soff1_voff2:
97; GFX940-SDAG:       ; %bb.0: ; %bb
98; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
99; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
100; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
101; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
102; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
103; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
104; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
105; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
106; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
107; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
108; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
109; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
110; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
111; GFX940-SDAG-NEXT:    s_endpgm
112;
113; GFX940-GISEL-LABEL: soff1_voff2:
114; GFX940-GISEL:       ; %bb.0: ; %bb
115; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
116; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
117; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
118; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
119; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
120; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
121; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
122; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
123; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
124; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
125; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
126; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
127; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
128; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
129; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
130; GFX940-GISEL-NEXT:    s_endpgm
131;
132; GFX11-SDAG-LABEL: soff1_voff2:
133; GFX11-SDAG:       ; %bb.0: ; %bb
134; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
135; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
136; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
137; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
138; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
139; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
140; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
141; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
142; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
143; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
144; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
145; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
146; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
147; GFX11-SDAG-NEXT:    s_endpgm
148;
149; GFX11-GISEL-LABEL: soff1_voff2:
150; GFX11-GISEL:       ; %bb.0: ; %bb
151; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
152; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
153; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
154; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
155; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
156; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
157; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
158; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
159; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
160; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
161; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
162; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
163; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
164; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
165; GFX11-GISEL-NEXT:    s_endpgm
166bb:
  ; Kernel-argument offset multiplied by 1 (no shift appears in the check lines).
167  %soff1 = mul i32 %soff, 1
  ; 64 x i8 alloca in addrspace(5), aligned to 4 bytes; base of all addresses.
168  %a = alloca i8, i32 64, align 4, addrspace(5)
169  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  ; Workitem-id offset multiplied by 2 (checked as v_lshlrev_b32 of v0 by 1).
170  %voff = call i32 @llvm.amdgcn.workitem.id.x()
171  %voff2 = mul i32 %voff, 2
172  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  ; Volatile i8 stores of 1, 2 and 4 at %asv + 1, %asv + 2 and %asv + 4.
173  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
174  store volatile i8 1, i8 addrspace(5)* %p1
175  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
176  store volatile i8 2, i8 addrspace(5)* %p2
177  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
178  store volatile i8 4, i8 addrspace(5)* %p4
179  ret void
180}
181
; soff1_voff4 - the store address is built from a 64-byte addrspace(5) alloca
; plus (%soff * 1) plus (workitem.id.x * 4); three volatile byte stores then
; hit constant offsets 1, 2 and 4 from that address.
; In the SDAG check lines s0 stays an operand of every scratch store, while the
; GISEL check lines add s0 into v0 first and pass `off` to the store.
182define amdgpu_kernel void @soff1_voff4(i32 %soff) {
183; GFX940-SDAG-LABEL: soff1_voff4:
184; GFX940-SDAG:       ; %bb.0: ; %bb
185; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
186; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
187; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
188; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
189; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
190; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
191; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
192; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
193; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
194; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
195; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
196; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
197; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
198; GFX940-SDAG-NEXT:    s_endpgm
199;
200; GFX940-GISEL-LABEL: soff1_voff4:
201; GFX940-GISEL:       ; %bb.0: ; %bb
202; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
203; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
204; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
205; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
206; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
207; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
208; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
209; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
210; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
211; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
212; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
213; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
214; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
215; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
216; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
217; GFX940-GISEL-NEXT:    s_endpgm
218;
219; GFX11-SDAG-LABEL: soff1_voff4:
220; GFX11-SDAG:       ; %bb.0: ; %bb
221; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
222; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
223; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
224; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
225; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
226; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
227; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
228; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
229; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
230; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
231; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
232; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
233; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
234; GFX11-SDAG-NEXT:    s_endpgm
235;
236; GFX11-GISEL-LABEL: soff1_voff4:
237; GFX11-GISEL:       ; %bb.0: ; %bb
238; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
239; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
240; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
241; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
242; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
243; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
244; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
245; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
246; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
247; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
248; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
249; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
250; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
251; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
252; GFX11-GISEL-NEXT:    s_endpgm
253bb:
  ; Kernel-argument offset multiplied by 1 (no shift appears in the check lines).
254  %soff1 = mul i32 %soff, 1
  ; 64 x i8 alloca in addrspace(5), aligned to 4 bytes; base of all addresses.
255  %a = alloca i8, i32 64, align 4, addrspace(5)
256  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  ; Workitem-id offset multiplied by 4 (checked as v_lshlrev_b32 of v0 by 2).
257  %voff = call i32 @llvm.amdgcn.workitem.id.x()
258  %voff4 = mul i32 %voff, 4
259  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  ; Volatile i8 stores of 1, 2 and 4 at %asv + 1, %asv + 2 and %asv + 4.
260  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
261  store volatile i8 1, i8 addrspace(5)* %p1
262  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
263  store volatile i8 2, i8 addrspace(5)* %p2
264  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
265  store volatile i8 4, i8 addrspace(5)* %p4
266  ret void
267}
268
; soff2_voff1 - the store address is built from a 64-byte addrspace(5) alloca
; plus (%soff * 2) plus (workitem.id.x * 1); three volatile byte stores then
; hit constant offsets 1, 2 and 4 from that address.
; In the SDAG check lines s0 stays an operand of every scratch store, while the
; GISEL check lines add s0 into v0 first and pass `off` to the store.
269define amdgpu_kernel void @soff2_voff1(i32 %soff) {
270; GFX940-SDAG-LABEL: soff2_voff1:
271; GFX940-SDAG:       ; %bb.0: ; %bb
272; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
273; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
274; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
275; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
276; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
277; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
278; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
279; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
280; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
281; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
282; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
283; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
284; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
285; GFX940-SDAG-NEXT:    s_endpgm
286;
287; GFX940-GISEL-LABEL: soff2_voff1:
288; GFX940-GISEL:       ; %bb.0: ; %bb
289; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
290; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
291; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
292; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
293; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
294; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
295; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
296; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
297; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
298; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
299; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
300; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
301; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
302; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
303; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
304; GFX940-GISEL-NEXT:    s_endpgm
305;
306; GFX11-SDAG-LABEL: soff2_voff1:
307; GFX11-SDAG:       ; %bb.0: ; %bb
308; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
309; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
310; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
311; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
312; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
313; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
314; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
315; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
316; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
317; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
318; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
319; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
320; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
321; GFX11-SDAG-NEXT:    s_endpgm
322;
323; GFX11-GISEL-LABEL: soff2_voff1:
324; GFX11-GISEL:       ; %bb.0: ; %bb
325; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
326; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
327; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
328; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
329; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
330; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
331; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
332; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
333; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
334; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
335; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
336; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
337; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
338; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
339; GFX11-GISEL-NEXT:    s_endpgm
340bb:
  ; Kernel-argument offset multiplied by 2 (checked as s_lshl_b32 of s0 by 1).
341  %soff2 = mul i32 %soff, 2
  ; 64 x i8 alloca in addrspace(5), aligned to 4 bytes; base of all addresses.
342  %a = alloca i8, i32 64, align 4, addrspace(5)
343  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  ; Workitem-id offset multiplied by 1 (v0 is used unshifted in the check lines).
344  %voff = call i32 @llvm.amdgcn.workitem.id.x()
345  %voff1 = mul i32 %voff, 1
346  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  ; Volatile i8 stores of 1, 2 and 4 at %asv + 1, %asv + 2 and %asv + 4.
347  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
348  store volatile i8 1, i8 addrspace(5)* %p1
349  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
350  store volatile i8 2, i8 addrspace(5)* %p2
351  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
352  store volatile i8 4, i8 addrspace(5)* %p4
353  ret void
354}
355
; soff2_voff2 - the store address is built from a 64-byte addrspace(5) alloca
; plus (%soff * 2) plus (workitem.id.x * 2); three volatile byte stores then
; hit constant offsets 1, 2 and 4 from that address.
; In the SDAG check lines s0 stays an operand of every scratch store, while the
; GISEL check lines add s0 into v0 first and pass `off` to the store.
356define amdgpu_kernel void @soff2_voff2(i32 %soff) {
357; GFX940-SDAG-LABEL: soff2_voff2:
358; GFX940-SDAG:       ; %bb.0: ; %bb
359; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
360; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
361; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
362; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
363; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
364; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
365; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
366; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
367; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
368; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
369; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
370; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
371; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
372; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
373; GFX940-SDAG-NEXT:    s_endpgm
374;
375; GFX940-GISEL-LABEL: soff2_voff2:
376; GFX940-GISEL:       ; %bb.0: ; %bb
377; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
378; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
379; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
380; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
381; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
382; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
383; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
384; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
385; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
386; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
387; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
388; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
389; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
390; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
391; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
392; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
393; GFX940-GISEL-NEXT:    s_endpgm
394;
395; GFX11-SDAG-LABEL: soff2_voff2:
396; GFX11-SDAG:       ; %bb.0: ; %bb
397; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
398; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
399; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
400; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
401; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
402; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
403; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
404; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
405; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
406; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
407; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
408; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
409; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
410; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
411; GFX11-SDAG-NEXT:    s_endpgm
412;
413; GFX11-GISEL-LABEL: soff2_voff2:
414; GFX11-GISEL:       ; %bb.0: ; %bb
415; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
416; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
417; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
418; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
419; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
420; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
421; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
422; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
423; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
424; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
425; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
426; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
427; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
428; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
429; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
430; GFX11-GISEL-NEXT:    s_endpgm
431bb:
  ; Kernel-argument offset multiplied by 2 (checked as s_lshl_b32 of s0 by 1).
432  %soff2 = mul i32 %soff, 2
  ; 64 x i8 alloca in addrspace(5), aligned to 4 bytes; base of all addresses.
433  %a = alloca i8, i32 64, align 4, addrspace(5)
434  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  ; Workitem-id offset multiplied by 2 (checked as v_lshlrev_b32 of v0 by 1).
435  %voff = call i32 @llvm.amdgcn.workitem.id.x()
436  %voff2 = mul i32 %voff, 2
437  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  ; Volatile i8 stores of 1, 2 and 4 at %asv + 1, %asv + 2 and %asv + 4.
438  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
439  store volatile i8 1, i8 addrspace(5)* %p1
440  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
441  store volatile i8 2, i8 addrspace(5)* %p2
442  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
443  store volatile i8 4, i8 addrspace(5)* %p4
444  ret void
445}
446
; soff2_voff4 - the store address is built from a 64-byte addrspace(5) alloca
; plus (%soff * 2) plus (workitem.id.x * 4); three volatile byte stores then
; hit constant offsets 1, 2 and 4 from that address.
; In the SDAG check lines s0 stays an operand of every scratch store, while the
; GISEL check lines add s0 into v0 first and pass `off` to the store.
447define amdgpu_kernel void @soff2_voff4(i32 %soff) {
448; GFX940-SDAG-LABEL: soff2_voff4:
449; GFX940-SDAG:       ; %bb.0: ; %bb
450; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
451; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
452; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
453; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
454; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
455; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
456; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
457; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
458; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
459; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
460; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
461; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
462; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
463; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
464; GFX940-SDAG-NEXT:    s_endpgm
465;
466; GFX940-GISEL-LABEL: soff2_voff4:
467; GFX940-GISEL:       ; %bb.0: ; %bb
468; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
469; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
470; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
471; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
472; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
473; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
474; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
475; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
476; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
477; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
478; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
479; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
480; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
481; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
482; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
483; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
484; GFX940-GISEL-NEXT:    s_endpgm
485;
486; GFX11-SDAG-LABEL: soff2_voff4:
487; GFX11-SDAG:       ; %bb.0: ; %bb
488; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
489; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
490; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
491; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
492; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
493; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
494; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
495; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
496; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
497; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
498; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
499; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
500; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
501; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
502; GFX11-SDAG-NEXT:    s_endpgm
503;
504; GFX11-GISEL-LABEL: soff2_voff4:
505; GFX11-GISEL:       ; %bb.0: ; %bb
506; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
507; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
508; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
509; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
510; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
511; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
512; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
513; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
514; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
515; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
516; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
517; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
518; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
519; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
520; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
521; GFX11-GISEL-NEXT:    s_endpgm
522bb:
  ; Kernel-argument offset multiplied by 2 (checked as s_lshl_b32 of s0 by 1).
523  %soff2 = mul i32 %soff, 2
  ; 64 x i8 alloca in addrspace(5), aligned to 4 bytes; base of all addresses.
524  %a = alloca i8, i32 64, align 4, addrspace(5)
525  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  ; Workitem-id offset multiplied by 4 (checked as v_lshlrev_b32 of v0 by 2).
526  %voff = call i32 @llvm.amdgcn.workitem.id.x()
527  %voff4 = mul i32 %voff, 4
528  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  ; Volatile i8 stores of 1, 2 and 4 at %asv + 1, %asv + 2 and %asv + 4.
529  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
530  store volatile i8 1, i8 addrspace(5)* %p1
531  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
532  store volatile i8 2, i8 addrspace(5)* %p2
533  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
534  store volatile i8 4, i8 addrspace(5)* %p4
535  ret void
536}
537
; soff4_voff1 - the store address is built from a 64-byte addrspace(5) alloca
; plus (%soff * 4) plus (workitem.id.x * 1); three volatile byte stores then
; hit constant offsets 1, 2 and 4 from that address.
; In the SDAG check lines s0 stays an operand of every scratch store, while the
; GISEL check lines add s0 into v0 first and pass `off` to the store.
538define amdgpu_kernel void @soff4_voff1(i32 %soff) {
539; GFX940-SDAG-LABEL: soff4_voff1:
540; GFX940-SDAG:       ; %bb.0: ; %bb
541; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
542; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
543; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
544; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
545; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
546; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
547; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
548; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
549; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
550; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
551; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
552; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
553; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
554; GFX940-SDAG-NEXT:    s_endpgm
555;
556; GFX940-GISEL-LABEL: soff4_voff1:
557; GFX940-GISEL:       ; %bb.0: ; %bb
558; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
559; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
560; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
561; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
562; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
563; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
564; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
565; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
566; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
567; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
568; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
569; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
570; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
571; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
572; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
573; GFX940-GISEL-NEXT:    s_endpgm
574;
575; GFX11-SDAG-LABEL: soff4_voff1:
576; GFX11-SDAG:       ; %bb.0: ; %bb
577; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
578; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
579; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
580; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
581; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
582; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
583; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
584; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
585; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
586; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
587; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
588; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
589; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
590; GFX11-SDAG-NEXT:    s_endpgm
591;
592; GFX11-GISEL-LABEL: soff4_voff1:
593; GFX11-GISEL:       ; %bb.0: ; %bb
594; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
595; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
596; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
597; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
598; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
599; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
600; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
601; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
602; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
603; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
604; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
605; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
606; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
607; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
608; GFX11-GISEL-NEXT:    s_endpgm
609bb:
  ; Kernel-argument offset multiplied by 4 (checked as s_lshl_b32 of s0 by 2).
610  %soff4 = mul i32 %soff, 4
  ; 64 x i8 alloca in addrspace(5), aligned to 4 bytes; base of all addresses.
611  %a = alloca i8, i32 64, align 4, addrspace(5)
612  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; Workitem-id offset multiplied by 1 (v0 is used unshifted in the check lines).
613  %voff = call i32 @llvm.amdgcn.workitem.id.x()
614  %voff1 = mul i32 %voff, 1
615  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  ; Volatile i8 stores of 1, 2 and 4 at %asv + 1, %asv + 2 and %asv + 4.
616  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
617  store volatile i8 1, i8 addrspace(5)* %p1
618  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
619  store volatile i8 2, i8 addrspace(5)* %p2
620  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
621  store volatile i8 4, i8 addrspace(5)* %p4
622  ret void
623}
624
; soff4_voff2 - the store address is built from a 64-byte addrspace(5) alloca
; plus (%soff * 4) plus (workitem.id.x * 2); three volatile byte stores then
; hit constant offsets 1, 2 and 4 from that address.
; In the SDAG check lines s0 stays an operand of every scratch store, while the
; GISEL check lines add s0 into v0 first and pass `off` to the store.
625define amdgpu_kernel void @soff4_voff2(i32 %soff) {
626; GFX940-SDAG-LABEL: soff4_voff2:
627; GFX940-SDAG:       ; %bb.0: ; %bb
628; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
629; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
630; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
631; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
632; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
633; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
634; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
635; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
636; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
637; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
638; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
639; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
640; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
641; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
642; GFX940-SDAG-NEXT:    s_endpgm
643;
644; GFX940-GISEL-LABEL: soff4_voff2:
645; GFX940-GISEL:       ; %bb.0: ; %bb
646; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
647; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
648; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
649; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
650; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
651; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
652; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
653; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
654; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
655; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
656; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
657; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
658; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
659; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
660; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
661; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
662; GFX940-GISEL-NEXT:    s_endpgm
663;
664; GFX11-SDAG-LABEL: soff4_voff2:
665; GFX11-SDAG:       ; %bb.0: ; %bb
666; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
667; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
668; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
669; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
670; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
671; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
672; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
673; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
674; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
675; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
676; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
677; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
678; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
679; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
680; GFX11-SDAG-NEXT:    s_endpgm
681;
682; GFX11-GISEL-LABEL: soff4_voff2:
683; GFX11-GISEL:       ; %bb.0: ; %bb
684; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
685; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
686; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
687; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
688; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
689; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
690; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
691; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
692; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
693; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
694; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
695; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
696; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
697; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
698; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
699; GFX11-GISEL-NEXT:    s_endpgm
700bb:
  ; Kernel-argument offset multiplied by 4 (checked as s_lshl_b32 of s0 by 2).
701  %soff4 = mul i32 %soff, 4
  ; 64 x i8 alloca in addrspace(5), aligned to 4 bytes; base of all addresses.
702  %a = alloca i8, i32 64, align 4, addrspace(5)
703  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; Workitem-id offset multiplied by 2 (checked as v_lshlrev_b32 of v0 by 1).
704  %voff = call i32 @llvm.amdgcn.workitem.id.x()
705  %voff2 = mul i32 %voff, 2
706  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  ; Volatile i8 stores of 1, 2 and 4 at %asv + 1, %asv + 2 and %asv + 4.
707  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
708  store volatile i8 1, i8 addrspace(5)* %p1
709  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
710  store volatile i8 2, i8 addrspace(5)* %p2
711  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
712  store volatile i8 4, i8 addrspace(5)* %p4
713  ret void
714}
715
; Test case soff4_voff4: the kernel-argument offset %soff and the workitem-id
; offset are each multiplied by 4 before being added to the address of a
; 64-byte, 4-aligned addrspace(5) alloca. Single bytes are then stored
; (volatile) at constant offsets 1, 2 and 4 from that combined address.
;
; The assertion lines inside the function are autogenerated (see the NOTE at
; the top of the file); regenerate them with the script rather than editing
; them by hand.
716define amdgpu_kernel void @soff4_voff4(i32 %soff) {
717; GFX940-SDAG-LABEL: soff4_voff4:
718; GFX940-SDAG:       ; %bb.0: ; %bb
719; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
720; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
721; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
722; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
723; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
724; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
725; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
726; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
727; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
728; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
729; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
730; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
731; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
732; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
733; GFX940-SDAG-NEXT:    s_endpgm
734;
735; GFX940-GISEL-LABEL: soff4_voff4:
736; GFX940-GISEL:       ; %bb.0: ; %bb
737; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
738; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
739; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
740; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
741; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
742; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
743; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
744; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
745; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
746; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
747; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
748; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
749; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
750; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
751; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
752; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
753; GFX940-GISEL-NEXT:    s_endpgm
754;
755; GFX11-SDAG-LABEL: soff4_voff4:
756; GFX11-SDAG:       ; %bb.0: ; %bb
757; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
758; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
759; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
760; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
761; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
762; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
763; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
764; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
765; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
766; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
767; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
768; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
769; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
770; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
771; GFX11-SDAG-NEXT:    s_endpgm
772;
773; GFX11-GISEL-LABEL: soff4_voff4:
774; GFX11-GISEL:       ; %bb.0: ; %bb
775; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
776; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
777; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
778; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
779; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
780; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
781; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
782; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
783; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
784; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
785; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
786; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
787; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
788; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
789; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
790; GFX11-GISEL-NEXT:    s_endpgm
791bb:
  ; Kernel-argument part of the address: %soff scaled by 4. In the expected
  ; output above this is the s_lshl_b32 on s0.
792  %soff4 = mul i32 %soff, 4
  ; 64-byte, 4-aligned buffer in addrspace(5); the stores into it are expected
  ; to be emitted as scratch_store_* instructions.
793  %a = alloca i8, i32 64, align 4, addrspace(5)
794  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; Workitem-id part of the address: workitem id x scaled by 4. In the
  ; expected output above this is the v_lshlrev_b32 on v0.
795  %voff = call i32 @llvm.amdgcn.workitem.id.x()
796  %voff4 = mul i32 %voff, 4
797  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  ; Three volatile byte stores at constant offsets 1, 2 and 4 from %asv; each
  ; stored value equals its offset. The expected output carries these
  ; constants in the instruction's offset field.
798  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
799  store volatile i8 1, i8 addrspace(5)* %p1
800  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
801  store volatile i8 2, i8 addrspace(5)* %p2
802  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
803  store volatile i8 4, i8 addrspace(5)* %p4
804  ret void
805}
806