1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG
3; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL
4; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-SDAG
5; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-GISEL
6
7; Test flat scratch SVS addressing mode with various combinations of alignment
8; of soffset, voffset and inst_offset.
9
; AMDGPU intrinsic used by every test below as the per-work-item ("voffset")
; part of the address; going by its name it yields the work-item id in x.
10declare i32 @llvm.amdgcn.workitem.id.x()
11
; soff1_voff1: three volatile byte stores at constant offsets 1, 2 and 4 from
;   alloca + (%soff * 1) + (workitem.id.x * 1)
; Both variable offsets are scaled by 1, so neither is a known multiple of 2 or 4.
12define amdgpu_kernel void @soff1_voff1(i32 %soff) {
13; GFX940-SDAG-LABEL: soff1_voff1:
14; GFX940-SDAG:       ; %bb.0: ; %bb
15; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
16; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
17; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
18; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
19; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
20; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
21; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
22; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
23; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
24; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
25; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
26; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
27; GFX940-SDAG-NEXT:    s_endpgm
28;
29; GFX940-GISEL-LABEL: soff1_voff1:
30; GFX940-GISEL:       ; %bb.0: ; %bb
31; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
32; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
33; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
34; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
35; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
36; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
37; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
38; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
39; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
40; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
41; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
42; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
43; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
44; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
45; GFX940-GISEL-NEXT:    s_endpgm
46;
47; GFX11-SDAG-LABEL: soff1_voff1:
48; GFX11-SDAG:       ; %bb.0: ; %bb
49; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
50; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
51; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
52; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
53; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
54; GFX11-SDAG-NEXT:    v_add3_u32 v0, 4, s0, v0
55; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
56; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
57; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
58; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
59; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
60; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
61; GFX11-SDAG-NEXT:    s_endpgm
62;
63; GFX11-GISEL-LABEL: soff1_voff1:
64; GFX11-GISEL:       ; %bb.0: ; %bb
65; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
66; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
67; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
68; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
69; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
70; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
71; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
72; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
73; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
74; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
75; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
76; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
77; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
78; GFX11-GISEL-NEXT:    s_endpgm
79bb:
  ; Kernel-argument offset, scaled by 1.
80  %soff1 = mul i32 %soff, 1
  ; 64-byte, 4-byte-aligned buffer in addrspace(5); base of all addresses below.
81  %a = alloca i8, i32 64, align 4, addrspace(5)
82  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  ; Per-work-item offset, scaled by 1.
83  %voff = call i32 @llvm.amdgcn.workitem.id.x()
84  %voff1 = mul i32 %voff, 1
85  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  ; Volatile i8 stores at byte offsets 1, 2 and 4 from the combined address;
  ; the stored value equals the offset.
86  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
87  store volatile i8 1, i8 addrspace(5)* %p1
88  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
89  store volatile i8 2, i8 addrspace(5)* %p2
90  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
91  store volatile i8 4, i8 addrspace(5)* %p4
92  ret void
93}
94
; soff1_voff2: three volatile byte stores at constant offsets 1, 2 and 4 from
;   alloca + (%soff * 1) + (workitem.id.x * 2)
; The kernel-argument offset is unscaled; the per-work-item offset is a multiple of 2.
95define amdgpu_kernel void @soff1_voff2(i32 %soff) {
96; GFX940-SDAG-LABEL: soff1_voff2:
97; GFX940-SDAG:       ; %bb.0: ; %bb
98; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
99; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
100; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
101; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
102; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
103; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
104; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
105; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
106; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
107; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
108; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
109; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
110; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
111; GFX940-SDAG-NEXT:    s_endpgm
112;
113; GFX940-GISEL-LABEL: soff1_voff2:
114; GFX940-GISEL:       ; %bb.0: ; %bb
115; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
116; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
117; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
118; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
119; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
120; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
121; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
122; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
123; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
124; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
125; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
126; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
127; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
128; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
129; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
130; GFX940-GISEL-NEXT:    s_endpgm
131;
132; GFX11-SDAG-LABEL: soff1_voff2:
133; GFX11-SDAG:       ; %bb.0: ; %bb
134; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
135; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
136; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
137; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
138; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
139; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
140; GFX11-SDAG-NEXT:    v_add3_u32 v0, 4, s0, v0
141; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
142; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
143; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
144; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
145; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
146; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
147; GFX11-SDAG-NEXT:    s_endpgm
148;
149; GFX11-GISEL-LABEL: soff1_voff2:
150; GFX11-GISEL:       ; %bb.0: ; %bb
151; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
152; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
153; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
154; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
155; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
156; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
157; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
158; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
159; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
160; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
161; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
162; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
163; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
164; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
165; GFX11-GISEL-NEXT:    s_endpgm
166bb:
  ; Kernel-argument offset, scaled by 1.
167  %soff1 = mul i32 %soff, 1
  ; 64-byte, 4-byte-aligned buffer in addrspace(5); base of all addresses below.
168  %a = alloca i8, i32 64, align 4, addrspace(5)
169  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  ; Per-work-item offset, scaled by 2.
170  %voff = call i32 @llvm.amdgcn.workitem.id.x()
171  %voff2 = mul i32 %voff, 2
172  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  ; Volatile i8 stores at byte offsets 1, 2 and 4 from the combined address;
  ; the stored value equals the offset.
173  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
174  store volatile i8 1, i8 addrspace(5)* %p1
175  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
176  store volatile i8 2, i8 addrspace(5)* %p2
177  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
178  store volatile i8 4, i8 addrspace(5)* %p4
179  ret void
180}
181
; soff1_voff4: three volatile byte stores at constant offsets 1, 2 and 4 from
;   alloca + (%soff * 1) + (workitem.id.x * 4)
; The kernel-argument offset is unscaled; the per-work-item offset is a multiple of 4.
182define amdgpu_kernel void @soff1_voff4(i32 %soff) {
183; GFX940-SDAG-LABEL: soff1_voff4:
184; GFX940-SDAG:       ; %bb.0: ; %bb
185; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
186; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
187; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
188; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
189; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
190; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
191; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
192; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
193; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
194; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
195; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
196; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
197; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
198; GFX940-SDAG-NEXT:    s_endpgm
199;
200; GFX940-GISEL-LABEL: soff1_voff4:
201; GFX940-GISEL:       ; %bb.0: ; %bb
202; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
203; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
204; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
205; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
206; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
207; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
208; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
209; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
210; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
211; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
212; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
213; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
214; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
215; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
216; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
217; GFX940-GISEL-NEXT:    s_endpgm
218;
219; GFX11-SDAG-LABEL: soff1_voff4:
220; GFX11-SDAG:       ; %bb.0: ; %bb
221; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
222; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
223; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
224; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
225; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
226; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
227; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
228; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
229; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
230; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
231; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
232; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
233; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
234; GFX11-SDAG-NEXT:    s_endpgm
235;
236; GFX11-GISEL-LABEL: soff1_voff4:
237; GFX11-GISEL:       ; %bb.0: ; %bb
238; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
239; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
240; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
241; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
242; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
243; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
244; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
245; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
246; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
247; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
248; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
249; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
250; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
251; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
252; GFX11-GISEL-NEXT:    s_endpgm
253bb:
  ; Kernel-argument offset, scaled by 1.
254  %soff1 = mul i32 %soff, 1
  ; 64-byte, 4-byte-aligned buffer in addrspace(5); base of all addresses below.
255  %a = alloca i8, i32 64, align 4, addrspace(5)
256  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  ; Per-work-item offset, scaled by 4.
257  %voff = call i32 @llvm.amdgcn.workitem.id.x()
258  %voff4 = mul i32 %voff, 4
259  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  ; Volatile i8 stores at byte offsets 1, 2 and 4 from the combined address;
  ; the stored value equals the offset.
260  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
261  store volatile i8 1, i8 addrspace(5)* %p1
262  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
263  store volatile i8 2, i8 addrspace(5)* %p2
264  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
265  store volatile i8 4, i8 addrspace(5)* %p4
266  ret void
267}
268
; soff2_voff1: three volatile byte stores at constant offsets 1, 2 and 4 from
;   alloca + (%soff * 2) + (workitem.id.x * 1)
; The kernel-argument offset is a multiple of 2; the per-work-item offset is unscaled.
269define amdgpu_kernel void @soff2_voff1(i32 %soff) {
270; GFX940-SDAG-LABEL: soff2_voff1:
271; GFX940-SDAG:       ; %bb.0: ; %bb
272; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
273; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
274; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
275; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
276; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
277; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
278; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
279; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
280; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
281; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
282; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
283; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
284; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
285; GFX940-SDAG-NEXT:    s_endpgm
286;
287; GFX940-GISEL-LABEL: soff2_voff1:
288; GFX940-GISEL:       ; %bb.0: ; %bb
289; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
290; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
291; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
292; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
293; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
294; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
295; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
296; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
297; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
298; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
299; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
300; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
301; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
302; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
303; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
304; GFX940-GISEL-NEXT:    s_endpgm
305;
306; GFX11-SDAG-LABEL: soff2_voff1:
307; GFX11-SDAG:       ; %bb.0: ; %bb
308; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
309; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
310; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
311; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
312; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
313; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
314; GFX11-SDAG-NEXT:    v_add3_u32 v0, 4, s0, v0
315; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
316; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
317; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
318; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
319; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
320; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
321; GFX11-SDAG-NEXT:    s_endpgm
322;
323; GFX11-GISEL-LABEL: soff2_voff1:
324; GFX11-GISEL:       ; %bb.0: ; %bb
325; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
326; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
327; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
328; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
329; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
330; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
331; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
332; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
333; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
334; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
335; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
336; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
337; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
338; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
339; GFX11-GISEL-NEXT:    s_endpgm
340bb:
  ; Kernel-argument offset, scaled by 2.
341  %soff2 = mul i32 %soff, 2
  ; 64-byte, 4-byte-aligned buffer in addrspace(5); base of all addresses below.
342  %a = alloca i8, i32 64, align 4, addrspace(5)
343  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  ; Per-work-item offset, scaled by 1.
344  %voff = call i32 @llvm.amdgcn.workitem.id.x()
345  %voff1 = mul i32 %voff, 1
346  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  ; Volatile i8 stores at byte offsets 1, 2 and 4 from the combined address;
  ; the stored value equals the offset.
347  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
348  store volatile i8 1, i8 addrspace(5)* %p1
349  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
350  store volatile i8 2, i8 addrspace(5)* %p2
351  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
352  store volatile i8 4, i8 addrspace(5)* %p4
353  ret void
354}
355
; soff2_voff2: three volatile byte stores at constant offsets 1, 2 and 4 from
;   alloca + (%soff * 2) + (workitem.id.x * 2)
; Both the kernel-argument offset and the per-work-item offset are multiples of 2.
356define amdgpu_kernel void @soff2_voff2(i32 %soff) {
357; GFX940-SDAG-LABEL: soff2_voff2:
358; GFX940-SDAG:       ; %bb.0: ; %bb
359; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
360; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
361; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
362; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
363; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
364; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
365; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
366; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
367; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
368; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
369; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
370; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
371; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
372; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
373; GFX940-SDAG-NEXT:    s_endpgm
374;
375; GFX940-GISEL-LABEL: soff2_voff2:
376; GFX940-GISEL:       ; %bb.0: ; %bb
377; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
378; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
379; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
380; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
381; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
382; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
383; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
384; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
385; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
386; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
387; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
388; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
389; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
390; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
391; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
392; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
393; GFX940-GISEL-NEXT:    s_endpgm
394;
395; GFX11-SDAG-LABEL: soff2_voff2:
396; GFX11-SDAG:       ; %bb.0: ; %bb
397; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
398; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
399; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
400; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
401; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
402; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
403; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
404; GFX11-SDAG-NEXT:    v_add3_u32 v0, 4, s0, v0
405; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
406; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
407; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
408; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
409; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
410; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
411; GFX11-SDAG-NEXT:    s_endpgm
412;
413; GFX11-GISEL-LABEL: soff2_voff2:
414; GFX11-GISEL:       ; %bb.0: ; %bb
415; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
416; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
417; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
418; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
419; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
420; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
421; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
422; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
423; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
424; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
425; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
426; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
427; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
428; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
429; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
430; GFX11-GISEL-NEXT:    s_endpgm
431bb:
  ; Kernel-argument offset, scaled by 2.
432  %soff2 = mul i32 %soff, 2
  ; 64-byte, 4-byte-aligned buffer in addrspace(5); base of all addresses below.
433  %a = alloca i8, i32 64, align 4, addrspace(5)
434  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  ; Per-work-item offset, scaled by 2.
435  %voff = call i32 @llvm.amdgcn.workitem.id.x()
436  %voff2 = mul i32 %voff, 2
437  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  ; Volatile i8 stores at byte offsets 1, 2 and 4 from the combined address;
  ; the stored value equals the offset.
438  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
439  store volatile i8 1, i8 addrspace(5)* %p1
440  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
441  store volatile i8 2, i8 addrspace(5)* %p2
442  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
443  store volatile i8 4, i8 addrspace(5)* %p4
444  ret void
445}
446
; soff2_voff4: three volatile byte stores at constant offsets 1, 2 and 4 from
;   alloca + (%soff * 2) + (workitem.id.x * 4)
; The kernel-argument offset is a multiple of 2; the per-work-item offset is a multiple of 4.
447define amdgpu_kernel void @soff2_voff4(i32 %soff) {
448; GFX940-SDAG-LABEL: soff2_voff4:
449; GFX940-SDAG:       ; %bb.0: ; %bb
450; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
451; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
452; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
453; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
454; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
455; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
456; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
457; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
458; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
459; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
460; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
461; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
462; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
463; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
464; GFX940-SDAG-NEXT:    s_endpgm
465;
466; GFX940-GISEL-LABEL: soff2_voff4:
467; GFX940-GISEL:       ; %bb.0: ; %bb
468; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
469; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
470; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
471; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
472; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
473; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
474; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
475; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
476; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
477; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
478; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
479; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
480; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
481; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
482; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
483; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
484; GFX940-GISEL-NEXT:    s_endpgm
485;
486; GFX11-SDAG-LABEL: soff2_voff4:
487; GFX11-SDAG:       ; %bb.0: ; %bb
488; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
489; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
490; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
491; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
492; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
493; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
494; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
495; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
496; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
497; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
498; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
499; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
500; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
501; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
502; GFX11-SDAG-NEXT:    s_endpgm
503;
504; GFX11-GISEL-LABEL: soff2_voff4:
505; GFX11-GISEL:       ; %bb.0: ; %bb
506; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
507; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
508; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
509; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
510; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
511; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
512; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
513; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
514; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
515; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
516; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
517; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
518; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
519; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
520; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
521; GFX11-GISEL-NEXT:    s_endpgm
522bb:
  ; Kernel-argument offset, scaled by 2.
523  %soff2 = mul i32 %soff, 2
  ; 64-byte, 4-byte-aligned buffer in addrspace(5); base of all addresses below.
524  %a = alloca i8, i32 64, align 4, addrspace(5)
525  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  ; Per-work-item offset, scaled by 4.
526  %voff = call i32 @llvm.amdgcn.workitem.id.x()
527  %voff4 = mul i32 %voff, 4
528  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  ; Volatile i8 stores at byte offsets 1, 2 and 4 from the combined address;
  ; the stored value equals the offset.
529  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
530  store volatile i8 1, i8 addrspace(5)* %p1
531  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
532  store volatile i8 2, i8 addrspace(5)* %p2
533  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
534  store volatile i8 4, i8 addrspace(5)* %p4
535  ret void
536}
537
; soff4_voff1: three volatile byte stores at constant offsets 1, 2 and 4 from
;   alloca + (%soff * 4) + (workitem.id.x * 1)
; The kernel-argument offset is a multiple of 4; the per-work-item offset is unscaled.
538define amdgpu_kernel void @soff4_voff1(i32 %soff) {
539; GFX940-SDAG-LABEL: soff4_voff1:
540; GFX940-SDAG:       ; %bb.0: ; %bb
541; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
542; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
543; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
544; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
545; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
546; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
547; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
548; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
549; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
550; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
551; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
552; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
553; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
554; GFX940-SDAG-NEXT:    s_endpgm
555;
556; GFX940-GISEL-LABEL: soff4_voff1:
557; GFX940-GISEL:       ; %bb.0: ; %bb
558; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
559; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
560; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
561; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
562; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
563; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
564; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
565; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
566; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
567; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
568; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
569; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
570; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
571; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
572; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
573; GFX940-GISEL-NEXT:    s_endpgm
574;
575; GFX11-SDAG-LABEL: soff4_voff1:
576; GFX11-SDAG:       ; %bb.0: ; %bb
577; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
578; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
579; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 2
580; GFX11-SDAG-NEXT:    v_mov_b32_e32 v4, 4
581; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
582; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
583; GFX11-SDAG-NEXT:    v_add3_u32 v2, 4, s0, v0
584; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
585; GFX11-SDAG-NEXT:    scratch_store_b8 v2, v1, off offset:1 dlc
586; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
587; GFX11-SDAG-NEXT:    scratch_store_b8 v2, v3, off offset:2 dlc
588; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
589; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v4, s0 offset:4 dlc
590; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
591; GFX11-SDAG-NEXT:    s_endpgm
592;
593; GFX11-GISEL-LABEL: soff4_voff1:
594; GFX11-GISEL:       ; %bb.0: ; %bb
595; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
596; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
597; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
598; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
599; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
600; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
601; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
602; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
603; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
604; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
605; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
606; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
607; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
608; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
609; GFX11-GISEL-NEXT:    s_endpgm
610bb:
  ; Kernel-argument offset, scaled by 4.
611  %soff4 = mul i32 %soff, 4
  ; 64-byte, 4-byte-aligned buffer in addrspace(5); base of all addresses below.
612  %a = alloca i8, i32 64, align 4, addrspace(5)
613  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; Per-work-item offset, scaled by 1.
614  %voff = call i32 @llvm.amdgcn.workitem.id.x()
615  %voff1 = mul i32 %voff, 1
616  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  ; Volatile i8 stores at byte offsets 1, 2 and 4 from the combined address;
  ; the stored value equals the offset.
617  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
618  store volatile i8 1, i8 addrspace(5)* %p1
619  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
620  store volatile i8 2, i8 addrspace(5)* %p2
621  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
622  store volatile i8 4, i8 addrspace(5)* %p4
623  ret void
624}
625
; soff4_voff2: three volatile byte stores at constant offsets 1, 2 and 4 from
;   alloca + (%soff * 4) + (workitem.id.x * 2)
; The kernel-argument offset is a multiple of 4; the per-work-item offset is a multiple of 2.
626define amdgpu_kernel void @soff4_voff2(i32 %soff) {
627; GFX940-SDAG-LABEL: soff4_voff2:
628; GFX940-SDAG:       ; %bb.0: ; %bb
629; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
630; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
631; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
632; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
633; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
634; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
635; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
636; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
637; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
638; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
639; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
640; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
641; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
642; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
643; GFX940-SDAG-NEXT:    s_endpgm
644;
645; GFX940-GISEL-LABEL: soff4_voff2:
646; GFX940-GISEL:       ; %bb.0: ; %bb
647; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
648; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
649; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
650; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
651; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
652; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
653; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
654; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
655; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
656; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
657; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
658; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
659; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
660; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
661; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
662; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
663; GFX940-GISEL-NEXT:    s_endpgm
664;
665; GFX11-SDAG-LABEL: soff4_voff2:
666; GFX11-SDAG:       ; %bb.0: ; %bb
667; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
668; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
669; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
670; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
671; GFX11-SDAG-NEXT:    v_mov_b32_e32 v4, 4
672; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
673; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
674; GFX11-SDAG-NEXT:    v_add3_u32 v3, 4, s0, v0
675; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
676; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
677; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
678; GFX11-SDAG-NEXT:    scratch_store_b8 v3, v2, off offset:2 dlc
679; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
680; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v4, s0 offset:4 dlc
681; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
682; GFX11-SDAG-NEXT:    s_endpgm
683;
684; GFX11-GISEL-LABEL: soff4_voff2:
685; GFX11-GISEL:       ; %bb.0: ; %bb
686; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
687; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
688; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
689; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
690; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
691; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
692; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
693; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
694; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
695; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
696; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
697; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
698; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
699; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
700; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
701; GFX11-GISEL-NEXT:    s_endpgm
702bb:
  ; Kernel-argument offset, scaled by 4.
703  %soff4 = mul i32 %soff, 4
  ; 64-byte, 4-byte-aligned buffer in addrspace(5); base of all addresses below.
704  %a = alloca i8, i32 64, align 4, addrspace(5)
705  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; Per-work-item offset, scaled by 2.
706  %voff = call i32 @llvm.amdgcn.workitem.id.x()
707  %voff2 = mul i32 %voff, 2
708  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  ; Volatile i8 stores at byte offsets 1, 2 and 4 from the combined address;
  ; the stored value equals the offset.
709  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
710  store volatile i8 1, i8 addrspace(5)* %p1
711  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
712  store volatile i8 2, i8 addrspace(5)* %p2
713  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
714  store volatile i8 4, i8 addrspace(5)* %p4
715  ret void
716}
717
; soff4_voff4: both variable parts of the address are multiples of 4.
; The kernel-argument offset %soff is scaled by 4 and the workitem id is
; scaled by 4; both are added to a 4-byte-aligned addrspace(5) alloca, and
; one byte is then stored at each of the constant offsets 1, 2 and 4.
; In the expected output below, the SelectionDAG runs keep a VGPR and an
; SGPR address operand plus the immediate offset on every store, while the
; GlobalISel runs add everything into v0 first and store with `off`.
718define amdgpu_kernel void @soff4_voff4(i32 %soff) {
719; GFX940-SDAG-LABEL: soff4_voff4:
720; GFX940-SDAG:       ; %bb.0: ; %bb
721; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
722; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
723; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
724; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
725; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
726; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
727; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
728; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
729; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
730; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
731; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
732; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
733; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
734; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
735; GFX940-SDAG-NEXT:    s_endpgm
736;
737; GFX940-GISEL-LABEL: soff4_voff4:
738; GFX940-GISEL:       ; %bb.0: ; %bb
739; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
740; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
741; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
742; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
743; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
744; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
745; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
746; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
747; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
748; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
749; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
750; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
751; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
752; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
753; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
754; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
755; GFX940-GISEL-NEXT:    s_endpgm
756;
757; GFX11-SDAG-LABEL: soff4_voff4:
758; GFX11-SDAG:       ; %bb.0: ; %bb
759; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
760; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
761; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
762; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
763; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
764; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
765; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
766; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
767; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
768; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
769; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
770; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
771; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
772; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
773; GFX11-SDAG-NEXT:    s_endpgm
774;
775; GFX11-GISEL-LABEL: soff4_voff4:
776; GFX11-GISEL:       ; %bb.0: ; %bb
777; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
778; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
779; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
780; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
781; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
782; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
783; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
784; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
785; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
786; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
787; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
788; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
789; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
790; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
791; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
792; GFX11-GISEL-NEXT:    s_endpgm
793bb:
  ; "soff" part of the address: the i32 kernel argument scaled by 4.
794  %soff4 = mul i32 %soff, 4
  ; 64-byte, 4-byte-aligned buffer in addrspace(5); the expected output above
  ; accesses it with scratch_store instructions.
795  %a = alloca i8, i32 64, align 4, addrspace(5)
796  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; "voff" part of the address: the workitem id (x) scaled by 4.
797  %voff = call i32 @llvm.amdgcn.workitem.id.x()
798  %voff4 = mul i32 %voff, 4
799  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  ; Three byte stores at constant offsets 1, 2 and 4 from the combined address;
  ; each stored value equals its offset. NOTE(review): volatile presumably
  ; keeps the stores separate and in program order - confirm.
800  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
801  store volatile i8 1, i8 addrspace(5)* %p1
802  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
803  store volatile i8 2, i8 addrspace(5)* %p2
804  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
805  store volatile i8 4, i8 addrspace(5)* %p4
806  ret void
807}
808