1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG
3; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL
4; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-SDAG
5; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-GISEL
6
7; Test flat scratch SVS addressing mode with various combinations of alignment
8; of soffset, voffset and inst_offset.
9
10declare i32 @llvm.amdgcn.workitem.id.x()
11
; soff1_voff1: scalar offset (kernel argument %soff) scaled by 1, vector offset
; (workitem id x) scaled by 1. Both are added to a 64-byte, 4-byte-aligned
; addrspace(5) alloca, and three volatile byte stores are issued at constant
; offsets 1, 2 and 4 from the resulting address.
;
; Expected code, as pinned by the checks below:
;  * gfx940, SelectionDAG: the address stays split across the store operands
;    (VGPR v0, SGPR s0, immediate offset).
;  * gfx940 GlobalISel and both gfx1100 selectors: the scalar part is added
;    into the VGPR first and the store's SGPR operand is "off".
12define amdgpu_kernel void @soff1_voff1(i32 %soff) {
13; GFX940-SDAG-LABEL: soff1_voff1:
14; GFX940-SDAG:       ; %bb.0: ; %bb
15; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
16; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
17; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
18; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
19; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
20; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
21; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
22; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
23; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
24; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
25; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
26; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
27; GFX940-SDAG-NEXT:    s_endpgm
28;
29; GFX940-GISEL-LABEL: soff1_voff1:
30; GFX940-GISEL:       ; %bb.0: ; %bb
31; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
32; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
33; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
34; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
35; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
36; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
37; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
38; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
39; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
40; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
41; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
42; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
43; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
44; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
45; GFX940-GISEL-NEXT:    s_endpgm
46;
47; GFX11-SDAG-LABEL: soff1_voff1:
48; GFX11-SDAG:       ; %bb.0: ; %bb
49; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
50; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
51; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
52; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
53; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
54; GFX11-SDAG-NEXT:    v_add3_u32 v0, 4, s0, v0
55; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
56; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
57; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
58; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
59; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
60; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
61; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
62; GFX11-SDAG-NEXT:    s_endpgm
63;
64; GFX11-GISEL-LABEL: soff1_voff1:
65; GFX11-GISEL:       ; %bb.0: ; %bb
66; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
67; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
68; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
69; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
70; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
71; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
72; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
73; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
74; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
75; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
76; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
77; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
78; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
79; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
80; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
81; GFX11-GISEL-NEXT:    s_endpgm
82bb:
  ; Scalar part of the address: the kernel argument times 1 (no scaling).
83  %soff1 = mul i32 %soff, 1
84  %a = alloca i8, i32 64, align 4, addrspace(5)
85  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  ; Vector part of the address: the workitem id times 1 (no scaling).
86  %voff = call i32 @llvm.amdgcn.workitem.id.x()
87  %voff1 = mul i32 %voff, 1
88  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  ; Volatile byte stores at constant offsets 1, 2 and 4; each stored value
  ; equals its offset.
89  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
90  store volatile i8 1, i8 addrspace(5)* %p1
91  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
92  store volatile i8 2, i8 addrspace(5)* %p2
93  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
94  store volatile i8 4, i8 addrspace(5)* %p4
95  ret void
96}
97
; soff1_voff2: scalar offset (kernel argument %soff) scaled by 1, vector offset
; (workitem id x) scaled by 2. Both are added to a 64-byte, 4-byte-aligned
; addrspace(5) alloca, and three volatile byte stores are issued at constant
; offsets 1, 2 and 4 from the resulting address.
;
; Expected code, as pinned by the checks below:
;  * The multiply by 2 becomes a left shift of v0 by 1 on every target.
;  * gfx940, SelectionDAG: the address stays split across the store operands
;    (VGPR v0, SGPR s0, immediate offset).
;  * gfx940 GlobalISel and both gfx1100 selectors: the scalar part is added
;    into the VGPR first and the store's SGPR operand is "off".
98define amdgpu_kernel void @soff1_voff2(i32 %soff) {
99; GFX940-SDAG-LABEL: soff1_voff2:
100; GFX940-SDAG:       ; %bb.0: ; %bb
101; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
102; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
103; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
104; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
105; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
106; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
107; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
108; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
109; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
110; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
111; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
112; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
113; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
114; GFX940-SDAG-NEXT:    s_endpgm
115;
116; GFX940-GISEL-LABEL: soff1_voff2:
117; GFX940-GISEL:       ; %bb.0: ; %bb
118; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
119; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
120; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
121; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
122; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
123; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
124; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
125; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
126; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
127; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
128; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
129; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
130; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
131; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
132; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
133; GFX940-GISEL-NEXT:    s_endpgm
134;
135; GFX11-SDAG-LABEL: soff1_voff2:
136; GFX11-SDAG:       ; %bb.0: ; %bb
137; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
138; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
139; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
140; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
141; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
142; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
143; GFX11-SDAG-NEXT:    v_add3_u32 v0, 4, s0, v0
144; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
145; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
146; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
147; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
148; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
149; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
150; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
151; GFX11-SDAG-NEXT:    s_endpgm
152;
153; GFX11-GISEL-LABEL: soff1_voff2:
154; GFX11-GISEL:       ; %bb.0: ; %bb
155; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
156; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
157; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
158; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
159; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
160; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
161; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
162; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
163; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
164; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
165; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
166; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
167; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
168; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
169; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
170; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
171; GFX11-GISEL-NEXT:    s_endpgm
172bb:
  ; Scalar part of the address: the kernel argument times 1 (no scaling).
173  %soff1 = mul i32 %soff, 1
174  %a = alloca i8, i32 64, align 4, addrspace(5)
175  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  ; Vector part of the address: the workitem id times 2.
176  %voff = call i32 @llvm.amdgcn.workitem.id.x()
177  %voff2 = mul i32 %voff, 2
178  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  ; Volatile byte stores at constant offsets 1, 2 and 4; each stored value
  ; equals its offset.
179  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
180  store volatile i8 1, i8 addrspace(5)* %p1
181  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
182  store volatile i8 2, i8 addrspace(5)* %p2
183  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
184  store volatile i8 4, i8 addrspace(5)* %p4
185  ret void
186}
187
; soff1_voff4: scalar offset (kernel argument %soff) scaled by 1, vector offset
; (workitem id x) scaled by 4. Both are added to a 64-byte, 4-byte-aligned
; addrspace(5) alloca, and three volatile byte stores are issued at constant
; offsets 1, 2 and 4 from the resulting address.
;
; Expected code, as pinned by the checks below:
;  * The multiply by 4 becomes a left shift of v0 by 2 on every target.
;  * SelectionDAG on both gfx940 and gfx1100: the address stays split across
;    the store operands (VGPR v0, SGPR s0, immediate offset).
;  * GlobalISel on both targets: the scalar part is added into the VGPR first
;    and the store's SGPR operand is "off".
188define amdgpu_kernel void @soff1_voff4(i32 %soff) {
189; GFX940-SDAG-LABEL: soff1_voff4:
190; GFX940-SDAG:       ; %bb.0: ; %bb
191; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
192; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
193; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
194; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
195; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
196; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
197; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
198; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
199; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
200; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
201; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
202; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
203; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
204; GFX940-SDAG-NEXT:    s_endpgm
205;
206; GFX940-GISEL-LABEL: soff1_voff4:
207; GFX940-GISEL:       ; %bb.0: ; %bb
208; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
209; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
210; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
211; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
212; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
213; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
214; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
215; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
216; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
217; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
218; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
219; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
220; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
221; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
222; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
223; GFX940-GISEL-NEXT:    s_endpgm
224;
225; GFX11-SDAG-LABEL: soff1_voff4:
226; GFX11-SDAG:       ; %bb.0: ; %bb
227; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
228; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
229; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
230; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
231; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
232; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
233; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
234; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
235; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
236; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
237; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
238; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
239; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
240; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
241; GFX11-SDAG-NEXT:    s_endpgm
242;
243; GFX11-GISEL-LABEL: soff1_voff4:
244; GFX11-GISEL:       ; %bb.0: ; %bb
245; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
246; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
247; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
248; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
249; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
250; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
251; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
252; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
253; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
254; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
255; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
256; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
257; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
258; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
259; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
260; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
261; GFX11-GISEL-NEXT:    s_endpgm
262bb:
  ; Scalar part of the address: the kernel argument times 1 (no scaling).
263  %soff1 = mul i32 %soff, 1
264  %a = alloca i8, i32 64, align 4, addrspace(5)
265  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
  ; Vector part of the address: the workitem id times 4.
266  %voff = call i32 @llvm.amdgcn.workitem.id.x()
267  %voff4 = mul i32 %voff, 4
268  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  ; Volatile byte stores at constant offsets 1, 2 and 4; each stored value
  ; equals its offset.
269  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
270  store volatile i8 1, i8 addrspace(5)* %p1
271  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
272  store volatile i8 2, i8 addrspace(5)* %p2
273  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
274  store volatile i8 4, i8 addrspace(5)* %p4
275  ret void
276}
277
; soff2_voff1: scalar offset (kernel argument %soff) scaled by 2, vector offset
; (workitem id x) scaled by 1. Both are added to a 64-byte, 4-byte-aligned
; addrspace(5) alloca, and three volatile byte stores are issued at constant
; offsets 1, 2 and 4 from the resulting address.
;
; Expected code, as pinned by the checks below:
;  * The multiply by 2 becomes a scalar left shift of s0 by 1 on every target.
;  * gfx940, SelectionDAG: the address stays split across the store operands
;    (VGPR v0, SGPR s0, immediate offset).
;  * gfx940 GlobalISel and both gfx1100 selectors: the scalar part is added
;    into the VGPR first and the store's SGPR operand is "off".
278define amdgpu_kernel void @soff2_voff1(i32 %soff) {
279; GFX940-SDAG-LABEL: soff2_voff1:
280; GFX940-SDAG:       ; %bb.0: ; %bb
281; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
282; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
283; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
284; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
285; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
286; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
287; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
288; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
289; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
290; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
291; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
292; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
293; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
294; GFX940-SDAG-NEXT:    s_endpgm
295;
296; GFX940-GISEL-LABEL: soff2_voff1:
297; GFX940-GISEL:       ; %bb.0: ; %bb
298; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
299; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
300; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
301; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
302; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
303; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
304; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
305; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
306; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
307; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
308; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
309; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
310; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
311; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
312; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
313; GFX940-GISEL-NEXT:    s_endpgm
314;
315; GFX11-SDAG-LABEL: soff2_voff1:
316; GFX11-SDAG:       ; %bb.0: ; %bb
317; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
318; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
319; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
320; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
321; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
322; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
323; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
324; GFX11-SDAG-NEXT:    v_add3_u32 v0, 4, s0, v0
325; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
326; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
327; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
328; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
329; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
330; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
331; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
332; GFX11-SDAG-NEXT:    s_endpgm
333;
334; GFX11-GISEL-LABEL: soff2_voff1:
335; GFX11-GISEL:       ; %bb.0: ; %bb
336; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
337; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
338; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
339; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
340; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
341; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
342; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
343; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
344; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
345; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
346; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
347; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
348; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
349; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
350; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
351; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
352; GFX11-GISEL-NEXT:    s_endpgm
353bb:
  ; Scalar part of the address: the kernel argument times 2.
354  %soff2 = mul i32 %soff, 2
355  %a = alloca i8, i32 64, align 4, addrspace(5)
356  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  ; Vector part of the address: the workitem id times 1 (no scaling).
357  %voff = call i32 @llvm.amdgcn.workitem.id.x()
358  %voff1 = mul i32 %voff, 1
359  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  ; Volatile byte stores at constant offsets 1, 2 and 4; each stored value
  ; equals its offset.
360  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
361  store volatile i8 1, i8 addrspace(5)* %p1
362  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
363  store volatile i8 2, i8 addrspace(5)* %p2
364  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
365  store volatile i8 4, i8 addrspace(5)* %p4
366  ret void
367}
368
; soff2_voff2: scalar offset (kernel argument %soff) scaled by 2, vector offset
; (workitem id x) scaled by 2. Both are added to a 64-byte, 4-byte-aligned
; addrspace(5) alloca, and three volatile byte stores are issued at constant
; offsets 1, 2 and 4 from the resulting address.
;
; Expected code, as pinned by the checks below:
;  * Both multiplies become left shifts by 1 (s0 on the scalar side, v0 on the
;    vector side) on every target.
;  * gfx940, SelectionDAG: the address stays split across the store operands
;    (VGPR v0, SGPR s0, immediate offset).
;  * gfx940 GlobalISel and both gfx1100 selectors: the scalar part is added
;    into the VGPR first and the store's SGPR operand is "off".
369define amdgpu_kernel void @soff2_voff2(i32 %soff) {
370; GFX940-SDAG-LABEL: soff2_voff2:
371; GFX940-SDAG:       ; %bb.0: ; %bb
372; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
373; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
374; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
375; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
376; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
377; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
378; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
379; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
380; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
381; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
382; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
383; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
384; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
385; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
386; GFX940-SDAG-NEXT:    s_endpgm
387;
388; GFX940-GISEL-LABEL: soff2_voff2:
389; GFX940-GISEL:       ; %bb.0: ; %bb
390; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
391; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
392; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
393; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
394; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
395; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
396; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
397; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
398; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
399; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
400; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
401; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
402; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
403; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
404; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
405; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
406; GFX940-GISEL-NEXT:    s_endpgm
407;
408; GFX11-SDAG-LABEL: soff2_voff2:
409; GFX11-SDAG:       ; %bb.0: ; %bb
410; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
411; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
412; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
413; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
414; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
415; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
416; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
417; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
418; GFX11-SDAG-NEXT:    v_add3_u32 v0, 4, s0, v0
419; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
420; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
421; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
422; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
423; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
424; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
425; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
426; GFX11-SDAG-NEXT:    s_endpgm
427;
428; GFX11-GISEL-LABEL: soff2_voff2:
429; GFX11-GISEL:       ; %bb.0: ; %bb
430; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
431; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
432; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
433; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
434; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
435; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
436; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
437; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
438; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
439; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
440; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
441; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
442; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
443; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
444; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
445; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
446; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
447; GFX11-GISEL-NEXT:    s_endpgm
448bb:
  ; Scalar part of the address: the kernel argument times 2.
449  %soff2 = mul i32 %soff, 2
450  %a = alloca i8, i32 64, align 4, addrspace(5)
451  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  ; Vector part of the address: the workitem id times 2.
452  %voff = call i32 @llvm.amdgcn.workitem.id.x()
453  %voff2 = mul i32 %voff, 2
454  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  ; Volatile byte stores at constant offsets 1, 2 and 4; each stored value
  ; equals its offset.
455  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
456  store volatile i8 1, i8 addrspace(5)* %p1
457  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
458  store volatile i8 2, i8 addrspace(5)* %p2
459  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
460  store volatile i8 4, i8 addrspace(5)* %p4
461  ret void
462}
463
; soff2_voff4: scalar offset (kernel argument %soff) scaled by 2, vector offset
; (workitem id x) scaled by 4. Both are added to a 64-byte, 4-byte-aligned
; addrspace(5) alloca, and three volatile byte stores are issued at constant
; offsets 1, 2 and 4 from the resulting address.
;
; Expected code, as pinned by the checks below:
;  * The scalar multiply becomes a left shift of s0 by 1 and the vector
;    multiply a left shift of v0 by 2 on every target.
;  * SelectionDAG on both gfx940 and gfx1100: the address stays split across
;    the store operands (VGPR v0, SGPR s0, immediate offset).
;  * GlobalISel on both targets: the scalar part is added into the VGPR first
;    and the store's SGPR operand is "off".
464define amdgpu_kernel void @soff2_voff4(i32 %soff) {
465; GFX940-SDAG-LABEL: soff2_voff4:
466; GFX940-SDAG:       ; %bb.0: ; %bb
467; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
468; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
469; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
470; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
471; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
472; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
473; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
474; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
475; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
476; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
477; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
478; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
479; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
480; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
481; GFX940-SDAG-NEXT:    s_endpgm
482;
483; GFX940-GISEL-LABEL: soff2_voff4:
484; GFX940-GISEL:       ; %bb.0: ; %bb
485; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
486; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
487; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
488; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
489; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
490; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
491; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
492; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
493; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
494; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
495; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
496; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
497; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
498; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
499; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
500; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
501; GFX940-GISEL-NEXT:    s_endpgm
502;
503; GFX11-SDAG-LABEL: soff2_voff4:
504; GFX11-SDAG:       ; %bb.0: ; %bb
505; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
506; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
507; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
508; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
509; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
510; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
511; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
512; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
513; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
514; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
515; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
516; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
517; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
518; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
519; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
520; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
521; GFX11-SDAG-NEXT:    s_endpgm
522;
523; GFX11-GISEL-LABEL: soff2_voff4:
524; GFX11-GISEL:       ; %bb.0: ; %bb
525; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
526; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
527; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
528; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
529; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
530; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
531; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
532; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
533; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
534; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
535; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
536; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
537; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
538; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
539; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
540; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
541; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
542; GFX11-GISEL-NEXT:    s_endpgm
543bb:
  ; Scalar part of the address: the kernel argument times 2.
544  %soff2 = mul i32 %soff, 2
545  %a = alloca i8, i32 64, align 4, addrspace(5)
546  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
  ; Vector part of the address: the workitem id times 4.
547  %voff = call i32 @llvm.amdgcn.workitem.id.x()
548  %voff4 = mul i32 %voff, 4
549  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  ; Volatile byte stores at constant offsets 1, 2 and 4; each stored value
  ; equals its offset.
550  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
551  store volatile i8 1, i8 addrspace(5)* %p1
552  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
553  store volatile i8 2, i8 addrspace(5)* %p2
554  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
555  store volatile i8 4, i8 addrspace(5)* %p4
556  ret void
557}
558
; soff4_voff1: scalar offset (kernel argument %soff) scaled by 4, vector offset
; (workitem id x) scaled by 1. Both are added to a 64-byte, 4-byte-aligned
; addrspace(5) alloca, and three volatile byte stores are issued at constant
; offsets 1, 2 and 4 from the resulting address.
;
; Expected code, as pinned by the checks below:
;  * The multiply by 4 becomes a scalar left shift of s0 by 2 on every target.
;  * gfx940, SelectionDAG: the address stays split across the store operands
;    (VGPR v0, SGPR s0, immediate offset) for all three stores.
;  * gfx1100, SelectionDAG: mixed. The stores at offsets 1 and 2 use a
;    pre-added VGPR (v2) with the SGPR operand "off", while the store at
;    offset 4 uses the split form with v0 and s0.
;  * GlobalISel on both targets: the scalar part is added into the VGPR first
;    and the store's SGPR operand is "off".
559define amdgpu_kernel void @soff4_voff1(i32 %soff) {
560; GFX940-SDAG-LABEL: soff4_voff1:
561; GFX940-SDAG:       ; %bb.0: ; %bb
562; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
563; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
564; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
565; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
566; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
567; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
568; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
569; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
570; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
571; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
572; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
573; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
574; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
575; GFX940-SDAG-NEXT:    s_endpgm
576;
577; GFX940-GISEL-LABEL: soff4_voff1:
578; GFX940-GISEL:       ; %bb.0: ; %bb
579; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
580; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
581; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
582; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
583; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
584; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
585; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
586; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
587; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
588; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
589; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
590; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
591; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
592; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
593; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
594; GFX940-GISEL-NEXT:    s_endpgm
595;
596; GFX11-SDAG-LABEL: soff4_voff1:
597; GFX11-SDAG:       ; %bb.0: ; %bb
598; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
599; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
600; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 2
601; GFX11-SDAG-NEXT:    v_mov_b32_e32 v4, 4
602; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
603; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
604; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
605; GFX11-SDAG-NEXT:    v_add3_u32 v2, 4, s0, v0
606; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
607; GFX11-SDAG-NEXT:    scratch_store_b8 v2, v1, off offset:1 dlc
608; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
609; GFX11-SDAG-NEXT:    scratch_store_b8 v2, v3, off offset:2 dlc
610; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
611; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v4, s0 offset:4 dlc
612; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
613; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
614; GFX11-SDAG-NEXT:    s_endpgm
615;
616; GFX11-GISEL-LABEL: soff4_voff1:
617; GFX11-GISEL:       ; %bb.0: ; %bb
618; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
619; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
620; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
621; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
622; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
623; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
624; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
625; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
626; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
627; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
628; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
629; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
630; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
631; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
632; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
633; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
634; GFX11-GISEL-NEXT:    s_endpgm
635bb:
  ; Scalar part of the address: the kernel argument times 4.
636  %soff4 = mul i32 %soff, 4
637  %a = alloca i8, i32 64, align 4, addrspace(5)
638  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; Vector part of the address: the workitem id times 1 (no scaling).
639  %voff = call i32 @llvm.amdgcn.workitem.id.x()
640  %voff1 = mul i32 %voff, 1
641  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
  ; Volatile byte stores at constant offsets 1, 2 and 4; each stored value
  ; equals its offset.
642  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
643  store volatile i8 1, i8 addrspace(5)* %p1
644  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
645  store volatile i8 2, i8 addrspace(5)* %p2
646  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
647  store volatile i8 4, i8 addrspace(5)* %p4
648  ret void
649}
650
; soff4_voff2: the scalar offset is a multiple of 4 and the per-workitem offset
; is a multiple of 2. The kernel adds both to a 64-byte addrspace(5) alloca and
; then performs three volatile byte stores at constant offsets 1, 2 and 4.
;
; As recorded in the autogenerated assertions below:
;  - GFX940 SelectionDAG addresses all three stores as VGPR (v0) + SGPR (s0)
;    + immediate offset.
;  - GlobalISel on both targets first sums everything into one VGPR and stores
;    with "off" + immediate offset.
;  - GFX11 SelectionDAG uses the VGPR + SGPR form for offsets 1 and 4, but for
;    offset 2 it stores through a separately computed VGPR (v_add3_u32) with
;    "off".
; NOTE(review): the offset-2 difference on GFX11 is presumably the
; alignment-dependent case this test targets - confirm against the backend's
; scratch addressing-mode selection rules.
651define amdgpu_kernel void @soff4_voff2(i32 %soff) {
652; GFX940-SDAG-LABEL: soff4_voff2:
653; GFX940-SDAG:       ; %bb.0: ; %bb
654; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
655; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
656; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
657; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
658; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
659; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
660; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
661; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
662; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
663; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
664; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
665; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
666; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
667; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
668; GFX940-SDAG-NEXT:    s_endpgm
669;
670; GFX940-GISEL-LABEL: soff4_voff2:
671; GFX940-GISEL:       ; %bb.0: ; %bb
672; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
673; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
674; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
675; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
676; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
677; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
678; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
679; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
680; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
681; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
682; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
683; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
684; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
685; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
686; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
687; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
688; GFX940-GISEL-NEXT:    s_endpgm
689;
690; GFX11-SDAG-LABEL: soff4_voff2:
691; GFX11-SDAG:       ; %bb.0: ; %bb
692; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
693; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
694; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
695; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
696; GFX11-SDAG-NEXT:    v_mov_b32_e32 v4, 4
697; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
698; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
699; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
700; GFX11-SDAG-NEXT:    v_add3_u32 v3, 4, s0, v0
701; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
702; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
703; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
704; GFX11-SDAG-NEXT:    scratch_store_b8 v3, v2, off offset:2 dlc
705; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
706; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v4, s0 offset:4 dlc
707; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
708; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
709; GFX11-SDAG-NEXT:    s_endpgm
710;
711; GFX11-GISEL-LABEL: soff4_voff2:
712; GFX11-GISEL:       ; %bb.0: ; %bb
713; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
714; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
715; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
716; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
717; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
718; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
719; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
720; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
721; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
722; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
723; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
724; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
725; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
726; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
727; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
728; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
729; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
730; GFX11-GISEL-NEXT:    s_endpgm
731bb:
  ; Scalar part of the address: kernel argument scaled to a multiple of 4.
732  %soff4 = mul i32 %soff, 4
733  %a = alloca i8, i32 64, align 4, addrspace(5)
734  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; Per-workitem part of the address: workitem id scaled to a multiple of 2.
735  %voff = call i32 @llvm.amdgcn.workitem.id.x()
736  %voff2 = mul i32 %voff, 2
737  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  ; Three volatile byte stores at constant offsets 1, 2 and 4 from %asv; the
  ; stored value equals the offset, so each store can be told apart in the
  ; generated code.
738  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
739  store volatile i8 1, i8 addrspace(5)* %p1
740  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
741  store volatile i8 2, i8 addrspace(5)* %p2
742  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
743  store volatile i8 4, i8 addrspace(5)* %p4
744  ret void
745}
746
; soff4_voff4: both the scalar offset and the per-workitem offset are multiples
; of 4. The kernel adds both to a 64-byte addrspace(5) alloca and then performs
; three volatile byte stores at constant offsets 1, 2 and 4.
;
; As recorded in the autogenerated assertions below:
;  - SelectionDAG on both targets addresses all three stores as VGPR (v0)
;    + SGPR (s0) + immediate offset.
;  - GlobalISel on both targets first sums everything into one VGPR and stores
;    with "off" + immediate offset.
747define amdgpu_kernel void @soff4_voff4(i32 %soff) {
748; GFX940-SDAG-LABEL: soff4_voff4:
749; GFX940-SDAG:       ; %bb.0: ; %bb
750; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
751; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
752; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
753; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
754; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
755; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
756; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
757; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
758; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
759; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
760; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
761; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
762; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
763; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
764; GFX940-SDAG-NEXT:    s_endpgm
765;
766; GFX940-GISEL-LABEL: soff4_voff4:
767; GFX940-GISEL:       ; %bb.0: ; %bb
768; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
769; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
770; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
771; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
772; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
773; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
774; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
775; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
776; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
777; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
778; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
779; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
780; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
781; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
782; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
783; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
784; GFX940-GISEL-NEXT:    s_endpgm
785;
786; GFX11-SDAG-LABEL: soff4_voff4:
787; GFX11-SDAG:       ; %bb.0: ; %bb
788; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
789; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
790; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
791; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
792; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
793; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
794; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
795; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
796; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
797; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
798; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
799; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
800; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
801; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
802; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
803; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
804; GFX11-SDAG-NEXT:    s_endpgm
805;
806; GFX11-GISEL-LABEL: soff4_voff4:
807; GFX11-GISEL:       ; %bb.0: ; %bb
808; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
809; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
810; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 1
811; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 4
812; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
813; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
814; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
815; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
816; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v1, v0
817; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 2
818; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
819; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
820; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
821; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
822; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
823; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
824; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
825; GFX11-GISEL-NEXT:    s_endpgm
826bb:
  ; Scalar part of the address: kernel argument scaled to a multiple of 4.
827  %soff4 = mul i32 %soff, 4
828  %a = alloca i8, i32 64, align 4, addrspace(5)
829  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; Per-workitem part of the address: workitem id scaled to a multiple of 4.
830  %voff = call i32 @llvm.amdgcn.workitem.id.x()
831  %voff4 = mul i32 %voff, 4
832  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  ; Three volatile byte stores at constant offsets 1, 2 and 4 from %asv; the
  ; stored value equals the offset, so each store can be told apart in the
  ; generated code.
833  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
834  store volatile i8 1, i8 addrspace(5)* %p1
835  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
836  store volatile i8 2, i8 addrspace(5)* %p2
837  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
838  store volatile i8 4, i8 addrspace(5)* %p4
839  ret void
840}
841