1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-SDAG
3; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefixes=GFX940-GISEL
4; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-SDAG
5; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11-GISEL
6
7; Test flat scratch SVS addressing mode with various combinations of alignment
8; of soffset, voffset and inst_offset.
9
10declare i32 @llvm.amdgcn.workitem.id.x()
11
; soff1_voff1: scalar offset scaled by 1, vector offset scaled by 1.
; Forms the address  alloca + (%soff * 1) + (workitem.id.x * 1)  and performs
; three volatile i8 stores at constant offsets 1, 2 and 4 from that address.
; The assertion lines below are autogenerated; regenerate them rather than
; editing them by hand.
12define amdgpu_kernel void @soff1_voff1(i32 %soff) {
13; GFX940-SDAG-LABEL: soff1_voff1:
14; GFX940-SDAG:       ; %bb.0: ; %bb
15; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
16; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
17; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
18; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
19; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
20; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
21; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
22; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
23; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
24; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
25; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
26; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
27; GFX940-SDAG-NEXT:    s_endpgm
28;
29; GFX940-GISEL-LABEL: soff1_voff1:
30; GFX940-GISEL:       ; %bb.0: ; %bb
31; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
32; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
33; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
34; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
35; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
36; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
37; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
38; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
39; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
40; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
41; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
42; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
43; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
44; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
45; GFX940-GISEL-NEXT:    s_endpgm
46;
47; GFX11-SDAG-LABEL: soff1_voff1:
48; GFX11-SDAG:       ; %bb.0: ; %bb
49; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
50; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
51; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
52; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
53; GFX11-SDAG-NEXT:    v_add3_u32 v0, 4, s0, v0
54; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
55; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
56; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
57; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
58; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
59; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
60; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
61; GFX11-SDAG-NEXT:    s_endpgm
62;
63; GFX11-GISEL-LABEL: soff1_voff1:
64; GFX11-GISEL:       ; %bb.0: ; %bb
65; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
66; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
67; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
68; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
69; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
70; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
71; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
72; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
73; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
74; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
75; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
76; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
77; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
78; GFX11-GISEL-NEXT:    s_endpgm
79bb:
; Scalar part of the address: the i32 kernel argument multiplied by 1.
80  %soff1 = mul i32 %soff, 1
; 64-byte, 4-byte-aligned object in address space 5 that the stores target.
81  %a = alloca i8, i32 64, align 4, addrspace(5)
82  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
; Vector part of the address: workitem id x multiplied by 1.
83  %voff = call i32 @llvm.amdgcn.workitem.id.x()
84  %voff1 = mul i32 %voff, 1
85  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
; Constant offsets 1, 2 and 4 from the combined address; each volatile store
; writes the byte value equal to its offset.
86  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
87  store volatile i8 1, i8 addrspace(5)* %p1
88  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
89  store volatile i8 2, i8 addrspace(5)* %p2
90  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
91  store volatile i8 4, i8 addrspace(5)* %p4
92  ret void
93}
94
; soff1_voff2: scalar offset scaled by 1, vector offset scaled by 2.
; Forms the address  alloca + (%soff * 1) + (workitem.id.x * 2)  and performs
; three volatile i8 stores at constant offsets 1, 2 and 4 from that address.
; The assertion lines below are autogenerated; regenerate them rather than
; editing them by hand.
95define amdgpu_kernel void @soff1_voff2(i32 %soff) {
96; GFX940-SDAG-LABEL: soff1_voff2:
97; GFX940-SDAG:       ; %bb.0: ; %bb
98; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
99; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
100; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
101; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
102; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
103; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
104; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
105; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
106; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
107; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
108; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
109; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
110; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
111; GFX940-SDAG-NEXT:    s_endpgm
112;
113; GFX940-GISEL-LABEL: soff1_voff2:
114; GFX940-GISEL:       ; %bb.0: ; %bb
115; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
116; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
117; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
118; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
119; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
120; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
121; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
122; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
123; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
124; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
125; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
126; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
127; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
128; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
129; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
130; GFX940-GISEL-NEXT:    s_endpgm
131;
132; GFX11-SDAG-LABEL: soff1_voff2:
133; GFX11-SDAG:       ; %bb.0: ; %bb
134; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
135; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0
136; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
137; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
138; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
139; GFX11-SDAG-NEXT:    v_add3_u32 v0, 4, s0, v0
140; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
141; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
142; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
143; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
144; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
145; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
146; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
147; GFX11-SDAG-NEXT:    s_endpgm
148;
149; GFX11-GISEL-LABEL: soff1_voff2:
150; GFX11-GISEL:       ; %bb.0: ; %bb
151; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
152; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
153; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
154; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
155; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
156; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
157; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
158; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
159; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
160; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
161; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
162; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
163; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
164; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
165; GFX11-GISEL-NEXT:    s_endpgm
166bb:
; Scalar part of the address: the i32 kernel argument multiplied by 1.
167  %soff1 = mul i32 %soff, 1
; 64-byte, 4-byte-aligned object in address space 5 that the stores target.
168  %a = alloca i8, i32 64, align 4, addrspace(5)
169  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
; Vector part of the address: workitem id x multiplied by 2 (always even).
170  %voff = call i32 @llvm.amdgcn.workitem.id.x()
171  %voff2 = mul i32 %voff, 2
172  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
; Constant offsets 1, 2 and 4 from the combined address; each volatile store
; writes the byte value equal to its offset.
173  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
174  store volatile i8 1, i8 addrspace(5)* %p1
175  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
176  store volatile i8 2, i8 addrspace(5)* %p2
177  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
178  store volatile i8 4, i8 addrspace(5)* %p4
179  ret void
180}
181
; soff1_voff4: scalar offset scaled by 1, vector offset scaled by 4.
; Forms the address  alloca + (%soff * 1) + (workitem.id.x * 4)  and performs
; three volatile i8 stores at constant offsets 1, 2 and 4 from that address.
; The assertion lines below are autogenerated; regenerate them rather than
; editing them by hand.
182define amdgpu_kernel void @soff1_voff4(i32 %soff) {
183; GFX940-SDAG-LABEL: soff1_voff4:
184; GFX940-SDAG:       ; %bb.0: ; %bb
185; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
186; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
187; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
188; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
189; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
190; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
191; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
192; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
193; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
194; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
195; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
196; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
197; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
198; GFX940-SDAG-NEXT:    s_endpgm
199;
200; GFX940-GISEL-LABEL: soff1_voff4:
201; GFX940-GISEL:       ; %bb.0: ; %bb
202; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
203; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
204; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
205; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
206; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
207; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
208; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
209; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
210; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
211; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
212; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
213; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
214; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
215; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
216; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
217; GFX940-GISEL-NEXT:    s_endpgm
218;
219; GFX11-SDAG-LABEL: soff1_voff4:
220; GFX11-SDAG:       ; %bb.0: ; %bb
221; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
222; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 2, v0
223; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
224; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
225; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
226; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
227; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
228; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
229; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
230; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
231; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
232; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
233; GFX11-SDAG-NEXT:    s_endpgm
234;
235; GFX11-GISEL-LABEL: soff1_voff4:
236; GFX11-GISEL:       ; %bb.0: ; %bb
237; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
238; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
239; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
240; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
241; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
242; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
243; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
244; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
245; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
246; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
247; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
248; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
249; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
250; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
251; GFX11-GISEL-NEXT:    s_endpgm
252bb:
; Scalar part of the address: the i32 kernel argument multiplied by 1.
253  %soff1 = mul i32 %soff, 1
; 64-byte, 4-byte-aligned object in address space 5 that the stores target.
254  %a = alloca i8, i32 64, align 4, addrspace(5)
255  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
; Vector part of the address: workitem id x multiplied by 4 (a multiple of 4).
256  %voff = call i32 @llvm.amdgcn.workitem.id.x()
257  %voff4 = mul i32 %voff, 4
258  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
; Constant offsets 1, 2 and 4 from the combined address; each volatile store
; writes the byte value equal to its offset.
259  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
260  store volatile i8 1, i8 addrspace(5)* %p1
261  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
262  store volatile i8 2, i8 addrspace(5)* %p2
263  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
264  store volatile i8 4, i8 addrspace(5)* %p4
265  ret void
266}
267
; soff2_voff1: scalar offset scaled by 2, vector offset scaled by 1.
; Forms the address  alloca + (%soff * 2) + (workitem.id.x * 1)  and performs
; three volatile i8 stores at constant offsets 1, 2 and 4 from that address.
; The assertion lines below are autogenerated; regenerate them rather than
; editing them by hand.
268define amdgpu_kernel void @soff2_voff1(i32 %soff) {
269; GFX940-SDAG-LABEL: soff2_voff1:
270; GFX940-SDAG:       ; %bb.0: ; %bb
271; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
272; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
273; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
274; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
275; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
276; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
277; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
278; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
279; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
280; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
281; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
282; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
283; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
284; GFX940-SDAG-NEXT:    s_endpgm
285;
286; GFX940-GISEL-LABEL: soff2_voff1:
287; GFX940-GISEL:       ; %bb.0: ; %bb
288; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
289; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
290; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
291; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
292; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
293; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
294; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
295; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
296; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
297; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
298; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
299; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
300; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
301; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
302; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
303; GFX940-GISEL-NEXT:    s_endpgm
304;
305; GFX11-SDAG-LABEL: soff2_voff1:
306; GFX11-SDAG:       ; %bb.0: ; %bb
307; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
308; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
309; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 4
310; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
311; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
312; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
313; GFX11-SDAG-NEXT:    v_add3_u32 v0, 4, s0, v0
314; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
315; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
316; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
317; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
318; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
319; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
320; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
321; GFX11-SDAG-NEXT:    s_endpgm
322;
323; GFX11-GISEL-LABEL: soff2_voff1:
324; GFX11-GISEL:       ; %bb.0: ; %bb
325; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
326; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
327; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
328; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
329; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
330; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
331; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
332; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
333; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
334; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
335; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
336; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
337; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
338; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
339; GFX11-GISEL-NEXT:    s_endpgm
340bb:
; Scalar part of the address: the i32 kernel argument multiplied by 2 (always even).
341  %soff2 = mul i32 %soff, 2
; 64-byte, 4-byte-aligned object in address space 5 that the stores target.
342  %a = alloca i8, i32 64, align 4, addrspace(5)
343  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
; Vector part of the address: workitem id x multiplied by 1.
344  %voff = call i32 @llvm.amdgcn.workitem.id.x()
345  %voff1 = mul i32 %voff, 1
346  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
; Constant offsets 1, 2 and 4 from the combined address; each volatile store
; writes the byte value equal to its offset.
347  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
348  store volatile i8 1, i8 addrspace(5)* %p1
349  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
350  store volatile i8 2, i8 addrspace(5)* %p2
351  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
352  store volatile i8 4, i8 addrspace(5)* %p4
353  ret void
354}
355
; soff2_voff2: scalar offset scaled by 2, vector offset scaled by 2.
; Forms the address  alloca + (%soff * 2) + (workitem.id.x * 2)  and performs
; three volatile i8 stores at constant offsets 1, 2 and 4 from that address.
; The assertion lines below are autogenerated; regenerate them rather than
; editing them by hand.
356define amdgpu_kernel void @soff2_voff2(i32 %soff) {
357; GFX940-SDAG-LABEL: soff2_voff2:
358; GFX940-SDAG:       ; %bb.0: ; %bb
359; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
360; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
361; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
362; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
363; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
364; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
365; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
366; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
367; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
368; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
369; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
370; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
371; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
372; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
373; GFX940-SDAG-NEXT:    s_endpgm
374;
375; GFX940-GISEL-LABEL: soff2_voff2:
376; GFX940-GISEL:       ; %bb.0: ; %bb
377; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
378; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
379; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
380; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
381; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
382; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
383; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
384; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
385; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
386; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
387; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
388; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
389; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
390; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
391; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
392; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
393; GFX940-GISEL-NEXT:    s_endpgm
394;
395; GFX11-SDAG-LABEL: soff2_voff2:
396; GFX11-SDAG:       ; %bb.0: ; %bb
397; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
398; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0
399; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 2 :: v_dual_mov_b32 v3, 4
400; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
401; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
402; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
403; GFX11-SDAG-NEXT:    v_add3_u32 v0, 4, s0, v0
404; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, off offset:1 dlc
405; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
406; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, off offset:2 dlc
407; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
408; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
409; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
410; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
411; GFX11-SDAG-NEXT:    s_endpgm
412;
413; GFX11-GISEL-LABEL: soff2_voff2:
414; GFX11-GISEL:       ; %bb.0: ; %bb
415; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
416; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
417; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
418; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
419; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
420; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
421; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
422; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
423; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
424; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
425; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
426; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
427; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
428; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
429; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
430; GFX11-GISEL-NEXT:    s_endpgm
431bb:
; Scalar part of the address: the i32 kernel argument multiplied by 2 (always even).
432  %soff2 = mul i32 %soff, 2
; 64-byte, 4-byte-aligned object in address space 5 that the stores target.
433  %a = alloca i8, i32 64, align 4, addrspace(5)
434  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
; Vector part of the address: workitem id x multiplied by 2 (always even).
435  %voff = call i32 @llvm.amdgcn.workitem.id.x()
436  %voff2 = mul i32 %voff, 2
437  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
; Constant offsets 1, 2 and 4 from the combined address; each volatile store
; writes the byte value equal to its offset.
438  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
439  store volatile i8 1, i8 addrspace(5)* %p1
440  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
441  store volatile i8 2, i8 addrspace(5)* %p2
442  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
443  store volatile i8 4, i8 addrspace(5)* %p4
444  ret void
445}
446
; soff2_voff4: scalar offset scaled by 2, vector offset scaled by 4.
; Forms the address  alloca + (%soff * 2) + (workitem.id.x * 4)  and performs
; three volatile i8 stores at constant offsets 1, 2 and 4 from that address.
; The assertion lines below are autogenerated; regenerate them rather than
; editing them by hand.
447define amdgpu_kernel void @soff2_voff4(i32 %soff) {
448; GFX940-SDAG-LABEL: soff2_voff4:
449; GFX940-SDAG:       ; %bb.0: ; %bb
450; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
451; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
452; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
453; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
454; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
455; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
456; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
457; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
458; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
459; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
460; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
461; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
462; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
463; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
464; GFX940-SDAG-NEXT:    s_endpgm
465;
466; GFX940-GISEL-LABEL: soff2_voff4:
467; GFX940-GISEL:       ; %bb.0: ; %bb
468; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
469; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
470; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
471; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
472; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
473; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
474; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
475; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
476; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
477; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
478; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
479; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
480; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
481; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
482; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
483; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
484; GFX940-GISEL-NEXT:    s_endpgm
485;
486; GFX11-SDAG-LABEL: soff2_voff4:
487; GFX11-SDAG:       ; %bb.0: ; %bb
488; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
489; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
490; GFX11-SDAG-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
491; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
492; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 1
493; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
494; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
495; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
496; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
497; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
498; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
499; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
500; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
501; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
502; GFX11-SDAG-NEXT:    s_endpgm
503;
504; GFX11-GISEL-LABEL: soff2_voff4:
505; GFX11-GISEL:       ; %bb.0: ; %bb
506; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
507; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
508; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
509; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
510; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 1
511; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
512; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
513; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
514; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
515; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
516; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
517; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
518; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
519; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
520; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
521; GFX11-GISEL-NEXT:    s_endpgm
522bb:
; Scalar part of the address: the i32 kernel argument multiplied by 2 (always even).
523  %soff2 = mul i32 %soff, 2
; 64-byte, 4-byte-aligned object in address space 5 that the stores target.
524  %a = alloca i8, i32 64, align 4, addrspace(5)
525  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
; Vector part of the address: workitem id x multiplied by 4 (a multiple of 4).
526  %voff = call i32 @llvm.amdgcn.workitem.id.x()
527  %voff4 = mul i32 %voff, 4
528  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
; Constant offsets 1, 2 and 4 from the combined address; each volatile store
; writes the byte value equal to its offset.
529  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
530  store volatile i8 1, i8 addrspace(5)* %p1
531  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
532  store volatile i8 2, i8 addrspace(5)* %p2
533  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
534  store volatile i8 4, i8 addrspace(5)* %p4
535  ret void
536}
537
; soff4_voff1: scalar offset scaled by 4, vector offset scaled by 1.
; Forms the address  alloca + (%soff * 4) + (workitem.id.x * 1)  and performs
; three volatile i8 stores at constant offsets 1, 2 and 4 from that address.
; In the GFX11 SelectionDAG output checked below, the stores at offsets 1 and 2
; use a VGPR-only address while the store at offset 4 also uses an SGPR operand.
; The assertion lines below are autogenerated; regenerate them rather than
; editing them by hand.
538define amdgpu_kernel void @soff4_voff1(i32 %soff) {
539; GFX940-SDAG-LABEL: soff4_voff1:
540; GFX940-SDAG:       ; %bb.0: ; %bb
541; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
542; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
543; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
544; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
545; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
546; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
547; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
548; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
549; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
550; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
551; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
552; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
553; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
554; GFX940-SDAG-NEXT:    s_endpgm
555;
556; GFX940-GISEL-LABEL: soff4_voff1:
557; GFX940-GISEL:       ; %bb.0: ; %bb
558; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
559; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
560; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
561; GFX940-GISEL-NEXT:    v_mov_b32_e32 v3, 2
562; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
563; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
564; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
565; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
566; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
567; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
568; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
569; GFX940-GISEL-NEXT:    scratch_store_byte v0, v3, off offset:2 sc0 sc1
570; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
571; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
572; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
573; GFX940-GISEL-NEXT:    s_endpgm
574;
575; GFX11-SDAG-LABEL: soff4_voff1:
576; GFX11-SDAG:       ; %bb.0: ; %bb
577; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
578; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v4, 4
579; GFX11-SDAG-NEXT:    v_mov_b32_e32 v3, 2
580; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
581; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
582; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
583; GFX11-SDAG-NEXT:    v_add3_u32 v2, 4, s0, v0
584; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
585; GFX11-SDAG-NEXT:    scratch_store_b8 v2, v1, off offset:1 dlc
586; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
587; GFX11-SDAG-NEXT:    scratch_store_b8 v2, v3, off offset:2 dlc
588; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
589; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v4, s0 offset:4 dlc
590; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
591; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
592; GFX11-SDAG-NEXT:    s_endpgm
593;
594; GFX11-GISEL-LABEL: soff4_voff1:
595; GFX11-GISEL:       ; %bb.0: ; %bb
596; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
597; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
598; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
599; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
600; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
601; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
602; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
603; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
604; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
605; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
606; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
607; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
608; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
609; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
610; GFX11-GISEL-NEXT:    s_endpgm
611bb:
; Scalar part of the address: the i32 kernel argument multiplied by 4 (a multiple of 4).
612  %soff4 = mul i32 %soff, 4
; 64-byte, 4-byte-aligned object in address space 5 that the stores target.
613  %a = alloca i8, i32 64, align 4, addrspace(5)
614  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
; Vector part of the address: workitem id x multiplied by 1.
615  %voff = call i32 @llvm.amdgcn.workitem.id.x()
616  %voff1 = mul i32 %voff, 1
617  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
; Constant offsets 1, 2 and 4 from the combined address; each volatile store
; writes the byte value equal to its offset.
618  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
619  store volatile i8 1, i8 addrspace(5)* %p1
620  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
621  store volatile i8 2, i8 addrspace(5)* %p2
622  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
623  store volatile i8 4, i8 addrspace(5)* %p4
624  ret void
625}
626
; soff4_voff2: kernel that forms a scratch address from three parts and stores
; through it:
;   address = alloca base + (%soff * 4) + (workitem.id.x * 2) + constant
; with constant byte offsets 1, 2 and 4. Each store is a volatile i8 store, so
; every one of them must appear in the generated code.
;
; The assertion lines inside this function are autogenerated (see the NOTE on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
;
; NOTE(review): in the gfx1100 SelectionDAG assertions the offset:2 store uses
; a separately computed VGPR address (v_add3_u32 result with "off"), while the
; offset:1 and offset:4 stores use the VGPR + SGPR form. Presumably this is the
; alignment-dependent behavior the test is meant to pin down - confirm before
; "fixing" it.
627define amdgpu_kernel void @soff4_voff2(i32 %soff) {
628; GFX940-SDAG-LABEL: soff4_voff2:
629; GFX940-SDAG:       ; %bb.0: ; %bb
630; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
631; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
632; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
633; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
634; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
635; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
636; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
637; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
638; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
639; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
640; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
641; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
642; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
643; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
644; GFX940-SDAG-NEXT:    s_endpgm
645;
646; GFX940-GISEL-LABEL: soff4_voff2:
647; GFX940-GISEL:       ; %bb.0: ; %bb
648; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
649; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
650; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
651; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
652; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
653; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
654; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
655; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
656; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
657; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
658; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
659; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
660; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
661; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
662; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
663; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
664; GFX940-GISEL-NEXT:    s_endpgm
665;
666; GFX11-SDAG-LABEL: soff4_voff2:
667; GFX11-SDAG:       ; %bb.0: ; %bb
668; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
669; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_lshlrev_b32 v0, 1, v0
670; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 2
671; GFX11-SDAG-NEXT:    v_mov_b32_e32 v4, 4
672; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
673; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
674; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
675; GFX11-SDAG-NEXT:    v_add3_u32 v3, 4, s0, v0
676; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
677; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
678; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
679; GFX11-SDAG-NEXT:    scratch_store_b8 v3, v2, off offset:2 dlc
680; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
681; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v4, s0 offset:4 dlc
682; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
683; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
684; GFX11-SDAG-NEXT:    s_endpgm
685;
686; GFX11-GISEL-LABEL: soff4_voff2:
687; GFX11-GISEL:       ; %bb.0: ; %bb
688; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
689; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
690; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
691; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
692; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
693; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
694; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
695; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
696; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
697; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
698; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
699; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
700; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
701; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
702; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
703; GFX11-GISEL-NEXT:    s_endpgm
704bb:
  ; Scalar part of the address: the kernel argument scaled by 4.
705  %soff4 = mul i32 %soff, 4
  ; 64-byte, 4-byte-aligned i8 buffer in addrspace(5); the stores below are
  ; the scratch_store_* instructions seen in the assertions.
706  %a = alloca i8, i32 64, align 4, addrspace(5)
707  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; Workitem-dependent part of the address: workitem id (x) scaled by 2.
708  %voff = call i32 @llvm.amdgcn.workitem.id.x()
709  %voff2 = mul i32 %voff, 2
710  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
  ; Three volatile byte stores at constant byte offsets 1, 2 and 4 from the
  ; combined address; the stored value equals the offset.
711  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
712  store volatile i8 1, i8 addrspace(5)* %p1
713  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
714  store volatile i8 2, i8 addrspace(5)* %p2
715  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
716  store volatile i8 4, i8 addrspace(5)* %p4
717  ret void
718}
719
; soff4_voff4: kernel that forms a scratch address from three parts and stores
; through it:
;   address = alloca base + (%soff * 4) + (workitem.id.x * 4) + constant
; with constant byte offsets 1, 2 and 4. Each store is a volatile i8 store, so
; every one of them must appear in the generated code.
;
; The assertion lines inside this function are autogenerated (see the NOTE on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
;
; In both SelectionDAG assertion groups all three stores use the VGPR + SGPR
; address form (v0 with s0); in both GlobalISel groups the address is added up
; into a single VGPR first and the stores use "off".
720define amdgpu_kernel void @soff4_voff4(i32 %soff) {
721; GFX940-SDAG-LABEL: soff4_voff4:
722; GFX940-SDAG:       ; %bb.0: ; %bb
723; GFX940-SDAG-NEXT:    s_load_dword s0, s[0:1], 0x24
724; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 1
725; GFX940-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
726; GFX940-SDAG-NEXT:    v_mov_b32_e32 v2, 2
727; GFX940-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
728; GFX940-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
729; GFX940-SDAG-NEXT:    s_add_i32 s0, s0, 4
730; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
731; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
732; GFX940-SDAG-NEXT:    scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
733; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
734; GFX940-SDAG-NEXT:    v_mov_b32_e32 v1, 4
735; GFX940-SDAG-NEXT:    scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
736; GFX940-SDAG-NEXT:    s_waitcnt vmcnt(0)
737; GFX940-SDAG-NEXT:    s_endpgm
738;
739; GFX940-GISEL-LABEL: soff4_voff4:
740; GFX940-GISEL:       ; %bb.0: ; %bb
741; GFX940-GISEL-NEXT:    s_load_dword s0, s[0:1], 0x24
742; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
743; GFX940-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
744; GFX940-GISEL-NEXT:    v_mov_b32_e32 v2, 1
745; GFX940-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
746; GFX940-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
747; GFX940-GISEL-NEXT:    v_add_u32_e32 v1, s0, v1
748; GFX940-GISEL-NEXT:    v_add_u32_e32 v0, v1, v0
749; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 2
750; GFX940-GISEL-NEXT:    scratch_store_byte v0, v2, off offset:1 sc0 sc1
751; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
752; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:2 sc0 sc1
753; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
754; GFX940-GISEL-NEXT:    v_mov_b32_e32 v1, 4
755; GFX940-GISEL-NEXT:    scratch_store_byte v0, v1, off offset:4 sc0 sc1
756; GFX940-GISEL-NEXT:    s_waitcnt vmcnt(0)
757; GFX940-GISEL-NEXT:    s_endpgm
758;
759; GFX11-SDAG-LABEL: soff4_voff4:
760; GFX11-SDAG:       ; %bb.0: ; %bb
761; GFX11-SDAG-NEXT:    s_load_b32 s0, s[0:1], 0x24
762; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 1 :: v_dual_mov_b32 v2, 2
763; GFX11-SDAG-NEXT:    v_dual_mov_b32 v3, 4 :: v_dual_lshlrev_b32 v0, 2, v0
764; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
765; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
766; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
767; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 4
768; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v1, s0 offset:1 dlc
769; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
770; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v2, s0 offset:2 dlc
771; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
772; GFX11-SDAG-NEXT:    scratch_store_b8 v0, v3, s0 offset:4 dlc
773; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
774; GFX11-SDAG-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
775; GFX11-SDAG-NEXT:    s_endpgm
776;
777; GFX11-GISEL-LABEL: soff4_voff4:
778; GFX11-GISEL:       ; %bb.0: ; %bb
779; GFX11-GISEL-NEXT:    s_load_b32 s0, s[0:1], 0x24
780; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
781; GFX11-GISEL-NEXT:    v_dual_mov_b32 v2, 1 :: v_dual_mov_b32 v3, 4
782; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
783; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 2
784; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
785; GFX11-GISEL-NEXT:    v_add_nc_u32_e64 v1, s0, 4
786; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 2 :: v_dual_add_nc_u32 v0, v1, v0
787; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v2, off offset:1 dlc
788; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
789; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v1, off offset:2 dlc
790; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
791; GFX11-GISEL-NEXT:    scratch_store_b8 v0, v3, off offset:4 dlc
792; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
793; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
794; GFX11-GISEL-NEXT:    s_endpgm
795bb:
  ; Scalar part of the address: the kernel argument scaled by 4.
796  %soff4 = mul i32 %soff, 4
  ; 64-byte, 4-byte-aligned i8 buffer in addrspace(5); the stores below are
  ; the scratch_store_* instructions seen in the assertions.
797  %a = alloca i8, i32 64, align 4, addrspace(5)
798  %as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
  ; Workitem-dependent part of the address: workitem id (x) scaled by 4.
799  %voff = call i32 @llvm.amdgcn.workitem.id.x()
800  %voff4 = mul i32 %voff, 4
801  %asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
  ; Three volatile byte stores at constant byte offsets 1, 2 and 4 from the
  ; combined address; the stored value equals the offset.
802  %p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
803  store volatile i8 1, i8 addrspace(5)* %p1
804  %p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
805  store volatile i8 2, i8 addrspace(5)* %p2
806  %p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
807  store volatile i8 4, i8 addrspace(5)* %p4
808  ret void
809}
810