1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
4; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX11 %s
5
6; Test using saddr addressing mode of global_*store_* flat instructions.
7
define amdgpu_ps void @global_store_saddr_i8_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 addrspace(1)* %voffset.ptr, i8 %data) {
; GCN-LABEL: global_store_saddr_i8_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_load_dword v0, v[0:1], off
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    global_store_byte v0, v2, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i8_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v0, v2, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; The 32-bit offset is loaded from memory, zero-extended and added to the
  ; inreg base; the checks expect the byte store to use the saddr form.
  %off32 = load i32, i32 addrspace(1)* %voffset.ptr
  %off64 = zext i32 %off32 to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  store i8 %data, i8 addrspace(1)* %byte.addr
  ret void
}
29
30; Maximum positive offset on gfx10
define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_2047(i8 addrspace(1)* inreg %sbase, i32 addrspace(1)* %voffset.ptr, i8 %data) {
; GCN-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_load_dword v0, v[0:1], off
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    global_store_byte v0, v2, s[2:3] offset:2047
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i8_zext_vgpr_offset_2047:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v0, v2, s[2:3] offset:2047
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; sbase + zext(loaded offset) + 2047; the checks expect the constant 2047 to
  ; land in the store's immediate offset field on every tested target.
  %off32 = load i32, i32 addrspace(1)* %voffset.ptr
  %off64 = zext i32 %off32 to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 2047
  store i8 %data, i8 addrspace(1)* %imm.addr
  ret void
}
53
; Minimum (most negative) immediate offset on gfx10
define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_neg2048(i8 addrspace(1)* inreg %sbase, i32 addrspace(1)* %voffset.ptr, i8 %data) {
; GCN-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_load_dword v0, v[0:1], off
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    global_store_byte v0, v2, s[2:3] offset:-2048
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i8_zext_vgpr_offset_neg2048:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v0, v2, s[2:3] offset:-2048
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; sbase + zext(loaded offset) - 2048; the checks expect the constant -2048 to
  ; land in the store's immediate offset field on every tested target.
  %off32 = load i32, i32 addrspace(1)* %voffset.ptr
  %off64 = zext i32 %off32 to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -2048
  store i8 %data, i8 addrspace(1)* %imm.addr
  ret void
}
77
78; --------------------------------------------------------------------------------
79; Uniformity edge cases
80; --------------------------------------------------------------------------------
81
; An addrspace(3) global holding an addrspace(1) pointer. The uniform-pointer
; tests load their store base from it; its initial value is undef.
@ptr.in.lds = internal addrspace(3) global i8 addrspace(1)* undef
83
84; Base pointer is uniform, but also in VGPRs
define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, i8 %data) {
; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    ds_read_b64 v[2:3], v2
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v2
; GFX9-NEXT:    v_readfirstlane_b32 s1, v3
; GFX9-NEXT:    s_nop 4
; GFX9-NEXT:    global_store_byte v0, v1, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    ds_read_b64 v[2:3], v2
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
; GFX10-NEXT:    v_readfirstlane_b32 s1, v3
; GFX10-NEXT:    global_store_byte v0, v1, s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    ds_load_b64 v[2:3], v2
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
; GFX11-NEXT:    v_readfirstlane_b32 s1, v3
; GFX11-NEXT:    global_store_b8 v0, v1, s[0:1]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; The base pointer comes from a DS load, so it arrives in VGPRs; the checks
  ; expect v_readfirstlane to move it to SGPRs so the saddr form can be used.
  %uniform.base = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %uniform.base, i64 %off64
  store i8 %data, i8 addrspace(1)* %byte.addr
  ret void
}
123
124; Base pointer is uniform, but also in VGPRs, with imm offset
define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset, i8 %data) {
; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    ds_read_b64 v[2:3], v2
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_readfirstlane_b32 s0, v2
; GFX9-NEXT:    v_readfirstlane_b32 s1, v3
; GFX9-NEXT:    s_nop 4
; GFX9-NEXT:    global_store_byte v0, v1, s[0:1] offset:-120
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    ds_read_b64 v[2:3], v2
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
; GFX10-NEXT:    v_readfirstlane_b32 s1, v3
; GFX10-NEXT:    global_store_byte v0, v1, s[0:1] offset:-120
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    ds_load_b64 v[2:3], v2
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
; GFX11-NEXT:    v_readfirstlane_b32 s1, v3
; GFX11-NEXT:    global_store_b8 v0, v1, s[0:1] offset:-120
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; Base pointer loaded via DS (so in VGPRs) plus zext(voffset) and a constant
  ; -120; the checks expect readfirstlane'd SGPRs and -120 as the immediate.
  %uniform.base = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %uniform.base, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -120
  store i8 %data, i8 addrspace(1)* %imm.addr
  ret void
}
164
165; --------------------------------------------------------------------------------
166; Stress various type stores
167; --------------------------------------------------------------------------------
168
define amdgpu_ps void @global_store_saddr_i16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i16 %data) {
; GCN-LABEL: global_store_saddr_i16_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_short v0, v1, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i16_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b16 v0, v1, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; i16 store to sbase + zext(voffset).
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to i16 addrspace(1)*
  store i16 %data, i16 addrspace(1)* %typed.addr
  ret void
}
186
define amdgpu_ps void @global_store_saddr_i16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i16 %data) {
; GCN-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_short v0, v1, s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i16_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b16 v0, v1, s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; i16 store to sbase + zext(voffset) - 128; -128 is expected as the immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to i16 addrspace(1)*
  store i16 %data, i16 addrspace(1)* %typed.addr
  ret void
}
205
define amdgpu_ps void @global_store_saddr_f16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, half %data) {
; GCN-LABEL: global_store_saddr_f16_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_short v0, v1, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_f16_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b16 v0, v1, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; half store to sbase + zext(voffset).
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to half addrspace(1)*
  store half %data, half addrspace(1)* %typed.addr
  ret void
}
223
define amdgpu_ps void @global_store_saddr_f16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, half %data) {
; GCN-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_short v0, v1, s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_f16_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b16 v0, v1, s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; half store to sbase + zext(voffset) - 128; -128 is expected as the immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to half addrspace(1)*
  store half %data, half addrspace(1)* %typed.addr
  ret void
}
242
define amdgpu_ps void @global_store_saddr_i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GCN-LABEL: global_store_saddr_i32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dword v0, v1, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; i32 store to sbase + zext(voffset).
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to i32 addrspace(1)*
  store i32 %data, i32 addrspace(1)* %typed.addr
  ret void
}
260
define amdgpu_ps void @global_store_saddr_i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
; GCN-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dword v0, v1, s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; i32 store to sbase + zext(voffset) - 128; -128 is expected as the immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to i32 addrspace(1)*
  store i32 %data, i32 addrspace(1)* %typed.addr
  ret void
}
279
define amdgpu_ps void @global_store_saddr_f32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, float %data) {
; GCN-LABEL: global_store_saddr_f32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dword v0, v1, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_f32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; float store to sbase + zext(voffset).
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to float addrspace(1)*
  store float %data, float addrspace(1)* %typed.addr
  ret void
}
297
define amdgpu_ps void @global_store_saddr_f32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, float %data) {
; GCN-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dword v0, v1, s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_f32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; float store to sbase + zext(voffset) - 128; -128 is expected as the immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to float addrspace(1)*
  store float %data, float addrspace(1)* %typed.addr
  ret void
}
316
define amdgpu_ps void @global_store_saddr_p3_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i8 addrspace(3)* %data) {
; GCN-LABEL: global_store_saddr_p3_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dword v0, v1, s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_p3_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; Store of an addrspace(3) pointer value to sbase + zext(voffset); the checks
  ; expect a single-dword store.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to i8 addrspace(3)* addrspace(1)*
  store i8 addrspace(3)* %data, i8 addrspace(3)* addrspace(1)* %typed.addr
  ret void
}
334
define amdgpu_ps void @global_store_saddr_p3_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i8 addrspace(3)* %data) {
; GCN-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dword v0, v1, s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_p3_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; Store of an addrspace(3) pointer value to sbase + zext(voffset) - 128; -128
  ; is expected as the immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to i8 addrspace(3)* addrspace(1)*
  store i8 addrspace(3)* %data, i8 addrspace(3)* addrspace(1)* %typed.addr
  ret void
}
353
define amdgpu_ps void @global_store_saddr_i64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i64 %data) {
; GCN-LABEL: global_store_saddr_i64_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i64_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; i64 store to sbase + zext(voffset).
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to i64 addrspace(1)*
  store i64 %data, i64 addrspace(1)* %typed.addr
  ret void
}
371
define amdgpu_ps void @global_store_saddr_i64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i64 %data) {
; GCN-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_i64_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; i64 store to sbase + zext(voffset) - 128; -128 is expected as the immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to i64 addrspace(1)*
  store i64 %data, i64 addrspace(1)* %typed.addr
  ret void
}
390
define amdgpu_ps void @global_store_saddr_f64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, double %data) {
; GCN-LABEL: global_store_saddr_f64_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_f64_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; double store to sbase + zext(voffset).
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to double addrspace(1)*
  store double %data, double addrspace(1)* %typed.addr
  ret void
}
408
define amdgpu_ps void @global_store_saddr_f64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, double %data) {
; GCN-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_f64_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; double store to sbase + zext(voffset) - 128; -128 is expected as the immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to double addrspace(1)*
  store double %data, double addrspace(1)* %typed.addr
  ret void
}
427
define amdgpu_ps void @global_store_saddr_v2i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i32> %data) {
; GCN-LABEL: global_store_saddr_v2i32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v2i32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; <2 x i32> store to sbase + zext(voffset).
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to <2 x i32> addrspace(1)*
  store <2 x i32> %data, <2 x i32> addrspace(1)* %typed.addr
  ret void
}
445
define amdgpu_ps void @global_store_saddr_v2i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i32> %data) {
; GCN-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v2i32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; <2 x i32> store to sbase + zext(voffset) - 128; -128 is expected as the
  ; immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to <2 x i32> addrspace(1)*
  store <2 x i32> %data, <2 x i32> addrspace(1)* %typed.addr
  ret void
}
464
define amdgpu_ps void @global_store_saddr_v2f32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x float> %data) {
; GCN-LABEL: global_store_saddr_v2f32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v2f32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; <2 x float> store to sbase + zext(voffset).
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to <2 x float> addrspace(1)*
  store <2 x float> %data, <2 x float> addrspace(1)* %typed.addr
  ret void
}
482
define amdgpu_ps void @global_store_saddr_v2f32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x float> %data) {
; GCN-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v2f32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; <2 x float> store to sbase + zext(voffset) - 128; -128 is expected as the
  ; immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to <2 x float> addrspace(1)*
  store <2 x float> %data, <2 x float> addrspace(1)* %typed.addr
  ret void
}
501
define amdgpu_ps void @global_store_saddr_v4i16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i16> %data) {
; GCN-LABEL: global_store_saddr_v4i16_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4i16_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; <4 x i16> store to sbase + zext(voffset).
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to <4 x i16> addrspace(1)*
  store <4 x i16> %data, <4 x i16> addrspace(1)* %typed.addr
  ret void
}
519
define amdgpu_ps void @global_store_saddr_v4i16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i16> %data) {
; GCN-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4i16_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; <4 x i16> store to sbase + zext(voffset) - 128; -128 is expected as the
  ; immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to <4 x i16> addrspace(1)*
  store <4 x i16> %data, <4 x i16> addrspace(1)* %typed.addr
  ret void
}
538
define amdgpu_ps void @global_store_saddr_v4f16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x half> %data) {
; GCN-LABEL: global_store_saddr_v4f16_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4f16_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; <4 x half> store to sbase + zext(voffset).
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to <4 x half> addrspace(1)*
  store <4 x half> %data, <4 x half> addrspace(1)* %typed.addr
  ret void
}
556
define amdgpu_ps void @global_store_saddr_v4f16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x half> %data) {
; GCN-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v4f16_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; <4 x half> store to sbase + zext(voffset) - 128; -128 is expected as the
  ; immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to <4 x half> addrspace(1)*
  store <4 x half> %data, <4 x half> addrspace(1)* %typed.addr
  ret void
}
575
define amdgpu_ps void @global_store_saddr_p1_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i8 addrspace(1)* %data) {
; GCN-LABEL: global_store_saddr_p1_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_p1_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; Store of an addrspace(1) pointer value to sbase + zext(voffset); the checks
  ; expect a two-dword store.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to i8 addrspace(1)* addrspace(1)*
  store i8 addrspace(1)* %data, i8 addrspace(1)* addrspace(1)* %typed.addr
  ret void
}
593
define amdgpu_ps void @global_store_saddr_p1_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i8 addrspace(1)* %data) {
; GCN-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_p1_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; Store of an addrspace(1) pointer value to sbase + zext(voffset) - 128; -128
  ; is expected as the immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to i8 addrspace(1)* addrspace(1)*
  store i8 addrspace(1)* %data, i8 addrspace(1)* addrspace(1)* %typed.addr
  ret void
}
612
define amdgpu_ps void @global_store_saddr_v3i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <3 x i32> %data) {
; GCN-LABEL: global_store_saddr_v3i32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v3i32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; <3 x i32> store to sbase + zext(voffset).
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to <3 x i32> addrspace(1)*
  store <3 x i32> %data, <3 x i32> addrspace(1)* %typed.addr
  ret void
}
630
define amdgpu_ps void @global_store_saddr_v3i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <3 x i32> %data) {
; GCN-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v3i32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; <3 x i32> store to sbase + zext(voffset) - 128; -128 is expected as the
  ; immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to <3 x i32> addrspace(1)*
  store <3 x i32> %data, <3 x i32> addrspace(1)* %typed.addr
  ret void
}
649
define amdgpu_ps void @global_store_saddr_v3f32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <3 x float> %data) {
; GCN-LABEL: global_store_saddr_v3f32_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v3f32_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; <3 x float> store to sbase + zext(voffset).
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to <3 x float> addrspace(1)*
  store <3 x float> %data, <3 x float> addrspace(1)* %typed.addr
  ret void
}
667
define amdgpu_ps void @global_store_saddr_v3f32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <3 x float> %data) {
; GCN-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v3f32_zext_vgpr_offset_neg128:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3] offset:-128
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; <3 x float> store to sbase + zext(voffset) - 128; -128 is expected as the
  ; immediate.
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %imm.addr = getelementptr inbounds i8, i8 addrspace(1)* %byte.addr, i64 -128
  %typed.addr = bitcast i8 addrspace(1)* %imm.addr to <3 x float> addrspace(1)*
  store <3 x float> %data, <3 x float> addrspace(1)* %typed.addr
  ret void
}
686
define amdgpu_ps void @global_store_saddr_v6i16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <6 x i16> %data) {
; GCN-LABEL: global_store_saddr_v6i16_zext_vgpr:
; GCN:       ; %bb.0:
; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3]
; GCN-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_saddr_v6i16_zext_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3]
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  ; <6 x i16> store to sbase + zext(voffset).
  %off64 = zext i32 %voffset to i64
  %byte.addr = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %off64
  %typed.addr = bitcast i8 addrspace(1)* %byte.addr to <6 x i16> addrspace(1)*
  store <6 x i16> %data, <6 x i16> addrspace(1)* %typed.addr
  ret void
}
704
; Store a <6 x i16> to %sbase + zext(%voffset) - 128.
; The checks expect a single 96-bit saddr-form store with the constant folded
; into the immediate offset field (offset:-128).
705define amdgpu_ps void @global_store_saddr_v6i16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <6 x i16> %data) {
706; GCN-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
707; GCN:       ; %bb.0:
708; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
709; GCN-NEXT:    s_endpgm
710;
711; GFX11-LABEL: global_store_saddr_v6i16_zext_vgpr_offset_neg128:
712; GFX11:       ; %bb.0:
713; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3] offset:-128
714; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
715; GFX11-NEXT:    s_endpgm
716  %zext.offset = zext i32 %voffset to i64
717  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
718  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
719  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <6 x i16> addrspace(1)*
720  store <6 x i16> %data, <6 x i16> addrspace(1)* %gep1.cast
721  ret void
722}
723
; Store a <6 x half> to %sbase + zext(%voffset) with no constant offset.
; The six half elements are expected to be written by one 96-bit saddr-form
; store from v[1:3] (global_store_dwordx3, or global_store_b96 on GFX11).
724define amdgpu_ps void @global_store_saddr_v6f16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <6 x half> %data) {
725; GCN-LABEL: global_store_saddr_v6f16_zext_vgpr:
726; GCN:       ; %bb.0:
727; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3]
728; GCN-NEXT:    s_endpgm
729;
730; GFX11-LABEL: global_store_saddr_v6f16_zext_vgpr:
731; GFX11:       ; %bb.0:
732; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3]
733; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
734; GFX11-NEXT:    s_endpgm
735  %zext.offset = zext i32 %voffset to i64
736  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
737  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <6 x half> addrspace(1)*
738  store <6 x half> %data, <6 x half> addrspace(1)* %gep0.cast
739  ret void
740}
741
; Store a <6 x half> to %sbase + zext(%voffset) - 128.
; The checks expect a single 96-bit saddr-form store with the constant folded
; into the immediate offset field (offset:-128).
742define amdgpu_ps void @global_store_saddr_v6f16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <6 x half> %data) {
743; GCN-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
744; GCN:       ; %bb.0:
745; GCN-NEXT:    global_store_dwordx3 v0, v[1:3], s[2:3] offset:-128
746; GCN-NEXT:    s_endpgm
747;
748; GFX11-LABEL: global_store_saddr_v6f16_zext_vgpr_offset_neg128:
749; GFX11:       ; %bb.0:
750; GFX11-NEXT:    global_store_b96 v0, v[1:3], s[2:3] offset:-128
751; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
752; GFX11-NEXT:    s_endpgm
753  %zext.offset = zext i32 %voffset to i64
754  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
755  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
756  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <6 x half> addrspace(1)*
757  store <6 x half> %data, <6 x half> addrspace(1)* %gep1.cast
758  ret void
759}
760
; Store a <4 x i32> to %sbase + zext(%voffset) with no constant offset.
; The checks expect one 128-bit global store in saddr form, taking s[2:3],
; v0 and v[1:4] as its operands (global_store_b128 on GFX11).
761define amdgpu_ps void @global_store_saddr_v4i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i32> %data) {
762; GCN-LABEL: global_store_saddr_v4i32_zext_vgpr:
763; GCN:       ; %bb.0:
764; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3]
765; GCN-NEXT:    s_endpgm
766;
767; GFX11-LABEL: global_store_saddr_v4i32_zext_vgpr:
768; GFX11:       ; %bb.0:
769; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3]
770; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
771; GFX11-NEXT:    s_endpgm
772  %zext.offset = zext i32 %voffset to i64
773  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
774  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <4 x i32> addrspace(1)*
775  store <4 x i32> %data, <4 x i32> addrspace(1)* %gep0.cast
776  ret void
777}
778
; Store a <4 x i32> to %sbase + zext(%voffset) - 128.
; The checks expect a single 128-bit saddr-form store with the constant folded
; into the immediate offset field (offset:-128).
779define amdgpu_ps void @global_store_saddr_v4i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i32> %data) {
780; GCN-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
781; GCN:       ; %bb.0:
782; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
783; GCN-NEXT:    s_endpgm
784;
785; GFX11-LABEL: global_store_saddr_v4i32_zext_vgpr_offset_neg128:
786; GFX11:       ; %bb.0:
787; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3] offset:-128
788; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
789; GFX11-NEXT:    s_endpgm
790  %zext.offset = zext i32 %voffset to i64
791  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
792  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
793  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <4 x i32> addrspace(1)*
794  store <4 x i32> %data, <4 x i32> addrspace(1)* %gep1.cast
795  ret void
796}
797
; Store a <4 x float> to %sbase + zext(%voffset) with no constant offset.
; The checks expect one 128-bit global store in saddr form, taking s[2:3],
; v0 and v[1:4] as its operands (global_store_b128 on GFX11).
798define amdgpu_ps void @global_store_saddr_v4f32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x float> %data) {
799; GCN-LABEL: global_store_saddr_v4f32_zext_vgpr:
800; GCN:       ; %bb.0:
801; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3]
802; GCN-NEXT:    s_endpgm
803;
804; GFX11-LABEL: global_store_saddr_v4f32_zext_vgpr:
805; GFX11:       ; %bb.0:
806; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3]
807; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
808; GFX11-NEXT:    s_endpgm
809  %zext.offset = zext i32 %voffset to i64
810  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
811  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <4 x float> addrspace(1)*
812  store <4 x float> %data, <4 x float> addrspace(1)* %gep0.cast
813  ret void
814}
815
; Store a <4 x float> to %sbase + zext(%voffset) - 128.
; The checks expect a single 128-bit saddr-form store with the constant folded
; into the immediate offset field (offset:-128).
816define amdgpu_ps void @global_store_saddr_v4f32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x float> %data) {
817; GCN-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128:
818; GCN:       ; %bb.0:
819; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
820; GCN-NEXT:    s_endpgm
821;
822; GFX11-LABEL: global_store_saddr_v4f32_zext_vgpr_offset_neg128:
823; GFX11:       ; %bb.0:
824; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3] offset:-128
825; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
826; GFX11-NEXT:    s_endpgm
827  %zext.offset = zext i32 %voffset to i64
828  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
829  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
830  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <4 x float> addrspace(1)*
831  store <4 x float> %data, <4 x float> addrspace(1)* %gep1.cast
832  ret void
833}
834
; Store a <2 x i64> to %sbase + zext(%voffset) with no constant offset.
; The two 64-bit elements are expected to be written by one 128-bit saddr-form
; store from v[1:4] (global_store_dwordx4, or global_store_b128 on GFX11).
835define amdgpu_ps void @global_store_saddr_v2i64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i64> %data) {
836; GCN-LABEL: global_store_saddr_v2i64_zext_vgpr:
837; GCN:       ; %bb.0:
838; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3]
839; GCN-NEXT:    s_endpgm
840;
841; GFX11-LABEL: global_store_saddr_v2i64_zext_vgpr:
842; GFX11:       ; %bb.0:
843; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3]
844; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
845; GFX11-NEXT:    s_endpgm
846  %zext.offset = zext i32 %voffset to i64
847  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
848  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <2 x i64> addrspace(1)*
849  store <2 x i64> %data, <2 x i64> addrspace(1)* %gep0.cast
850  ret void
851}
852
; Store a <2 x i64> to %sbase + zext(%voffset) - 128.
; The checks expect a single 128-bit saddr-form store with the constant folded
; into the immediate offset field (offset:-128).
853define amdgpu_ps void @global_store_saddr_v2i64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i64> %data) {
854; GCN-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128:
855; GCN:       ; %bb.0:
856; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
857; GCN-NEXT:    s_endpgm
858;
859; GFX11-LABEL: global_store_saddr_v2i64_zext_vgpr_offset_neg128:
860; GFX11:       ; %bb.0:
861; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3] offset:-128
862; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
863; GFX11-NEXT:    s_endpgm
864  %zext.offset = zext i32 %voffset to i64
865  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
866  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
867  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <2 x i64> addrspace(1)*
868  store <2 x i64> %data, <2 x i64> addrspace(1)* %gep1.cast
869  ret void
870}
871
; Store a <2 x double> to %sbase + zext(%voffset) with no constant offset.
; The two double elements are expected to be written by one 128-bit saddr-form
; store from v[1:4] (global_store_dwordx4, or global_store_b128 on GFX11).
872define amdgpu_ps void @global_store_saddr_v2f64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x double> %data) {
873; GCN-LABEL: global_store_saddr_v2f64_zext_vgpr:
874; GCN:       ; %bb.0:
875; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3]
876; GCN-NEXT:    s_endpgm
877;
878; GFX11-LABEL: global_store_saddr_v2f64_zext_vgpr:
879; GFX11:       ; %bb.0:
880; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3]
881; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
882; GFX11-NEXT:    s_endpgm
883  %zext.offset = zext i32 %voffset to i64
884  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
885  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <2 x double> addrspace(1)*
886  store <2 x double> %data, <2 x double> addrspace(1)* %gep0.cast
887  ret void
888}
889
; Store a <2 x double> to %sbase + zext(%voffset) - 128.
; The checks expect a single 128-bit saddr-form store with the constant folded
; into the immediate offset field (offset:-128).
890define amdgpu_ps void @global_store_saddr_v2f64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x double> %data) {
891; GCN-LABEL: global_store_saddr_v2f64_zext_vgpr_offset_neg128:
892; GCN:       ; %bb.0:
893; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
894; GCN-NEXT:    s_endpgm
895;
896; GFX11-LABEL: global_store_saddr_v2f64_zext_vgpr_offset_neg128:
897; GFX11:       ; %bb.0:
898; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3] offset:-128
899; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
900; GFX11-NEXT:    s_endpgm
901  %zext.offset = zext i32 %voffset to i64
902  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
903  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
904  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <2 x double> addrspace(1)*
905  store <2 x double> %data, <2 x double> addrspace(1)* %gep1.cast
906  ret void
907}
908
; Store a <8 x i16> to %sbase + zext(%voffset) with no constant offset.
; The eight 16-bit elements are expected to be written by one 128-bit
; saddr-form store from v[1:4] (global_store_dwordx4, or global_store_b128 on
; GFX11).
909define amdgpu_ps void @global_store_saddr_v8i16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <8 x i16> %data) {
910; GCN-LABEL: global_store_saddr_v8i16_zext_vgpr:
911; GCN:       ; %bb.0:
912; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3]
913; GCN-NEXT:    s_endpgm
914;
915; GFX11-LABEL: global_store_saddr_v8i16_zext_vgpr:
916; GFX11:       ; %bb.0:
917; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3]
918; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
919; GFX11-NEXT:    s_endpgm
920  %zext.offset = zext i32 %voffset to i64
921  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
922  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <8 x i16> addrspace(1)*
923  store <8 x i16> %data, <8 x i16> addrspace(1)* %gep0.cast
924  ret void
925}
926
; Store a <8 x i16> to %sbase + zext(%voffset) - 128.
; The checks expect a single 128-bit saddr-form store with the constant folded
; into the immediate offset field (offset:-128).
927define amdgpu_ps void @global_store_saddr_v8i16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <8 x i16> %data) {
928; GCN-LABEL: global_store_saddr_v8i16_zext_vgpr_offset_neg128:
929; GCN:       ; %bb.0:
930; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
931; GCN-NEXT:    s_endpgm
932;
933; GFX11-LABEL: global_store_saddr_v8i16_zext_vgpr_offset_neg128:
934; GFX11:       ; %bb.0:
935; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3] offset:-128
936; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
937; GFX11-NEXT:    s_endpgm
938  %zext.offset = zext i32 %voffset to i64
939  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
940  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
941  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <8 x i16> addrspace(1)*
942  store <8 x i16> %data, <8 x i16> addrspace(1)* %gep1.cast
943  ret void
944}
945
; Store a <8 x half> to %sbase + zext(%voffset) with no constant offset.
; The eight half elements are expected to be written by one 128-bit saddr-form
; store from v[1:4] (global_store_dwordx4, or global_store_b128 on GFX11).
946define amdgpu_ps void @global_store_saddr_v8f16_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <8 x half> %data) {
947; GCN-LABEL: global_store_saddr_v8f16_zext_vgpr:
948; GCN:       ; %bb.0:
949; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3]
950; GCN-NEXT:    s_endpgm
951;
952; GFX11-LABEL: global_store_saddr_v8f16_zext_vgpr:
953; GFX11:       ; %bb.0:
954; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3]
955; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
956; GFX11-NEXT:    s_endpgm
957  %zext.offset = zext i32 %voffset to i64
958  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
959  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <8 x half> addrspace(1)*
960  store <8 x half> %data, <8 x half> addrspace(1)* %gep0.cast
961  ret void
962}
963
; Store a <8 x half> to %sbase + zext(%voffset) - 128.
; The checks expect a single 128-bit saddr-form store with the constant folded
; into the immediate offset field (offset:-128).
964define amdgpu_ps void @global_store_saddr_v8f16_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <8 x half> %data) {
965; GCN-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128:
966; GCN:       ; %bb.0:
967; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
968; GCN-NEXT:    s_endpgm
969;
970; GFX11-LABEL: global_store_saddr_v8f16_zext_vgpr_offset_neg128:
971; GFX11:       ; %bb.0:
972; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3] offset:-128
973; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
974; GFX11-NEXT:    s_endpgm
975  %zext.offset = zext i32 %voffset to i64
976  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
977  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
978  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <8 x half> addrspace(1)*
979  store <8 x half> %data, <8 x half> addrspace(1)* %gep1.cast
980  ret void
981}
982
; Store a vector of two addrspace(1) pointers to %sbase + zext(%voffset) with
; no constant offset. The checks expect the pointer vector to be written by one
; 128-bit saddr-form store from v[1:4] (global_store_b128 on GFX11).
983define amdgpu_ps void @global_store_saddr_v2p1_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i8 addrspace(1)*> %data) {
984; GCN-LABEL: global_store_saddr_v2p1_zext_vgpr:
985; GCN:       ; %bb.0:
986; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3]
987; GCN-NEXT:    s_endpgm
988;
989; GFX11-LABEL: global_store_saddr_v2p1_zext_vgpr:
990; GFX11:       ; %bb.0:
991; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3]
992; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
993; GFX11-NEXT:    s_endpgm
994  %zext.offset = zext i32 %voffset to i64
995  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
996  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <2 x i8 addrspace(1)*> addrspace(1)*
997  store <2 x i8 addrspace(1)*> %data, <2 x i8 addrspace(1)*> addrspace(1)* %gep0.cast
998  ret void
999}
1000
; Store a vector of two addrspace(1) pointers to %sbase + zext(%voffset) - 128.
; The checks expect a single 128-bit saddr-form store with the constant folded
; into the immediate offset field (offset:-128).
1001define amdgpu_ps void @global_store_saddr_v2p1_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i8 addrspace(1)*> %data) {
1002; GCN-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128:
1003; GCN:       ; %bb.0:
1004; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
1005; GCN-NEXT:    s_endpgm
1006;
1007; GFX11-LABEL: global_store_saddr_v2p1_zext_vgpr_offset_neg128:
1008; GFX11:       ; %bb.0:
1009; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3] offset:-128
1010; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1011; GFX11-NEXT:    s_endpgm
1012  %zext.offset = zext i32 %voffset to i64
1013  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
1014  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
1015  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <2 x i8 addrspace(1)*> addrspace(1)*
1016  store <2 x i8 addrspace(1)*> %data, <2 x i8 addrspace(1)*> addrspace(1)* %gep1.cast
1017  ret void
1018}
1019
; Store a vector of four addrspace(3) pointers to %sbase + zext(%voffset) with
; no constant offset. The checks expect the pointer vector to be written by one
; 128-bit saddr-form store from v[1:4] (global_store_b128 on GFX11).
1020define amdgpu_ps void @global_store_saddr_v4p3_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i8 addrspace(3)*> %data) {
1021; GCN-LABEL: global_store_saddr_v4p3_zext_vgpr:
1022; GCN:       ; %bb.0:
1023; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3]
1024; GCN-NEXT:    s_endpgm
1025;
1026; GFX11-LABEL: global_store_saddr_v4p3_zext_vgpr:
1027; GFX11:       ; %bb.0:
1028; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3]
1029; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1030; GFX11-NEXT:    s_endpgm
1031  %zext.offset = zext i32 %voffset to i64
1032  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
1033  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to <4 x i8 addrspace(3)*> addrspace(1)*
1034  store <4 x i8 addrspace(3)*> %data, <4 x i8 addrspace(3)*> addrspace(1)* %gep0.cast
1035  ret void
1036}
1037
; Store a vector of four addrspace(3) pointers to %sbase + zext(%voffset) - 128.
; The checks expect a single 128-bit saddr-form store with the constant folded
; into the immediate offset field (offset:-128).
1038define amdgpu_ps void @global_store_saddr_v4p3_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <4 x i8 addrspace(3)*> %data) {
1039; GCN-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128:
1040; GCN:       ; %bb.0:
1041; GCN-NEXT:    global_store_dwordx4 v0, v[1:4], s[2:3] offset:-128
1042; GCN-NEXT:    s_endpgm
1043;
1044; GFX11-LABEL: global_store_saddr_v4p3_zext_vgpr_offset_neg128:
1045; GFX11:       ; %bb.0:
1046; GFX11-NEXT:    global_store_b128 v0, v[1:4], s[2:3] offset:-128
1047; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1048; GFX11-NEXT:    s_endpgm
1049  %zext.offset = zext i32 %voffset to i64
1050  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
1051  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
1052  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to <4 x i8 addrspace(3)*> addrspace(1)*
1053  store <4 x i8 addrspace(3)*> %data, <4 x i8 addrspace(3)*> addrspace(1)* %gep1.cast
1054  ret void
1055}
1056
1057; --------------------------------------------------------------------------------
1058; Atomic store
1059; --------------------------------------------------------------------------------
1060
; seq_cst atomic i32 store (align 4) to %sbase + zext(%voffset).
; The checks expect a 32-bit saddr-form store preceded by
; "s_waitcnt vmcnt(0) lgkmcnt(0)"; GFX10 and GFX11 additionally expect
; "s_waitcnt_vscnt null, 0x0". Because the GFX9 and GFX10 output differs, the
; two targets are checked under their own prefixes rather than the shared one.
1061define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
1062; GFX9-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
1063; GFX9:       ; %bb.0:
1064; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1065; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
1066; GFX9-NEXT:    s_endpgm
1067;
1068; GFX10-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
1069; GFX10:       ; %bb.0:
1070; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1071; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1072; GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
1073; GFX10-NEXT:    s_endpgm
1074;
1075; GFX11-LABEL: atomic_global_store_saddr_i32_zext_vgpr:
1076; GFX11:       ; %bb.0:
1077; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1078; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1079; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3]
1080; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1081; GFX11-NEXT:    s_endpgm
1082  %zext.offset = zext i32 %voffset to i64
1083  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
1084  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i32 addrspace(1)*
1085  store atomic i32 %data, i32 addrspace(1)* %gep0.cast seq_cst, align 4
1086  ret void
1087}
1088
; seq_cst atomic i32 store (align 4) to %sbase + zext(%voffset) - 128.
; The checks expect the same wait instructions as for a zero-offset seq_cst
; store ("s_waitcnt vmcnt(0) lgkmcnt(0)", plus "s_waitcnt_vscnt null, 0x0" on
; GFX10 and GFX11), with the constant folded into the store's immediate offset
; field (offset:-128).
1089define amdgpu_ps void @atomic_global_store_saddr_i32_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i32 %data) {
1090; GFX9-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
1091; GFX9:       ; %bb.0:
1092; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1093; GFX9-NEXT:    global_store_dword v0, v1, s[2:3] offset:-128
1094; GFX9-NEXT:    s_endpgm
1095;
1096; GFX10-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
1097; GFX10:       ; %bb.0:
1098; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1099; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1100; GFX10-NEXT:    global_store_dword v0, v1, s[2:3] offset:-128
1101; GFX10-NEXT:    s_endpgm
1102;
1103; GFX11-LABEL: atomic_global_store_saddr_i32_zext_vgpr_offset_neg128:
1104; GFX11:       ; %bb.0:
1105; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1106; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1107; GFX11-NEXT:    global_store_b32 v0, v1, s[2:3] offset:-128
1108; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1109; GFX11-NEXT:    s_endpgm
1110  %zext.offset = zext i32 %voffset to i64
1111  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
1112  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
1113  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i32 addrspace(1)*
1114  store atomic i32 %data, i32 addrspace(1)* %gep1.cast seq_cst, align 4
1115  ret void
1116}
1117
; seq_cst atomic i64 store (align 8) to %sbase + zext(%voffset).
; The checks expect a 64-bit saddr-form store from v[1:2] preceded by
; "s_waitcnt vmcnt(0) lgkmcnt(0)"; GFX10 and GFX11 additionally expect
; "s_waitcnt_vscnt null, 0x0". Because the GFX9 and GFX10 output differs, the
; two targets are checked under their own prefixes rather than the shared one.
1118define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, i64 %data) {
1119; GFX9-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
1120; GFX9:       ; %bb.0:
1121; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1122; GFX9-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
1123; GFX9-NEXT:    s_endpgm
1124;
1125; GFX10-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
1126; GFX10:       ; %bb.0:
1127; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1128; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1129; GFX10-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3]
1130; GFX10-NEXT:    s_endpgm
1131;
1132; GFX11-LABEL: atomic_global_store_saddr_i64_zext_vgpr:
1133; GFX11:       ; %bb.0:
1134; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1135; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1136; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3]
1137; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1138; GFX11-NEXT:    s_endpgm
1139  %zext.offset = zext i32 %voffset to i64
1140  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
1141  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i64 addrspace(1)*
1142  store atomic i64 %data, i64 addrspace(1)* %gep0.cast seq_cst, align 8
1143  ret void
1144}
1145
; seq_cst atomic i64 store (align 8) to %sbase + zext(%voffset) - 128.
; The checks expect the same wait instructions as for a zero-offset seq_cst
; store ("s_waitcnt vmcnt(0) lgkmcnt(0)", plus "s_waitcnt_vscnt null, 0x0" on
; GFX10 and GFX11), with the constant folded into the store's immediate offset
; field (offset:-128).
1146define amdgpu_ps void @atomic_global_store_saddr_i64_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, i64 %data) {
1147; GFX9-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
1148; GFX9:       ; %bb.0:
1149; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1150; GFX9-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
1151; GFX9-NEXT:    s_endpgm
1152;
1153; GFX10-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
1154; GFX10:       ; %bb.0:
1155; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1156; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1157; GFX10-NEXT:    global_store_dwordx2 v0, v[1:2], s[2:3] offset:-128
1158; GFX10-NEXT:    s_endpgm
1159;
1160; GFX11-LABEL: atomic_global_store_saddr_i64_zext_vgpr_offset_neg128:
1161; GFX11:       ; %bb.0:
1162; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1163; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1164; GFX11-NEXT:    global_store_b64 v0, v[1:2], s[2:3] offset:-128
1165; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1166; GFX11-NEXT:    s_endpgm
1167  %zext.offset = zext i32 %voffset to i64
1168  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
1169  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
1170  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i64 addrspace(1)*
1171  store atomic i64 %data, i64 addrspace(1)* %gep1.cast seq_cst, align 8
1172  ret void
1173}
1174
1175; --------------------------------------------------------------------------------
1176; D16 HI store (hi 16)
1177; --------------------------------------------------------------------------------
1178
; Store element 1 of the <2 x i16> argument as an i16 to
; %sbase + zext(%voffset). The checks expect the extract to be absorbed into a
; d16_hi store that reads v1 directly (global_store_short_d16_hi, or
; global_store_d16_hi_b16 on GFX11) with no separate shift or move.
1179define amdgpu_ps void @global_store_saddr_i16_d16hi_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i16> %data) {
1180; GCN-LABEL: global_store_saddr_i16_d16hi_zext_vgpr:
1181; GCN:       ; %bb.0:
1182; GCN-NEXT:    global_store_short_d16_hi v0, v1, s[2:3]
1183; GCN-NEXT:    s_endpgm
1184;
1185; GFX11-LABEL: global_store_saddr_i16_d16hi_zext_vgpr:
1186; GFX11:       ; %bb.0:
1187; GFX11-NEXT:    global_store_d16_hi_b16 v0, v1, s[2:3]
1188; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1189; GFX11-NEXT:    s_endpgm
1190  %zext.offset = zext i32 %voffset to i64
1191  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
1192  %gep0.cast = bitcast i8 addrspace(1)* %gep0 to i16 addrspace(1)*
1193  %data.hi = extractelement <2 x i16> %data, i32 1
1194  store i16 %data.hi, i16 addrspace(1)* %gep0.cast
1195  ret void
1196}
1197
; Store element 1 of the <2 x i16> argument as an i16 to
; %sbase + zext(%voffset) - 128. The checks expect a single d16_hi short store
; with the constant folded into the immediate offset field (offset:-128).
1198define amdgpu_ps void @global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i16> %data) {
1199; GCN-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
1200; GCN:       ; %bb.0:
1201; GCN-NEXT:    global_store_short_d16_hi v0, v1, s[2:3] offset:-128
1202; GCN-NEXT:    s_endpgm
1203;
1204; GFX11-LABEL: global_store_saddr_i16_d16hi_zext_vgpr_offset_neg128:
1205; GFX11:       ; %bb.0:
1206; GFX11-NEXT:    global_store_d16_hi_b16 v0, v1, s[2:3] offset:-128
1207; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1208; GFX11-NEXT:    s_endpgm
1209  %zext.offset = zext i32 %voffset to i64
1210  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
1211  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
1212  %gep1.cast = bitcast i8 addrspace(1)* %gep1 to i16 addrspace(1)*
1213  %data.hi = extractelement <2 x i16> %data, i32 1
1214  store i16 %data.hi, i16 addrspace(1)* %gep1.cast
1215  ret void
1216}
1217
; Extract element 1 of the <2 x i16> argument, truncate it to i8, and store
; that byte to %sbase + zext(%voffset). The checks expect both the extract and
; the truncate to be absorbed into a d16_hi byte store that reads v1 directly
; (global_store_byte_d16_hi, or global_store_d16_hi_b8 on GFX11).
1218define amdgpu_ps void @global_store_saddr_i16_d16hi_trunci8_zext_vgpr(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i16> %data) {
1219; GCN-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr:
1220; GCN:       ; %bb.0:
1221; GCN-NEXT:    global_store_byte_d16_hi v0, v1, s[2:3]
1222; GCN-NEXT:    s_endpgm
1223;
1224; GFX11-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr:
1225; GFX11:       ; %bb.0:
1226; GFX11-NEXT:    global_store_d16_hi_b8 v0, v1, s[2:3]
1227; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1228; GFX11-NEXT:    s_endpgm
1229  %zext.offset = zext i32 %voffset to i64
1230  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
1231  %data.hi = extractelement <2 x i16> %data, i32 1
1232  %data.hi.trunc = trunc i16 %data.hi to i8
1233  store i8 %data.hi.trunc, i8 addrspace(1)* %gep0
1234  ret void
1235}
1236
; Extract element 1 of the <2 x i16> argument, truncate it to i8, and store
; that byte to %sbase + zext(%voffset) - 128. The checks expect a single d16_hi
; byte store with the constant folded into the immediate offset field
; (offset:-128).
1237define amdgpu_ps void @global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128(i8 addrspace(1)* inreg %sbase, i32 %voffset, <2 x i16> %data) {
1238; GCN-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128:
1239; GCN:       ; %bb.0:
1240; GCN-NEXT:    global_store_byte_d16_hi v0, v1, s[2:3] offset:-128
1241; GCN-NEXT:    s_endpgm
1242;
1243; GFX11-LABEL: global_store_saddr_i16_d16hi_trunci8_zext_vgpr_offset_neg128:
1244; GFX11:       ; %bb.0:
1245; GFX11-NEXT:    global_store_d16_hi_b8 v0, v1, s[2:3] offset:-128
1246; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1247; GFX11-NEXT:    s_endpgm
1248  %zext.offset = zext i32 %voffset to i64
1249  %gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
1250  %gep1 = getelementptr inbounds i8, i8 addrspace(1)* %gep0, i64 -128
1251  %data.hi = extractelement <2 x i16> %data, i32 1
1252  %data.hi.trunc = trunc i16 %data.hi to i8
1253  store i8 %data.hi.trunc, i8 addrspace(1)* %gep1
1254  ret void
1255}
1256