1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
4; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
5
6; Test splitting flat instruction offsets into the low and high bits
7; when the offset doesn't fit in the offset field.
8
; Smallest positive offset (1). Per the checks below, GFX9 and GFX11 fold it
; into the load's offset field, while GFX10 adds it to the 64-bit address with
; a VALU add/add-with-carry pair and issues the load with no offset.
9define i8 @flat_inst_valu_offset_1(i8* %p) {
10; GFX9-LABEL: flat_inst_valu_offset_1:
11; GFX9:       ; %bb.0:
12; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:1
14; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
15; GFX9-NEXT:    s_setpc_b64 s[30:31]
16;
17; GFX10-LABEL: flat_inst_valu_offset_1:
18; GFX10:       ; %bb.0:
19; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
21; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 1
22; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
23; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
24; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
25; GFX10-NEXT:    s_setpc_b64 s[30:31]
26;
27; GFX11-LABEL: flat_inst_valu_offset_1:
28; GFX11:       ; %bb.0:
29; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
31; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:1
32; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
33; GFX11-NEXT:    s_setpc_b64 s[30:31]
34  %gep = getelementptr i8, i8* %p, i64 1
35  %load = load i8, i8* %gep, align 4
36  ret i8 %load
37}
38
; Offset 2047 (0x7ff), the largest value that fits in 11 bits. GFX9 and GFX11
; are expected to fold it into the load's offset field; GFX10 is expected to
; add 0x7ff to the address and load with no offset.
39define i8 @flat_inst_valu_offset_11bit_max(i8* %p) {
40; GFX9-LABEL: flat_inst_valu_offset_11bit_max:
41; GFX9:       ; %bb.0:
42; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2047
44; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
45; GFX9-NEXT:    s_setpc_b64 s[30:31]
46;
47; GFX10-LABEL: flat_inst_valu_offset_11bit_max:
48; GFX10:       ; %bb.0:
49; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
51; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
52; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
53; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
54; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
55; GFX10-NEXT:    s_setpc_b64 s[30:31]
56;
57; GFX11-LABEL: flat_inst_valu_offset_11bit_max:
58; GFX11:       ; %bb.0:
59; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
61; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:2047
62; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
63; GFX11-NEXT:    s_setpc_b64 s[30:31]
64  %gep = getelementptr i8, i8* %p, i64 2047
65  %load = load i8, i8* %gep, align 4
66  ret i8 %load
67}
68
; Offset 4095 (0xfff), the largest value that fits in 12 bits. GFX9 and GFX11
; are expected to fold it into the load's offset field; GFX10 is expected to
; add 0xfff to the address and load with no offset.
69define i8 @flat_inst_valu_offset_12bit_max(i8* %p) {
70; GFX9-LABEL: flat_inst_valu_offset_12bit_max:
71; GFX9:       ; %bb.0:
72; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
74; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
75; GFX9-NEXT:    s_setpc_b64 s[30:31]
76;
77; GFX10-LABEL: flat_inst_valu_offset_12bit_max:
78; GFX10:       ; %bb.0:
79; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
81; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
82; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
83; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
84; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
85; GFX10-NEXT:    s_setpc_b64 s[30:31]
86;
87; GFX11-LABEL: flat_inst_valu_offset_12bit_max:
88; GFX11:       ; %bb.0:
89; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
91; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
92; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
93; GFX11-NEXT:    s_setpc_b64 s[30:31]
94  %gep = getelementptr i8, i8* %p, i64 4095
95  %load = load i8, i8* %gep, align 4
96  ret i8 %load
97}
98
; Offset 8191 (0x1fff), the largest value that fits in 13 bits. GFX9 and GFX11
; are expected to split it: 0x1000 is added to the address and the remaining
; 4095 goes in the load's offset field. GFX10 is expected to add the whole
; 0x1fff to the address and load with no offset.
99define i8 @flat_inst_valu_offset_13bit_max(i8* %p) {
100; GFX9-LABEL: flat_inst_valu_offset_13bit_max:
101; GFX9:       ; %bb.0:
102; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
104; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
105; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
106; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
107; GFX9-NEXT:    s_setpc_b64 s[30:31]
108;
109; GFX10-LABEL: flat_inst_valu_offset_13bit_max:
110; GFX10:       ; %bb.0:
111; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
113; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
114; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
115; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
116; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
117; GFX10-NEXT:    s_setpc_b64 s[30:31]
118;
119; GFX11-LABEL: flat_inst_valu_offset_13bit_max:
120; GFX11:       ; %bb.0:
121; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
123; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
124; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
125; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
126; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
127; GFX11-NEXT:    s_setpc_b64 s[30:31]
128  %gep = getelementptr i8, i8* %p, i64 8191
129  %load = load i8, i8* %gep, align 4
130  ret i8 %load
131}
132
; Negative offset -2048. None of the three targets is expected to use the
; load's offset field: each adds 0xfffff800 to the low dword and -1 (plus
; carry) to the high dword, then loads with no offset.
133define i8 @flat_inst_valu_offset_neg_11bit_max(i8* %p) {
134; GFX9-LABEL: flat_inst_valu_offset_neg_11bit_max:
135; GFX9:       ; %bb.0:
136; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
137; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
138; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
139; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
140; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
141; GFX9-NEXT:    s_setpc_b64 s[30:31]
142;
143; GFX10-LABEL: flat_inst_valu_offset_neg_11bit_max:
144; GFX10:       ; %bb.0:
145; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
147; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
148; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
149; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
150; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
151; GFX10-NEXT:    s_setpc_b64 s[30:31]
152;
153; GFX11-LABEL: flat_inst_valu_offset_neg_11bit_max:
154; GFX11:       ; %bb.0:
155; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
157; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
158; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
159; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
160; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
161; GFX11-NEXT:    s_setpc_b64 s[30:31]
162  %gep = getelementptr i8, i8* %p, i64 -2048
163  %load = load i8, i8* %gep, align 4
164  ret i8 %load
165}
166
; Negative offset -4096. None of the three targets is expected to use the
; load's offset field: each adds 0xfffff000 to the low dword and -1 (plus
; carry) to the high dword, then loads with no offset.
167define i8 @flat_inst_valu_offset_neg_12bit_max(i8* %p) {
168; GFX9-LABEL: flat_inst_valu_offset_neg_12bit_max:
169; GFX9:       ; %bb.0:
170; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
172; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
173; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
174; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
175; GFX9-NEXT:    s_setpc_b64 s[30:31]
176;
177; GFX10-LABEL: flat_inst_valu_offset_neg_12bit_max:
178; GFX10:       ; %bb.0:
179; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
181; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
182; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
183; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
184; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
185; GFX10-NEXT:    s_setpc_b64 s[30:31]
186;
187; GFX11-LABEL: flat_inst_valu_offset_neg_12bit_max:
188; GFX11:       ; %bb.0:
189; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
191; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
192; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
193; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
194; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
195; GFX11-NEXT:    s_setpc_b64 s[30:31]
196  %gep = getelementptr i8, i8* %p, i64 -4096
197  %load = load i8, i8* %gep, align 4
198  ret i8 %load
199}
200
; Negative offset -8192. None of the three targets is expected to use the
; load's offset field: each adds 0xffffe000 to the low dword and -1 (plus
; carry) to the high dword, then loads with no offset.
201define i8 @flat_inst_valu_offset_neg_13bit_max(i8* %p) {
202; GFX9-LABEL: flat_inst_valu_offset_neg_13bit_max:
203; GFX9:       ; %bb.0:
204; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
205; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
206; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
207; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
208; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
209; GFX9-NEXT:    s_setpc_b64 s[30:31]
210;
211; GFX10-LABEL: flat_inst_valu_offset_neg_13bit_max:
212; GFX10:       ; %bb.0:
213; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
215; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
216; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
217; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
218; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
219; GFX10-NEXT:    s_setpc_b64 s[30:31]
220;
221; GFX11-LABEL: flat_inst_valu_offset_neg_13bit_max:
222; GFX11:       ; %bb.0:
223; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
224; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
225; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
226; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
227; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
228; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
229; GFX11-NEXT:    s_setpc_b64 s[30:31]
230  %gep = getelementptr i8, i8* %p, i64 -8192
231  %load = load i8, i8* %gep, align 4
232  ret i8 %load
233}
234
; Offset 4095 (0xfff). NOTE(review): this is the same GEP constant as
; flat_inst_valu_offset_12bit_max, and the expected output is the same too.
; GFX9 and GFX11 fold it into the load's offset field; GFX10 adds 0xfff to the
; address and loads with no offset.
235define i8 @flat_inst_valu_offset_2x_11bit_max(i8* %p) {
236; GFX9-LABEL: flat_inst_valu_offset_2x_11bit_max:
237; GFX9:       ; %bb.0:
238; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
239; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
240; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
241; GFX9-NEXT:    s_setpc_b64 s[30:31]
242;
243; GFX10-LABEL: flat_inst_valu_offset_2x_11bit_max:
244; GFX10:       ; %bb.0:
245; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
247; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
248; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
249; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
250; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
251; GFX10-NEXT:    s_setpc_b64 s[30:31]
252;
253; GFX11-LABEL: flat_inst_valu_offset_2x_11bit_max:
254; GFX11:       ; %bb.0:
255; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
257; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
258; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
259; GFX11-NEXT:    s_setpc_b64 s[30:31]
260  %gep = getelementptr i8, i8* %p, i64 4095
261  %load = load i8, i8* %gep, align 4
262  ret i8 %load
263}
264
; Offset 8191 (0x1fff). NOTE(review): this is the same GEP constant as
; flat_inst_valu_offset_13bit_max. GFX9 and GFX11 are expected to add 0x1000
; to the address and keep 4095 in the load's offset field; GFX10 is expected
; to add the whole 0x1fff and load with no offset.
265define i8 @flat_inst_valu_offset_2x_12bit_max(i8* %p) {
266; GFX9-LABEL: flat_inst_valu_offset_2x_12bit_max:
267; GFX9:       ; %bb.0:
268; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
269; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
270; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
271; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
272; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
273; GFX9-NEXT:    s_setpc_b64 s[30:31]
274;
275; GFX10-LABEL: flat_inst_valu_offset_2x_12bit_max:
276; GFX10:       ; %bb.0:
277; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
279; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
280; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
281; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
282; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
283; GFX10-NEXT:    s_setpc_b64 s[30:31]
284;
285; GFX11-LABEL: flat_inst_valu_offset_2x_12bit_max:
286; GFX11:       ; %bb.0:
287; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
289; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
290; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
291; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
292; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
293; GFX11-NEXT:    s_setpc_b64 s[30:31]
294  %gep = getelementptr i8, i8* %p, i64 8191
295  %load = load i8, i8* %gep, align 4
296  ret i8 %load
297}
298
; Offset 16383 (0x3fff). GFX9 and GFX11 are expected to add 0x3000 to the
; address and keep the remaining 4095 in the load's offset field; GFX10 is
; expected to add the whole 0x3fff and load with no offset.
299define i8 @flat_inst_valu_offset_2x_13bit_max(i8* %p) {
300; GFX9-LABEL: flat_inst_valu_offset_2x_13bit_max:
301; GFX9:       ; %bb.0:
302; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
303; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
304; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
305; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
306; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
307; GFX9-NEXT:    s_setpc_b64 s[30:31]
308;
309; GFX10-LABEL: flat_inst_valu_offset_2x_13bit_max:
310; GFX10:       ; %bb.0:
311; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
312; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
313; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3fff, v0
314; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
315; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
316; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
317; GFX10-NEXT:    s_setpc_b64 s[30:31]
318;
319; GFX11-LABEL: flat_inst_valu_offset_2x_13bit_max:
320; GFX11:       ; %bb.0:
321; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
323; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3000, v0
324; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
325; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
326; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
327; GFX11-NEXT:    s_setpc_b64 s[30:31]
328  %gep = getelementptr i8, i8* %p, i64 16383
329  %load = load i8, i8* %gep, align 4
330  ret i8 %load
331}
332
; Negative offset -4096. NOTE(review): this is the same GEP constant as
; flat_inst_valu_offset_neg_12bit_max. All three targets are expected to add
; 0xfffff000 / -1 to the low / high address dwords and load with no offset.
333define i8 @flat_inst_valu_offset_2x_neg_11bit_max(i8* %p) {
334; GFX9-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
335; GFX9:       ; %bb.0:
336; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
337; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
338; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
339; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
340; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
341; GFX9-NEXT:    s_setpc_b64 s[30:31]
342;
343; GFX10-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
344; GFX10:       ; %bb.0:
345; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
346; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
347; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
348; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
349; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
350; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
351; GFX10-NEXT:    s_setpc_b64 s[30:31]
352;
353; GFX11-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
354; GFX11:       ; %bb.0:
355; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
356; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
357; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
358; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
359; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
360; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
361; GFX11-NEXT:    s_setpc_b64 s[30:31]
362  %gep = getelementptr i8, i8* %p, i64 -4096
363  %load = load i8, i8* %gep, align 4
364  ret i8 %load
365}
366
; Negative offset -8192. NOTE(review): this is the same GEP constant as
; flat_inst_valu_offset_neg_13bit_max. All three targets are expected to add
; 0xffffe000 / -1 to the low / high address dwords and load with no offset.
367define i8 @flat_inst_valu_offset_2x_neg_12bit_max(i8* %p) {
368; GFX9-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
369; GFX9:       ; %bb.0:
370; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
372; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
373; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
374; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
375; GFX9-NEXT:    s_setpc_b64 s[30:31]
376;
377; GFX10-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
378; GFX10:       ; %bb.0:
379; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
380; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
381; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
382; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
383; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
384; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
385; GFX10-NEXT:    s_setpc_b64 s[30:31]
386;
387; GFX11-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
388; GFX11:       ; %bb.0:
389; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
391; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
392; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
393; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
394; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
395; GFX11-NEXT:    s_setpc_b64 s[30:31]
396  %gep = getelementptr i8, i8* %p, i64 -8192
397  %load = load i8, i8* %gep, align 4
398  ret i8 %load
399}
400
; Negative offset -16384. All three targets are expected to add 0xffffc000 to
; the low address dword and -1 (plus carry) to the high dword, then load with
; no offset.
401define i8 @flat_inst_valu_offset_2x_neg_13bit_max(i8* %p) {
402; GFX9-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
403; GFX9:       ; %bb.0:
404; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
405; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
406; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
407; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
408; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
409; GFX9-NEXT:    s_setpc_b64 s[30:31]
410;
411; GFX10-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
412; GFX10:       ; %bb.0:
413; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
415; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
416; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
417; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
418; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
419; GFX10-NEXT:    s_setpc_b64 s[30:31]
420;
421; GFX11-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
422; GFX11:       ; %bb.0:
423; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
424; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
425; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
426; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
427; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
428; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
429; GFX11-NEXT:    s_setpc_b64 s[30:31]
430  %gep = getelementptr i8, i8* %p, i64 -16384
431  %load = load i8, i8* %gep, align 4
432  ret i8 %load
433}
434
435; Fill 11-bit low-bits (1ull << 33) | 2047
; The GEP constant 8589936639 is 2^33 + 2047. GFX9 and GFX11 are expected to
; add 0 / 2 to the low / high address dwords and keep 2047 in the load's
; offset field; GFX10 is expected to add 0x7ff / 2 and load with no offset.
436define i8 @flat_inst_valu_offset_64bit_11bit_split0(i8* %p) {
437; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
438; GFX9:       ; %bb.0:
439; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
441; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
442; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2047
443; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
444; GFX9-NEXT:    s_setpc_b64 s[30:31]
445;
446; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
447; GFX10:       ; %bb.0:
448; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
450; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
451; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
452; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
453; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
454; GFX10-NEXT:    s_setpc_b64 s[30:31]
455;
456; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
457; GFX11:       ; %bb.0:
458; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
460; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
461; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
462; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:2047
463; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
464; GFX11-NEXT:    s_setpc_b64 s[30:31]
465  %gep = getelementptr i8, i8* %p, i64 8589936639
466  %load = load i8, i8* %gep, align 4
467  ret i8 %load
468}
469
470; Fill 11-bit low-bits (1ull << 33) | 2048
; The GEP constant 8589936640 is 2^33 + 2048. GFX9 and GFX11 are expected to
; add 0 / 2 to the low / high address dwords and keep 2048 in the load's
; offset field; GFX10 is expected to add 0x800 / 2 and load with no offset.
471define i8 @flat_inst_valu_offset_64bit_11bit_split1(i8* %p) {
472; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
473; GFX9:       ; %bb.0:
474; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
476; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
477; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2048
478; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
479; GFX9-NEXT:    s_setpc_b64 s[30:31]
480;
481; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
482; GFX10:       ; %bb.0:
483; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
484; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
485; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
486; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
487; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
488; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
489; GFX10-NEXT:    s_setpc_b64 s[30:31]
490;
491; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
492; GFX11:       ; %bb.0:
493; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
495; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
496; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
497; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:2048
498; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
499; GFX11-NEXT:    s_setpc_b64 s[30:31]
500  %gep = getelementptr i8, i8* %p, i64 8589936640
501  %load = load i8, i8* %gep, align 4
502  ret i8 %load
503}
504
505; Fill 12-bit low-bits (1ull << 33) | 4095
; The GEP constant 8589938687 is 2^33 + 4095. GFX9 and GFX11 are expected to
; add 0 / 2 to the low / high address dwords and keep 4095 in the load's
; offset field; GFX10 is expected to add 0xfff / 2 and load with no offset.
506define i8 @flat_inst_valu_offset_64bit_12bit_split0(i8* %p) {
507; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
508; GFX9:       ; %bb.0:
509; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
510; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
511; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
512; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
513; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
514; GFX9-NEXT:    s_setpc_b64 s[30:31]
515;
516; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
517; GFX10:       ; %bb.0:
518; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
519; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
520; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
521; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
522; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
523; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
524; GFX10-NEXT:    s_setpc_b64 s[30:31]
525;
526; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
527; GFX11:       ; %bb.0:
528; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
530; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
531; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
532; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
533; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
534; GFX11-NEXT:    s_setpc_b64 s[30:31]
535  %gep = getelementptr i8, i8* %p, i64 8589938687
536  %load = load i8, i8* %gep, align 4
537  ret i8 %load
538}
539
540; Fill 12-bit low-bits (1ull << 33) | 4096
; The GEP constant 8589938688 is 2^33 + 4096. All three targets are expected
; to add 0x1000 / 2 to the low / high address dwords and load with no offset;
; nothing is left over for the load's offset field.
541define i8 @flat_inst_valu_offset_64bit_12bit_split1(i8* %p) {
542; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
543; GFX9:       ; %bb.0:
544; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
545; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
546; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
547; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
548; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
549; GFX9-NEXT:    s_setpc_b64 s[30:31]
550;
551; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
552; GFX10:       ; %bb.0:
553; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
555; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
556; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
557; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
558; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
559; GFX10-NEXT:    s_setpc_b64 s[30:31]
560;
561; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
562; GFX11:       ; %bb.0:
563; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
564; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
565; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
566; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
567; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
568; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
569; GFX11-NEXT:    s_setpc_b64 s[30:31]
570  %gep = getelementptr i8, i8* %p, i64 8589938688
571  %load = load i8, i8* %gep, align 4
572  ret i8 %load
573}
574
575; Fill 13-bit low-bits (1ull << 33) | 8191
; The GEP constant 8589942783 is 2^33 + 8191. GFX9 and GFX11 are expected to
; add 0x1000 / 2 to the low / high address dwords and keep 4095 in the load's
; offset field; GFX10 is expected to add 0x1fff / 2 and load with no offset.
576define i8 @flat_inst_valu_offset_64bit_13bit_split0(i8* %p) {
577; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
578; GFX9:       ; %bb.0:
579; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
580; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
581; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
582; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
583; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
584; GFX9-NEXT:    s_setpc_b64 s[30:31]
585;
586; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
587; GFX10:       ; %bb.0:
588; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
589; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
590; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
591; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
592; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
593; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
594; GFX10-NEXT:    s_setpc_b64 s[30:31]
595;
596; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
597; GFX11:       ; %bb.0:
598; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
599; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
600; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
601; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
602; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095
603; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
604; GFX11-NEXT:    s_setpc_b64 s[30:31]
605  %gep = getelementptr i8, i8* %p, i64 8589942783
606  %load = load i8, i8* %gep, align 4
607  ret i8 %load
608}
609
610; Fill 13-bit low-bits (1ull << 33) | 8192
; The GEP constant 8589942784 is 2^33 + 8192. All three targets are expected
; to add 0x2000 / 2 to the low / high address dwords and load with no offset;
; nothing is left over for the load's offset field.
611define i8 @flat_inst_valu_offset_64bit_13bit_split1(i8* %p) {
612; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
613; GFX9:       ; %bb.0:
614; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
615; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
616; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
617; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
618; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
619; GFX9-NEXT:    s_setpc_b64 s[30:31]
620;
621; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
622; GFX10:       ; %bb.0:
623; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
624; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
625; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
626; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
627; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
628; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
629; GFX10-NEXT:    s_setpc_b64 s[30:31]
630;
631; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
632; GFX11:       ; %bb.0:
633; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
634; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
635; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
636; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
637; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
638; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
639; GFX11-NEXT:    s_setpc_b64 s[30:31]
640  %gep = getelementptr i8, i8* %p, i64 8589942784
641  %load = load i8, i8* %gep, align 4
642  ret i8 %load
643}
644
645; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; The GEP constant is written as the signed value -9223372036854773761.
; No target is expected to use the load's offset field here: each adds 0x7ff
; to the low address dword and a high-dword addend with carry. GFX10 and GFX11
; use the literal 0x80000000; GFX9 first builds its addend in v2 with
; v_bfrev_b32.
646define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(i8* %p) {
647; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
648; GFX9:       ; %bb.0:
649; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
650; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff, v0
651; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
652; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
653; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
654; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
655; GFX9-NEXT:    s_setpc_b64 s[30:31]
656;
657; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
658; GFX10:       ; %bb.0:
659; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
660; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
661; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
662; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
663; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
664; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
665; GFX10-NEXT:    s_setpc_b64 s[30:31]
666;
667; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
668; GFX11:       ; %bb.0:
669; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
670; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
671; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, v0
672; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
673; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
674; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
675; GFX11-NEXT:    s_setpc_b64 s[30:31]
676  %gep = getelementptr i8, i8* %p, i64 -9223372036854773761
677  %load = load i8, i8* %gep, align 4
678  ret i8 %load
679}
680
681; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; The GEP constant is written as the signed value -9223372036854773760.
; No target is expected to use the load's offset field here: each adds 0x800
; to the low address dword and a high-dword addend with carry. GFX10 and GFX11
; use the literal 0x80000000; GFX9 first builds its addend in v2 with
; v_bfrev_b32.
682define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(i8* %p) {
683; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
684; GFX9:       ; %bb.0:
685; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
686; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x800, v0
687; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
688; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
689; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
690; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
691; GFX9-NEXT:    s_setpc_b64 s[30:31]
692;
693; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
694; GFX10:       ; %bb.0:
695; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
696; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
697; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
698; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
699; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
700; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
701; GFX10-NEXT:    s_setpc_b64 s[30:31]
702;
703; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
704; GFX11:       ; %bb.0:
705; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
706; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
707; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
708; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
709; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
710; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
711; GFX11-NEXT:    s_setpc_b64 s[30:31]
712  %gep = getelementptr i8, i8* %p, i64 -9223372036854773760
713  %load = load i8, i8* %gep, align 4
714  ret i8 %load
715}
716
717; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; The GEP constant is written as the signed value -9223372036854771713.
; No target is expected to use the load's offset field here: each adds 0xfff
; to the low address dword and a high-dword addend with carry. GFX10 and GFX11
; use the literal 0x80000000; GFX9 first builds its addend in v2 with
; v_bfrev_b32.
718define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(i8* %p) {
719; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
720; GFX9:       ; %bb.0:
721; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
722; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
723; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
724; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
725; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
726; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
727; GFX9-NEXT:    s_setpc_b64 s[30:31]
728;
729; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
730; GFX10:       ; %bb.0:
731; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
732; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
733; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
734; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
735; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
736; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
737; GFX10-NEXT:    s_setpc_b64 s[30:31]
738;
739; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
740; GFX11:       ; %bb.0:
741; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
742; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
743; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, v0
744; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
745; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
746; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
747; GFX11-NEXT:    s_setpc_b64 s[30:31]
748  %gep = getelementptr i8, i8* %p, i64 -9223372036854771713
749  %load = load i8, i8* %gep, align 4
750  ret i8 %load
751}
752
753; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; Non-kernel function: returns the byte loaded from %p plus the constant
; -9223372036854771712. All three check prefixes expect the whole constant to
; be added to the pointer with a two-instruction 64-bit add (low addend 0x1000)
; and the flat load to carry no immediate offset. GFX9 builds the high addend
; with v_bfrev_b32 of 1; GFX10 and GFX11 use the literal 0x80000000.
754define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(i8* %p) {
755; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
756; GFX9:       ; %bb.0:
757; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
758; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
759; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
760; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
761; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
762; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
763; GFX9-NEXT:    s_setpc_b64 s[30:31]
764;
765; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
766; GFX10:       ; %bb.0:
767; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
768; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
769; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
770; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
771; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
772; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
773; GFX10-NEXT:    s_setpc_b64 s[30:31]
774;
775; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
776; GFX11:       ; %bb.0:
777; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
778; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
779; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
780; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
781; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
782; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
783; GFX11-NEXT:    s_setpc_b64 s[30:31]
784  %gep = getelementptr i8, i8* %p, i64 -9223372036854771712
785  %load = load i8, i8* %gep, align 4
786  ret i8 %load
787}
788
789; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; Non-kernel function: returns the byte loaded from %p plus the constant
; -9223372036854767617. All three check prefixes expect the whole constant to
; be added to the pointer with a two-instruction 64-bit add (low addend 0x1fff)
; and the flat load to carry no immediate offset. GFX9 builds the high addend
; with v_bfrev_b32 of 1; GFX10 and GFX11 use the literal 0x80000000.
790define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(i8* %p) {
791; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
792; GFX9:       ; %bb.0:
793; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
794; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
795; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
796; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
797; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
798; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
799; GFX9-NEXT:    s_setpc_b64 s[30:31]
800;
801; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
802; GFX10:       ; %bb.0:
803; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
804; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
805; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
806; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
807; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
808; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
809; GFX10-NEXT:    s_setpc_b64 s[30:31]
810;
811; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
812; GFX11:       ; %bb.0:
813; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
814; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
815; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, v0
816; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
817; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
818; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
819; GFX11-NEXT:    s_setpc_b64 s[30:31]
820  %gep = getelementptr i8, i8* %p, i64 -9223372036854767617
821  %load = load i8, i8* %gep, align 4
822  ret i8 %load
823}
824
825; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; Non-kernel function: returns the byte loaded from %p plus the constant
; -9223372036854767616. All three check prefixes expect the whole constant to
; be added to the pointer with a two-instruction 64-bit add (low addend 0x2000)
; and the flat load to carry no immediate offset. GFX9 builds the high addend
; with v_bfrev_b32 of 1; GFX10 and GFX11 use the literal 0x80000000.
826define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(i8* %p) {
827; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
828; GFX9:       ; %bb.0:
829; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
830; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
831; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
832; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
833; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
834; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
835; GFX9-NEXT:    s_setpc_b64 s[30:31]
836;
837; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
838; GFX10:       ; %bb.0:
839; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
840; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
841; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
842; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
843; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
844; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
845; GFX10-NEXT:    s_setpc_b64 s[30:31]
846;
847; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
848; GFX11:       ; %bb.0:
849; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
850; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
851; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
852; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
853; GFX11-NEXT:    flat_load_u8 v0, v[0:1]
854; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
855; GFX11-NEXT:    s_setpc_b64 s[30:31]
856  %gep = getelementptr i8, i8* %p, i64 -9223372036854767616
857  %load = load i8, i8* %gep, align 4
858  ret i8 %load
859}
860
; Kernel variant: volatile byte load from the kernel argument %p plus 1; the
; loaded byte is then stored through an undef pointer. GFX9 and GFX11 are
; expected to place the 1 in the flat load's offset field, while GFX10 is
; expected to add it to the pointer with s_add_u32/s_addc_u32 before the load.
861define amdgpu_kernel void @flat_inst_salu_offset_1(i8* %p) {
862; GFX9-LABEL: flat_inst_salu_offset_1:
863; GFX9:       ; %bb.0:
864; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
865; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
866; GFX9-NEXT:    v_mov_b32_e32 v0, s0
867; GFX9-NEXT:    v_mov_b32_e32 v1, s1
868; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:1 glc
869; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
870; GFX9-NEXT:    flat_store_byte v[0:1], v0
871; GFX9-NEXT:    s_endpgm
872;
873; GFX10-LABEL: flat_inst_salu_offset_1:
874; GFX10:       ; %bb.0:
875; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
876; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
877; GFX10-NEXT:    s_add_u32 s0, s0, 1
878; GFX10-NEXT:    s_addc_u32 s1, s1, 0
879; GFX10-NEXT:    v_mov_b32_e32 v0, s0
880; GFX10-NEXT:    v_mov_b32_e32 v1, s1
881; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
882; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
883; GFX10-NEXT:    flat_store_byte v[0:1], v0
884; GFX10-NEXT:    s_endpgm
885;
886; GFX11-LABEL: flat_inst_salu_offset_1:
887; GFX11:       ; %bb.0:
888; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
889; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
890; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
891; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:1 glc dlc
892; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
893; GFX11-NEXT:    flat_store_b8 v[0:1], v0
894; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
895; GFX11-NEXT:    s_endpgm
896  %gep = getelementptr i8, i8* %p, i64 1
897  %load = load volatile i8, i8* %gep, align 1
898  store i8 %load, i8* undef
899  ret void
900}
901
; Kernel variant: volatile byte load from the kernel argument %p plus 2047; the
; loaded byte is then stored through an undef pointer. GFX9 and GFX11 are
; expected to place 2047 in the flat load's offset field, while GFX10 is
; expected to add 0x7ff to the pointer with s_add_u32/s_addc_u32 first.
902define amdgpu_kernel void @flat_inst_salu_offset_11bit_max(i8* %p) {
903; GFX9-LABEL: flat_inst_salu_offset_11bit_max:
904; GFX9:       ; %bb.0:
905; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
906; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
907; GFX9-NEXT:    v_mov_b32_e32 v0, s0
908; GFX9-NEXT:    v_mov_b32_e32 v1, s1
909; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2047 glc
910; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
911; GFX9-NEXT:    flat_store_byte v[0:1], v0
912; GFX9-NEXT:    s_endpgm
913;
914; GFX10-LABEL: flat_inst_salu_offset_11bit_max:
915; GFX10:       ; %bb.0:
916; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
917; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
918; GFX10-NEXT:    s_add_u32 s0, s0, 0x7ff
919; GFX10-NEXT:    s_addc_u32 s1, s1, 0
920; GFX10-NEXT:    v_mov_b32_e32 v0, s0
921; GFX10-NEXT:    v_mov_b32_e32 v1, s1
922; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
923; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
924; GFX10-NEXT:    flat_store_byte v[0:1], v0
925; GFX10-NEXT:    s_endpgm
926;
927; GFX11-LABEL: flat_inst_salu_offset_11bit_max:
928; GFX11:       ; %bb.0:
929; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
930; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
931; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
932; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:2047 glc dlc
933; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
934; GFX11-NEXT:    flat_store_b8 v[0:1], v0
935; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
936; GFX11-NEXT:    s_endpgm
937  %gep = getelementptr i8, i8* %p, i64 2047
938  %load = load volatile i8, i8* %gep, align 1
939  store i8 %load, i8* undef
940  ret void
941}
942
; Kernel variant: volatile byte load from the kernel argument %p plus 4095; the
; loaded byte is then stored through an undef pointer. GFX9 and GFX11 are
; expected to place 4095 in the flat load's offset field, while GFX10 is
; expected to add 0xfff to the pointer with s_add_u32/s_addc_u32 first.
943define amdgpu_kernel void @flat_inst_salu_offset_12bit_max(i8* %p) {
944; GFX9-LABEL: flat_inst_salu_offset_12bit_max:
945; GFX9:       ; %bb.0:
946; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
947; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
948; GFX9-NEXT:    v_mov_b32_e32 v0, s0
949; GFX9-NEXT:    v_mov_b32_e32 v1, s1
950; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
951; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
952; GFX9-NEXT:    flat_store_byte v[0:1], v0
953; GFX9-NEXT:    s_endpgm
954;
955; GFX10-LABEL: flat_inst_salu_offset_12bit_max:
956; GFX10:       ; %bb.0:
957; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
958; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
959; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
960; GFX10-NEXT:    s_addc_u32 s1, s1, 0
961; GFX10-NEXT:    v_mov_b32_e32 v0, s0
962; GFX10-NEXT:    v_mov_b32_e32 v1, s1
963; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
964; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
965; GFX10-NEXT:    flat_store_byte v[0:1], v0
966; GFX10-NEXT:    s_endpgm
967;
968; GFX11-LABEL: flat_inst_salu_offset_12bit_max:
969; GFX11:       ; %bb.0:
970; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
971; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
972; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
973; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
974; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
975; GFX11-NEXT:    flat_store_b8 v[0:1], v0
976; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
977; GFX11-NEXT:    s_endpgm
978  %gep = getelementptr i8, i8* %p, i64 4095
979  %load = load volatile i8, i8* %gep, align 1
980  store i8 %load, i8* undef
981  ret void
982}
983
; Kernel variant: volatile byte load from the kernel argument %p plus 8191; the
; loaded byte is then stored through an undef pointer. GFX9 and GFX11 are
; expected to split the constant into a vector add of 0x1000 plus an offset
; field of 4095, while GFX10 is expected to add the full 0x1fff to the pointer
; with s_add_u32/s_addc_u32 and use no offset field.
984define amdgpu_kernel void @flat_inst_salu_offset_13bit_max(i8* %p) {
985; GFX9-LABEL: flat_inst_salu_offset_13bit_max:
986; GFX9:       ; %bb.0:
987; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
988; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
989; GFX9-NEXT:    v_mov_b32_e32 v0, s0
990; GFX9-NEXT:    v_mov_b32_e32 v1, s1
991; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
992; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
993; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
994; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
995; GFX9-NEXT:    flat_store_byte v[0:1], v0
996; GFX9-NEXT:    s_endpgm
997;
998; GFX10-LABEL: flat_inst_salu_offset_13bit_max:
999; GFX10:       ; %bb.0:
1000; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1001; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1002; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
1003; GFX10-NEXT:    s_addc_u32 s1, s1, 0
1004; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1005; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1006; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1007; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1008; GFX10-NEXT:    flat_store_byte v[0:1], v0
1009; GFX10-NEXT:    s_endpgm
1010;
1011; GFX11-LABEL: flat_inst_salu_offset_13bit_max:
1012; GFX11:       ; %bb.0:
1013; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1014; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1015; GFX11-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
1016; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1017; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, s1, s0
1018; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
1019; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1020; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1021; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1022; GFX11-NEXT:    s_endpgm
1023  %gep = getelementptr i8, i8* %p, i64 8191
1024  %load = load volatile i8, i8* %gep, align 1
1025  store i8 %load, i8* undef
1026  ret void
1027}
1028
; Kernel variant: volatile byte load from the kernel argument %p minus 2048;
; the loaded byte is then stored through an undef pointer. No check prefix
; expects an offset field on the flat load: GFX9 and GFX11 are expected to add
; 0xfffff800 / -1 to the pointer with vector adds, GFX10 with
; s_add_u32/s_addc_u32.
1029define amdgpu_kernel void @flat_inst_salu_offset_neg_11bit_max(i8* %p) {
1030; GFX9-LABEL: flat_inst_salu_offset_neg_11bit_max:
1031; GFX9:       ; %bb.0:
1032; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1033; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1034; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1035; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1036; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
1037; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1038; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1039; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1040; GFX9-NEXT:    flat_store_byte v[0:1], v0
1041; GFX9-NEXT:    s_endpgm
1042;
1043; GFX10-LABEL: flat_inst_salu_offset_neg_11bit_max:
1044; GFX10:       ; %bb.0:
1045; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1046; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1047; GFX10-NEXT:    s_add_u32 s0, s0, 0xfffff800
1048; GFX10-NEXT:    s_addc_u32 s1, s1, -1
1049; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1050; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1051; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1052; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1053; GFX10-NEXT:    flat_store_byte v[0:1], v0
1054; GFX10-NEXT:    s_endpgm
1055;
1056; GFX11-LABEL: flat_inst_salu_offset_neg_11bit_max:
1057; GFX11:       ; %bb.0:
1058; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1059; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1060; GFX11-NEXT:    v_add_co_u32 v0, s0, 0xfffff800, s0
1061; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1062; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1063; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1064; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1065; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1066; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1067; GFX11-NEXT:    s_endpgm
1068  %gep = getelementptr i8, i8* %p, i64 -2048
1069  %load = load volatile i8, i8* %gep, align 1
1070  store i8 %load, i8* undef
1071  ret void
1072}
1073
; Kernel variant: volatile byte load from the kernel argument %p minus 4096;
; the loaded byte is then stored through an undef pointer. No check prefix
; expects an offset field on the flat load: GFX9 and GFX11 are expected to add
; 0xfffff000 / -1 to the pointer with vector adds, GFX10 with
; s_add_u32/s_addc_u32.
1074define amdgpu_kernel void @flat_inst_salu_offset_neg_12bit_max(i8* %p) {
1075; GFX9-LABEL: flat_inst_salu_offset_neg_12bit_max:
1076; GFX9:       ; %bb.0:
1077; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1078; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1079; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1080; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1081; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
1082; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1083; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1084; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1085; GFX9-NEXT:    flat_store_byte v[0:1], v0
1086; GFX9-NEXT:    s_endpgm
1087;
1088; GFX10-LABEL: flat_inst_salu_offset_neg_12bit_max:
1089; GFX10:       ; %bb.0:
1090; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1091; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1092; GFX10-NEXT:    s_add_u32 s0, s0, 0xfffff000
1093; GFX10-NEXT:    s_addc_u32 s1, s1, -1
1094; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1095; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1096; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1097; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1098; GFX10-NEXT:    flat_store_byte v[0:1], v0
1099; GFX10-NEXT:    s_endpgm
1100;
1101; GFX11-LABEL: flat_inst_salu_offset_neg_12bit_max:
1102; GFX11:       ; %bb.0:
1103; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1104; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1105; GFX11-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
1106; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1107; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1108; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1109; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1110; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1111; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1112; GFX11-NEXT:    s_endpgm
1113  %gep = getelementptr i8, i8* %p, i64 -4096
1114  %load = load volatile i8, i8* %gep, align 1
1115  store i8 %load, i8* undef
1116  ret void
1117}
1118
; Kernel variant: volatile byte load from the kernel argument %p minus 8192;
; the loaded byte is then stored through an undef pointer. No check prefix
; expects an offset field on the flat load: GFX9 and GFX11 are expected to add
; 0xffffe000 / -1 to the pointer with vector adds, GFX10 with
; s_add_u32/s_addc_u32.
1119define amdgpu_kernel void @flat_inst_salu_offset_neg_13bit_max(i8* %p) {
1120; GFX9-LABEL: flat_inst_salu_offset_neg_13bit_max:
1121; GFX9:       ; %bb.0:
1122; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1123; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1124; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1125; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1126; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
1127; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1128; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1129; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1130; GFX9-NEXT:    flat_store_byte v[0:1], v0
1131; GFX9-NEXT:    s_endpgm
1132;
1133; GFX10-LABEL: flat_inst_salu_offset_neg_13bit_max:
1134; GFX10:       ; %bb.0:
1135; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1136; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1137; GFX10-NEXT:    s_add_u32 s0, s0, 0xffffe000
1138; GFX10-NEXT:    s_addc_u32 s1, s1, -1
1139; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1140; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1141; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1142; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1143; GFX10-NEXT:    flat_store_byte v[0:1], v0
1144; GFX10-NEXT:    s_endpgm
1145;
1146; GFX11-LABEL: flat_inst_salu_offset_neg_13bit_max:
1147; GFX11:       ; %bb.0:
1148; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1149; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1150; GFX11-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
1151; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1152; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1153; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1154; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1155; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1156; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1157; GFX11-NEXT:    s_endpgm
1158  %gep = getelementptr i8, i8* %p, i64 -8192
1159  %load = load volatile i8, i8* %gep, align 1
1160  store i8 %load, i8* undef
1161  ret void
1162}
1163
; Kernel variant: volatile byte load from the kernel argument %p plus 4095; the
; loaded byte is then stored through an undef pointer. GFX9 and GFX11 are
; expected to place 4095 in the flat load's offset field, while GFX10 is
; expected to add 0xfff to the pointer with s_add_u32/s_addc_u32 first.
1164define amdgpu_kernel void @flat_inst_salu_offset_2x_11bit_max(i8* %p) {
1165; GFX9-LABEL: flat_inst_salu_offset_2x_11bit_max:
1166; GFX9:       ; %bb.0:
1167; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1168; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1169; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1170; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1171; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
1172; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1173; GFX9-NEXT:    flat_store_byte v[0:1], v0
1174; GFX9-NEXT:    s_endpgm
1175;
1176; GFX10-LABEL: flat_inst_salu_offset_2x_11bit_max:
1177; GFX10:       ; %bb.0:
1178; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1179; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1180; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
1181; GFX10-NEXT:    s_addc_u32 s1, s1, 0
1182; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1183; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1184; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1185; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1186; GFX10-NEXT:    flat_store_byte v[0:1], v0
1187; GFX10-NEXT:    s_endpgm
1188;
1189; GFX11-LABEL: flat_inst_salu_offset_2x_11bit_max:
1190; GFX11:       ; %bb.0:
1191; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1192; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1193; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1194; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
1195; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1196; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1197; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1198; GFX11-NEXT:    s_endpgm
1199  %gep = getelementptr i8, i8* %p, i64 4095
1200  %load = load volatile i8, i8* %gep, align 1
1201  store i8 %load, i8* undef
1202  ret void
1203}
1204
; Kernel variant: volatile byte load from the kernel argument %p plus 8191; the
; loaded byte is then stored through an undef pointer. GFX9 and GFX11 are
; expected to split the constant into a vector add of 0x1000 plus an offset
; field of 4095, while GFX10 is expected to add the full 0x1fff to the pointer
; with s_add_u32/s_addc_u32 and use no offset field.
1205define amdgpu_kernel void @flat_inst_salu_offset_2x_12bit_max(i8* %p) {
1206; GFX9-LABEL: flat_inst_salu_offset_2x_12bit_max:
1207; GFX9:       ; %bb.0:
1208; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1209; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1210; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1211; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1212; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1213; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1214; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
1215; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1216; GFX9-NEXT:    flat_store_byte v[0:1], v0
1217; GFX9-NEXT:    s_endpgm
1218;
1219; GFX10-LABEL: flat_inst_salu_offset_2x_12bit_max:
1220; GFX10:       ; %bb.0:
1221; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1222; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1223; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
1224; GFX10-NEXT:    s_addc_u32 s1, s1, 0
1225; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1226; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1227; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1228; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1229; GFX10-NEXT:    flat_store_byte v[0:1], v0
1230; GFX10-NEXT:    s_endpgm
1231;
1232; GFX11-LABEL: flat_inst_salu_offset_2x_12bit_max:
1233; GFX11:       ; %bb.0:
1234; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1235; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1236; GFX11-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
1237; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1238; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, s1, s0
1239; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
1240; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1241; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1242; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1243; GFX11-NEXT:    s_endpgm
1244  %gep = getelementptr i8, i8* %p, i64 8191
1245  %load = load volatile i8, i8* %gep, align 1
1246  store i8 %load, i8* undef
1247  ret void
1248}
1249
; Kernel variant: volatile byte load from the kernel argument %p plus 16383;
; the loaded byte is then stored through an undef pointer. GFX9 and GFX11 are
; expected to split the constant into a vector add of 0x3000 plus an offset
; field of 4095, while GFX10 is expected to add the full 0x3fff to the pointer
; with s_add_u32/s_addc_u32 and use no offset field.
1250define amdgpu_kernel void @flat_inst_salu_offset_2x_13bit_max(i8* %p) {
1251; GFX9-LABEL: flat_inst_salu_offset_2x_13bit_max:
1252; GFX9:       ; %bb.0:
1253; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1254; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1255; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1256; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1257; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
1258; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1259; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
1260; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1261; GFX9-NEXT:    flat_store_byte v[0:1], v0
1262; GFX9-NEXT:    s_endpgm
1263;
1264; GFX10-LABEL: flat_inst_salu_offset_2x_13bit_max:
1265; GFX10:       ; %bb.0:
1266; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1267; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1268; GFX10-NEXT:    s_add_u32 s0, s0, 0x3fff
1269; GFX10-NEXT:    s_addc_u32 s1, s1, 0
1270; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1271; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1272; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1273; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1274; GFX10-NEXT:    flat_store_byte v[0:1], v0
1275; GFX10-NEXT:    s_endpgm
1276;
1277; GFX11-LABEL: flat_inst_salu_offset_2x_13bit_max:
1278; GFX11:       ; %bb.0:
1279; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1280; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1281; GFX11-NEXT:    v_add_co_u32 v0, s0, 0x3000, s0
1282; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1283; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, s1, s0
1284; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
1285; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1286; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1287; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1288; GFX11-NEXT:    s_endpgm
1289  %gep = getelementptr i8, i8* %p, i64 16383
1290  %load = load volatile i8, i8* %gep, align 1
1291  store i8 %load, i8* undef
1292  ret void
1293}
1294
; Kernel variant: volatile byte load from the kernel argument %p minus 4096;
; the loaded byte is then stored through an undef pointer. No check prefix
; expects an offset field on the flat load: GFX9 and GFX11 are expected to add
; 0xfffff000 / -1 to the pointer with vector adds, GFX10 with
; s_add_u32/s_addc_u32.
1295define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_11bit_max(i8* %p) {
1296; GFX9-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
1297; GFX9:       ; %bb.0:
1298; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1299; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1300; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1301; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1302; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
1303; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1304; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1305; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1306; GFX9-NEXT:    flat_store_byte v[0:1], v0
1307; GFX9-NEXT:    s_endpgm
1308;
1309; GFX10-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
1310; GFX10:       ; %bb.0:
1311; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1312; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1313; GFX10-NEXT:    s_add_u32 s0, s0, 0xfffff000
1314; GFX10-NEXT:    s_addc_u32 s1, s1, -1
1315; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1316; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1317; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1318; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1319; GFX10-NEXT:    flat_store_byte v[0:1], v0
1320; GFX10-NEXT:    s_endpgm
1321;
1322; GFX11-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
1323; GFX11:       ; %bb.0:
1324; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1325; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1326; GFX11-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
1327; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1328; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1329; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1330; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1331; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1332; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1333; GFX11-NEXT:    s_endpgm
1334  %gep = getelementptr i8, i8* %p, i64 -4096
1335  %load = load volatile i8, i8* %gep, align 1
1336  store i8 %load, i8* undef
1337  ret void
1338}
1339
; Kernel variant: volatile byte load from the kernel argument %p minus 8192;
; the loaded byte is then stored through an undef pointer. No check prefix
; expects an offset field on the flat load: GFX9 and GFX11 are expected to add
; 0xffffe000 / -1 to the pointer with vector adds, GFX10 with
; s_add_u32/s_addc_u32.
1340define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_12bit_max(i8* %p) {
1341; GFX9-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
1342; GFX9:       ; %bb.0:
1343; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1344; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1345; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1346; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1347; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
1348; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1349; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1350; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1351; GFX9-NEXT:    flat_store_byte v[0:1], v0
1352; GFX9-NEXT:    s_endpgm
1353;
1354; GFX10-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
1355; GFX10:       ; %bb.0:
1356; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1357; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1358; GFX10-NEXT:    s_add_u32 s0, s0, 0xffffe000
1359; GFX10-NEXT:    s_addc_u32 s1, s1, -1
1360; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1361; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1362; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1363; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1364; GFX10-NEXT:    flat_store_byte v[0:1], v0
1365; GFX10-NEXT:    s_endpgm
1366;
1367; GFX11-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
1368; GFX11:       ; %bb.0:
1369; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1370; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1371; GFX11-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
1372; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1373; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1374; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1375; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1376; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1377; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1378; GFX11-NEXT:    s_endpgm
1379  %gep = getelementptr i8, i8* %p, i64 -8192
1380  %load = load volatile i8, i8* %gep, align 1
1381  store i8 %load, i8* undef
1382  ret void
1383}
1384
; Kernel variant: volatile byte load from the kernel argument %p minus 16384;
; the loaded byte is then stored through an undef pointer. No check prefix
; expects an offset field on the flat load: GFX9 and GFX11 are expected to add
; 0xffffc000 / -1 to the pointer with vector adds, GFX10 with
; s_add_u32/s_addc_u32.
1385define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_13bit_max(i8* %p) {
1386; GFX9-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
1387; GFX9:       ; %bb.0:
1388; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1389; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1390; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1391; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1392; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
1393; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1394; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1395; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1396; GFX9-NEXT:    flat_store_byte v[0:1], v0
1397; GFX9-NEXT:    s_endpgm
1398;
1399; GFX10-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
1400; GFX10:       ; %bb.0:
1401; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1402; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1403; GFX10-NEXT:    s_add_u32 s0, s0, 0xffffc000
1404; GFX10-NEXT:    s_addc_u32 s1, s1, -1
1405; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1406; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1407; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1408; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1409; GFX10-NEXT:    flat_store_byte v[0:1], v0
1410; GFX10-NEXT:    s_endpgm
1411;
1412; GFX11-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
1413; GFX11:       ; %bb.0:
1414; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1415; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1416; GFX11-NEXT:    v_add_co_u32 v0, s0, 0xffffc000, s0
1417; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1418; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1419; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1420; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1421; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1422; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1423; GFX11-NEXT:    s_endpgm
1424  %gep = getelementptr i8, i8* %p, i64 -16384
1425  %load = load volatile i8, i8* %gep, align 1
1426  store i8 %load, i8* undef
1427  ret void
1428}
1429
1430; Fill 11-bit low-bits (1ull << 33) | 2047
; Kernel variant: volatile byte load from the kernel argument %p plus
; 8589936639; the loaded byte is then stored through an undef pointer. GFX9 and
; GFX11 are expected to add 2 to the high half of the pointer (with a low-half
; add of 0) and keep 2047 in the flat load's offset field, while GFX10 is
; expected to add 0x7ff / 2 with s_add_u32/s_addc_u32 and use no offset field.
1431define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split0(i8* %p) {
1432; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
1433; GFX9:       ; %bb.0:
1434; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1435; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1436; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1437; GFX9-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
1438; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1439; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2047 glc
1440; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1441; GFX9-NEXT:    flat_store_byte v[0:1], v0
1442; GFX9-NEXT:    s_endpgm
1443;
1444; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
1445; GFX10:       ; %bb.0:
1446; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1447; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1448; GFX10-NEXT:    s_add_u32 s0, s0, 0x7ff
1449; GFX10-NEXT:    s_addc_u32 s1, s1, 2
1450; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1451; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1452; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1453; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1454; GFX10-NEXT:    flat_store_byte v[0:1], v0
1455; GFX10-NEXT:    s_endpgm
1456;
1457; GFX11-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
1458; GFX11:       ; %bb.0:
1459; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1460; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1461; GFX11-NEXT:    v_add_co_u32 v0, s0, 0, s0
1462; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1463; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
1464; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:2047 glc dlc
1465; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1466; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1467; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1468; GFX11-NEXT:    s_endpgm
1469  %gep = getelementptr i8, i8* %p, i64 8589936639
1470  %load = load volatile i8, i8* %gep, align 1
1471  store i8 %load, i8* undef
1472  ret void
1473}
1474
1475; Offset (1ull << 33) | 2048 = 8589936640 from the kernel's pointer argument.
; 2048 is the first value that no longer fits in 11 bits.
; Per the checks below: GFX9 and GFX11 keep 2048 in the load's immediate
; offset and add 0 / 2 to the low / high dwords of the address; GFX10 adds the
; whole constant with scalar adds (0x800, then 2 with carry) and uses no
; immediate offset. The volatile load's result is stored to an undef pointer.
1476define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split1(i8* %p) {
1477; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
1478; GFX9:       ; %bb.0:
1479; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1480; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1481; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1482; GFX9-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
1483; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1484; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2048 glc
1485; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1486; GFX9-NEXT:    flat_store_byte v[0:1], v0
1487; GFX9-NEXT:    s_endpgm
1488;
1489; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
1490; GFX10:       ; %bb.0:
1491; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1492; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1493; GFX10-NEXT:    s_add_u32 s0, s0, 0x800
1494; GFX10-NEXT:    s_addc_u32 s1, s1, 2
1495; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1496; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1497; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1498; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1499; GFX10-NEXT:    flat_store_byte v[0:1], v0
1500; GFX10-NEXT:    s_endpgm
1501;
1502; GFX11-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
1503; GFX11:       ; %bb.0:
1504; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1505; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1506; GFX11-NEXT:    v_add_co_u32 v0, s0, 0, s0
1507; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1508; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
1509; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:2048 glc dlc
1510; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1511; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1512; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1513; GFX11-NEXT:    s_endpgm
1514  %gep = getelementptr i8, i8* %p, i64 8589936640
1515  %load = load volatile i8, i8* %gep, align 1
1516  store i8 %load, i8* undef
1517  ret void
1518}
1519
1520; Offset (1ull << 33) | 4095 = 8589938687 from the kernel's pointer argument.
; 4095 is the largest value that fits in 12 bits.
; Per the checks below: GFX9 and GFX11 keep 4095 in the load's immediate
; offset and add 0 / 2 to the low / high dwords of the address; GFX10 adds the
; whole constant with scalar adds (0xfff, then 2 with carry) and uses no
; immediate offset. The volatile load's result is stored to an undef pointer.
1521define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split0(i8* %p) {
1522; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
1523; GFX9:       ; %bb.0:
1524; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1525; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1526; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1527; GFX9-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
1528; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1529; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
1530; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1531; GFX9-NEXT:    flat_store_byte v[0:1], v0
1532; GFX9-NEXT:    s_endpgm
1533;
1534; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
1535; GFX10:       ; %bb.0:
1536; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1537; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1538; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
1539; GFX10-NEXT:    s_addc_u32 s1, s1, 2
1540; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1541; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1542; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1543; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1544; GFX10-NEXT:    flat_store_byte v[0:1], v0
1545; GFX10-NEXT:    s_endpgm
1546;
1547; GFX11-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
1548; GFX11:       ; %bb.0:
1549; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1550; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1551; GFX11-NEXT:    v_add_co_u32 v0, s0, 0, s0
1552; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1553; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
1554; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
1555; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1556; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1557; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1558; GFX11-NEXT:    s_endpgm
1559  %gep = getelementptr i8, i8* %p, i64 8589938687
1560  %load = load volatile i8, i8* %gep, align 1
1561  store i8 %load, i8* undef
1562  ret void
1563}
1564
1565; Offset (1ull << 33) | 4096 = 8589938688 from the kernel's pointer argument.
; 4096 is the first value that no longer fits in 12 bits.
; Per the checks below: no target uses an immediate offset on the load. Each
; one adds 0x1000 to the low dword and 2 (with carry) to the high dword in
; registers: vector adds on GFX9 and GFX11, scalar adds on GFX10.
; The volatile load's result is stored to an undef pointer.
1566define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split1(i8* %p) {
1567; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
1568; GFX9:       ; %bb.0:
1569; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1570; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1571; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1572; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1573; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1574; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1575; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1576; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1577; GFX9-NEXT:    flat_store_byte v[0:1], v0
1578; GFX9-NEXT:    s_endpgm
1579;
1580; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
1581; GFX10:       ; %bb.0:
1582; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1583; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1584; GFX10-NEXT:    s_add_u32 s0, s0, 0x1000
1585; GFX10-NEXT:    s_addc_u32 s1, s1, 2
1586; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1587; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1588; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1589; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1590; GFX10-NEXT:    flat_store_byte v[0:1], v0
1591; GFX10-NEXT:    s_endpgm
1592;
1593; GFX11-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
1594; GFX11:       ; %bb.0:
1595; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1596; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1597; GFX11-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
1598; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1599; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
1600; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1601; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1602; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1603; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1604; GFX11-NEXT:    s_endpgm
1605  %gep = getelementptr i8, i8* %p, i64 8589938688
1606  %load = load volatile i8, i8* %gep, align 1
1607  store i8 %load, i8* undef
1608  ret void
1609}
1610
1611; Offset (1ull << 33) | 8191 = 8589942783 from the kernel's pointer argument.
; 8191 is the largest value that fits in 13 bits.
; Per the checks below: GFX9 and GFX11 split the low part as 0x1000 added in
; registers plus 4095 in the load's immediate offset (0x1000 + 4095 = 8191),
; and add 2 with carry to the high dword; GFX10 adds the whole constant with
; scalar adds (0x1fff, then 2 with carry) and uses no immediate offset.
; The volatile load's result is stored to an undef pointer.
1612define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split0(i8* %p) {
1613; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
1614; GFX9:       ; %bb.0:
1615; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1616; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1617; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1618; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1619; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1620; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1621; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
1622; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1623; GFX9-NEXT:    flat_store_byte v[0:1], v0
1624; GFX9-NEXT:    s_endpgm
1625;
1626; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
1627; GFX10:       ; %bb.0:
1628; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1629; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1630; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
1631; GFX10-NEXT:    s_addc_u32 s1, s1, 2
1632; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1633; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1634; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1635; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1636; GFX10-NEXT:    flat_store_byte v[0:1], v0
1637; GFX10-NEXT:    s_endpgm
1638;
1639; GFX11-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
1640; GFX11:       ; %bb.0:
1641; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1642; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1643; GFX11-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
1644; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1645; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
1646; GFX11-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
1647; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1648; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1649; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1650; GFX11-NEXT:    s_endpgm
1651  %gep = getelementptr i8, i8* %p, i64 8589942783
1652  %load = load volatile i8, i8* %gep, align 1
1653  store i8 %load, i8* undef
1654  ret void
1655}
1656
1657; Offset (1ull << 33) | 8192 = 8589942784 from the kernel's pointer argument.
; 8192 is the first value that no longer fits in 13 bits.
; Per the checks below: no target uses an immediate offset on the load. Each
; one adds 0x2000 to the low dword and 2 (with carry) to the high dword in
; registers: vector adds on GFX9 and GFX11, scalar adds on GFX10.
; The volatile load's result is stored to an undef pointer.
1658define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split1(i8* %p) {
1659; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
1660; GFX9:       ; %bb.0:
1661; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1662; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1663; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1664; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1665; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
1666; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1667; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1668; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1669; GFX9-NEXT:    flat_store_byte v[0:1], v0
1670; GFX9-NEXT:    s_endpgm
1671;
1672; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
1673; GFX10:       ; %bb.0:
1674; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1675; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1676; GFX10-NEXT:    s_add_u32 s0, s0, 0x2000
1677; GFX10-NEXT:    s_addc_u32 s1, s1, 2
1678; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1679; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1680; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1681; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1682; GFX10-NEXT:    flat_store_byte v[0:1], v0
1683; GFX10-NEXT:    s_endpgm
1684;
1685; GFX11-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
1686; GFX11:       ; %bb.0:
1687; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1688; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1689; GFX11-NEXT:    v_add_co_u32 v0, s0, 0x2000, s0
1690; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1691; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
1692; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1693; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1694; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1695; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1696; GFX11-NEXT:    s_endpgm
1697  %gep = getelementptr i8, i8* %p, i64 8589942784
1698  %load = load volatile i8, i8* %gep, align 1
1699  store i8 %load, i8* undef
1700  ret void
1701}
1702
1703; Offset (1ull << 63) | 2047 from the kernel's pointer argument; as a signed
; i64 this is -9223372036854773761 (sign bit set, low part = 2047, the largest
; 11-bit value).
; Per the checks below: no target uses an immediate offset on the load. Each
; one adds 0x7ff to the low dword and 0x80000000 (with carry) to the high
; dword in registers; GFX9 sets up the high addend with v_bfrev_b32 v1, 1.
; The volatile load's result is stored to an undef pointer.
1704define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(i8* %p) {
1705; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
1706; GFX9:       ; %bb.0:
1707; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1708; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1709; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1710; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1711; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1712; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff, v0
1713; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1714; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1715; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1716; GFX9-NEXT:    flat_store_byte v[0:1], v0
1717; GFX9-NEXT:    s_endpgm
1718;
1719; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
1720; GFX10:       ; %bb.0:
1721; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1722; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1723; GFX10-NEXT:    s_add_u32 s0, s0, 0x7ff
1724; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
1725; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1726; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1727; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1728; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1729; GFX10-NEXT:    flat_store_byte v[0:1], v0
1730; GFX10-NEXT:    s_endpgm
1731;
1732; GFX11-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
1733; GFX11:       ; %bb.0:
1734; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1735; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1736; GFX11-NEXT:    v_mov_b32_e32 v1, s1
1737; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, s0
1738; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1739; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1740; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1741; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1742; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1743; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1744; GFX11-NEXT:    s_endpgm
1745  %gep = getelementptr i8, i8* %p, i64 -9223372036854773761
1746  %load = load volatile i8, i8* %gep, align 1
1747  store i8 %load, i8* undef
1748  ret void
1749}
1750
1751; Offset (1ull << 63) | 2048 from the kernel's pointer argument; as a signed
; i64 this is -9223372036854773760 (sign bit set, low part = 2048, the first
; value that no longer fits in 11 bits).
; Per the checks below: no target uses an immediate offset on the load. Each
; one adds 0x800 to the low dword and 0x80000000 (with carry) to the high
; dword in registers; GFX9 sets up the high addend with v_bfrev_b32 v1, 1.
; The volatile load's result is stored to an undef pointer.
1752define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(i8* %p) {
1753; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
1754; GFX9:       ; %bb.0:
1755; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1756; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1757; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1758; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1759; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1760; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x800, v0
1761; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1762; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1763; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1764; GFX9-NEXT:    flat_store_byte v[0:1], v0
1765; GFX9-NEXT:    s_endpgm
1766;
1767; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
1768; GFX10:       ; %bb.0:
1769; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1770; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1771; GFX10-NEXT:    s_add_u32 s0, s0, 0x800
1772; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
1773; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1774; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1775; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1776; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1777; GFX10-NEXT:    flat_store_byte v[0:1], v0
1778; GFX10-NEXT:    s_endpgm
1779;
1780; GFX11-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
1781; GFX11:       ; %bb.0:
1782; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1783; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1784; GFX11-NEXT:    v_mov_b32_e32 v1, s1
1785; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, s0
1786; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1787; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1788; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1789; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1790; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1791; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1792; GFX11-NEXT:    s_endpgm
1793  %gep = getelementptr i8, i8* %p, i64 -9223372036854773760
1794  %load = load volatile i8, i8* %gep, align 1
1795  store i8 %load, i8* undef
1796  ret void
1797}
1798
1799; Offset (1ull << 63) | 4095 from the kernel's pointer argument; as a signed
; i64 this is -9223372036854771713 (sign bit set, low part = 4095, the largest
; 12-bit value).
; Per the checks below: no target uses an immediate offset on the load. Each
; one adds 0xfff to the low dword and 0x80000000 (with carry) to the high
; dword in registers; GFX9 sets up the high addend with v_bfrev_b32 v1, 1.
; The volatile load's result is stored to an undef pointer.
1800define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(i8* %p) {
1801; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
1802; GFX9:       ; %bb.0:
1803; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1804; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1805; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1806; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1807; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1808; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
1809; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1810; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1811; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1812; GFX9-NEXT:    flat_store_byte v[0:1], v0
1813; GFX9-NEXT:    s_endpgm
1814;
1815; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
1816; GFX10:       ; %bb.0:
1817; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1818; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1819; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
1820; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
1821; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1822; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1823; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1824; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1825; GFX10-NEXT:    flat_store_byte v[0:1], v0
1826; GFX10-NEXT:    s_endpgm
1827;
1828; GFX11-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
1829; GFX11:       ; %bb.0:
1830; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1831; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1832; GFX11-NEXT:    v_mov_b32_e32 v1, s1
1833; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, s0
1834; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1835; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1836; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1837; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1838; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1839; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1840; GFX11-NEXT:    s_endpgm
1841  %gep = getelementptr i8, i8* %p, i64 -9223372036854771713
1842  %load = load volatile i8, i8* %gep, align 1
1843  store i8 %load, i8* undef
1844  ret void
1845}
1846
1847; Offset (1ull << 63) | 4096 from the kernel's pointer argument; as a signed
; i64 this is -9223372036854771712 (sign bit set, low part = 4096, the first
; value that no longer fits in 12 bits).
; Per the checks below: no target uses an immediate offset on the load. Each
; one adds 0x1000 to the low dword and 0x80000000 (with carry) to the high
; dword in registers; GFX9 sets up the high addend with v_bfrev_b32 v1, 1.
; The volatile load's result is stored to an undef pointer.
1848define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split1(i8* %p) {
1849; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
1850; GFX9:       ; %bb.0:
1851; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1852; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1853; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1854; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1855; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1856; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1857; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1858; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1859; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1860; GFX9-NEXT:    flat_store_byte v[0:1], v0
1861; GFX9-NEXT:    s_endpgm
1862;
1863; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
1864; GFX10:       ; %bb.0:
1865; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1866; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1867; GFX10-NEXT:    s_add_u32 s0, s0, 0x1000
1868; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
1869; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1870; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1871; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1872; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1873; GFX10-NEXT:    flat_store_byte v[0:1], v0
1874; GFX10-NEXT:    s_endpgm
1875;
1876; GFX11-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
1877; GFX11:       ; %bb.0:
1878; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1879; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1880; GFX11-NEXT:    v_mov_b32_e32 v1, s1
1881; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, s0
1882; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1883; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1884; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1885; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1886; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1887; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1888; GFX11-NEXT:    s_endpgm
1889  %gep = getelementptr i8, i8* %p, i64 -9223372036854771712
1890  %load = load volatile i8, i8* %gep, align 1
1891  store i8 %load, i8* undef
1892  ret void
1893}
1894
1895; Offset (1ull << 63) | 8191 from the kernel's pointer argument; as a signed
; i64 this is -9223372036854767617 (sign bit set, low part = 8191, the largest
; 13-bit value).
; Per the checks below: no target uses an immediate offset on the load. Each
; one adds 0x1fff to the low dword and 0x80000000 (with carry) to the high
; dword in registers; GFX9 sets up the high addend with v_bfrev_b32 v1, 1.
; The volatile load's result is stored to an undef pointer.
1896define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(i8* %p) {
1897; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
1898; GFX9:       ; %bb.0:
1899; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1900; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1901; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1902; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1903; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1904; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
1905; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1906; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1907; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1908; GFX9-NEXT:    flat_store_byte v[0:1], v0
1909; GFX9-NEXT:    s_endpgm
1910;
1911; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
1912; GFX10:       ; %bb.0:
1913; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1914; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1915; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
1916; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
1917; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1918; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1919; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1920; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1921; GFX10-NEXT:    flat_store_byte v[0:1], v0
1922; GFX10-NEXT:    s_endpgm
1923;
1924; GFX11-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
1925; GFX11:       ; %bb.0:
1926; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1927; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1928; GFX11-NEXT:    v_mov_b32_e32 v1, s1
1929; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, s0
1930; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1931; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1932; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1933; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1934; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1935; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1936; GFX11-NEXT:    s_endpgm
1937  %gep = getelementptr i8, i8* %p, i64 -9223372036854767617
1938  %load = load volatile i8, i8* %gep, align 1
1939  store i8 %load, i8* undef
1940  ret void
1941}
1942
1943; Offset (1ull << 63) | 8192 from the kernel's pointer argument; as a signed
; i64 this is -9223372036854767616 (sign bit set, low part = 8192, the first
; value that no longer fits in 13 bits).
; Per the checks below: no target uses an immediate offset on the load. Each
; one adds 0x2000 to the low dword and 0x80000000 (with carry) to the high
; dword in registers; GFX9 sets up the high addend with v_bfrev_b32 v1, 1.
; The volatile load's result is stored to an undef pointer.
1944define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(i8* %p) {
1945; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
1946; GFX9:       ; %bb.0:
1947; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1948; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1949; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1950; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1951; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1952; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
1953; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1954; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] glc
1955; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1956; GFX9-NEXT:    flat_store_byte v[0:1], v0
1957; GFX9-NEXT:    s_endpgm
1958;
1959; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
1960; GFX10:       ; %bb.0:
1961; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1962; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1963; GFX10-NEXT:    s_add_u32 s0, s0, 0x2000
1964; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
1965; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1966; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1967; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
1968; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1969; GFX10-NEXT:    flat_store_byte v[0:1], v0
1970; GFX10-NEXT:    s_endpgm
1971;
1972; GFX11-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
1973; GFX11:       ; %bb.0:
1974; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
1975; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1976; GFX11-NEXT:    v_mov_b32_e32 v1, s1
1977; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, s0
1978; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1979; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1980; GFX11-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
1981; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1982; GFX11-NEXT:    flat_store_b8 v[0:1], v0
1983; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1984; GFX11-NEXT:    s_endpgm
1985  %gep = getelementptr i8, i8* %p, i64 -9223372036854767616
1986  %load = load volatile i8, i8* %gep, align 1
1987  store i8 %load, i8* undef
1988  ret void
1989}
1990