1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
4
5; Test splitting flat instruction offsets into the low and high bits
6; when the offset doesn't fit in the offset field.
7
; Loads one byte from %p + 1, with the pointer arriving in v[0:1].
; The gfx900 checks expect the constant in the load's immediate field
; (offset:1); the gfx1010 checks expect a 64-bit add and no immediate offset.
8define i8 @flat_inst_valu_offset_1(i8* %p) {
9; GFX9-LABEL: flat_inst_valu_offset_1:
10; GFX9:       ; %bb.0:
11; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:1
13; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
14; GFX9-NEXT:    s_setpc_b64 s[30:31]
15;
16; GFX10-LABEL: flat_inst_valu_offset_1:
17; GFX10:       ; %bb.0:
18; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
20; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, v0, 1
21; GFX10-NEXT:    ; implicit-def: $vcc_hi
22; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
23; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
24; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
25; GFX10-NEXT:    s_setpc_b64 s[30:31]
26  %gep = getelementptr i8, i8* %p, i64 1
27  %load = load i8, i8* %gep, align 4
28  ret i8 %load
29}
30
; Loads one byte from %p + 2047 (0x7ff, the largest 11-bit unsigned value).
; The gfx900 checks expect it folded as offset:2047; the gfx1010 checks
; expect a 64-bit add of 0x7ff and a load with no immediate offset.
31define i8 @flat_inst_valu_offset_11bit_max(i8* %p) {
32; GFX9-LABEL: flat_inst_valu_offset_11bit_max:
33; GFX9:       ; %bb.0:
34; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2047
36; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
37; GFX9-NEXT:    s_setpc_b64 s[30:31]
38;
39; GFX10-LABEL: flat_inst_valu_offset_11bit_max:
40; GFX10:       ; %bb.0:
41; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
43; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0
44; GFX10-NEXT:    ; implicit-def: $vcc_hi
45; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
46; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
47; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
48; GFX10-NEXT:    s_setpc_b64 s[30:31]
49  %gep = getelementptr i8, i8* %p, i64 2047
50  %load = load i8, i8* %gep, align 4
51  ret i8 %load
52}
53
; Loads one byte from %p + 4095 (0xfff, the largest 12-bit unsigned value).
; The gfx900 checks expect it folded as offset:4095; the gfx1010 checks
; expect a 64-bit add of 0xfff and a load with no immediate offset.
54define i8 @flat_inst_valu_offset_12bit_max(i8* %p) {
55; GFX9-LABEL: flat_inst_valu_offset_12bit_max:
56; GFX9:       ; %bb.0:
57; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
59; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
60; GFX9-NEXT:    s_setpc_b64 s[30:31]
61;
62; GFX10-LABEL: flat_inst_valu_offset_12bit_max:
63; GFX10:       ; %bb.0:
64; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
66; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0
67; GFX10-NEXT:    ; implicit-def: $vcc_hi
68; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
69; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
70; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
71; GFX10-NEXT:    s_setpc_b64 s[30:31]
72  %gep = getelementptr i8, i8* %p, i64 4095
73  %load = load i8, i8* %gep, align 4
74  ret i8 %load
75}
76
; Loads one byte from %p + 8191 (0x1fff, the largest 13-bit unsigned value).
; The gfx900 checks expect the constant split: 0x1000 is added to the
; pointer and the remaining 4095 is used as offset:4095. The gfx1010 checks
; expect the whole 0x1fff added to the pointer and no immediate offset.
77define i8 @flat_inst_valu_offset_13bit_max(i8* %p) {
78; GFX9-LABEL: flat_inst_valu_offset_13bit_max:
79; GFX9:       ; %bb.0:
80; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
82; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
83; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
84; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
85; GFX9-NEXT:    s_setpc_b64 s[30:31]
86;
87; GFX10-LABEL: flat_inst_valu_offset_13bit_max:
88; GFX10:       ; %bb.0:
89; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
91; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0
92; GFX10-NEXT:    ; implicit-def: $vcc_hi
93; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
94; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
95; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
96; GFX10-NEXT:    s_setpc_b64 s[30:31]
97  %gep = getelementptr i8, i8* %p, i64 8191
98  %load = load i8, i8* %gep, align 4
99  ret i8 %load
100}
101
; Loads one byte from %p - 2048.
; Both the gfx900 and gfx1010 checks expect the negative constant to be added
; to the pointer as a 64-bit value (low word 0xfffff800, high word -1) and
; the load to carry no immediate offset.
102define i8 @flat_inst_valu_offset_neg_11bit_max(i8* %p) {
103; GFX9-LABEL: flat_inst_valu_offset_neg_11bit_max:
104; GFX9:       ; %bb.0:
105; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
107; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
108; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
109; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
110; GFX9-NEXT:    s_setpc_b64 s[30:31]
111;
112; GFX10-LABEL: flat_inst_valu_offset_neg_11bit_max:
113; GFX10:       ; %bb.0:
114; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
116; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xfffff800, v0
117; GFX10-NEXT:    ; implicit-def: $vcc_hi
118; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
119; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
120; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
121; GFX10-NEXT:    s_setpc_b64 s[30:31]
122  %gep = getelementptr i8, i8* %p, i64 -2048
123  %load = load i8, i8* %gep, align 4
124  ret i8 %load
125}
126
; Loads one byte from %p - 4096.
; Both the gfx900 and gfx1010 checks expect the negative constant to be added
; to the pointer as a 64-bit value (low word 0xfffff000, high word -1) and
; the load to carry no immediate offset.
127define i8 @flat_inst_valu_offset_neg_12bit_max(i8* %p) {
128; GFX9-LABEL: flat_inst_valu_offset_neg_12bit_max:
129; GFX9:       ; %bb.0:
130; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
132; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
133; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
134; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
135; GFX9-NEXT:    s_setpc_b64 s[30:31]
136;
137; GFX10-LABEL: flat_inst_valu_offset_neg_12bit_max:
138; GFX10:       ; %bb.0:
139; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
141; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0
142; GFX10-NEXT:    ; implicit-def: $vcc_hi
143; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
144; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
145; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
146; GFX10-NEXT:    s_setpc_b64 s[30:31]
147  %gep = getelementptr i8, i8* %p, i64 -4096
148  %load = load i8, i8* %gep, align 4
149  ret i8 %load
150}
151
; Loads one byte from %p - 8192.
; Both the gfx900 and gfx1010 checks expect the negative constant to be added
; to the pointer as a 64-bit value (low word 0xffffe000, high word -1) and
; the load to carry no immediate offset.
152define i8 @flat_inst_valu_offset_neg_13bit_max(i8* %p) {
153; GFX9-LABEL: flat_inst_valu_offset_neg_13bit_max:
154; GFX9:       ; %bb.0:
155; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
157; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
158; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
159; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
160; GFX9-NEXT:    s_setpc_b64 s[30:31]
161;
162; GFX10-LABEL: flat_inst_valu_offset_neg_13bit_max:
163; GFX10:       ; %bb.0:
164; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
166; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0
167; GFX10-NEXT:    ; implicit-def: $vcc_hi
168; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
169; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
170; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
171; GFX10-NEXT:    s_setpc_b64 s[30:31]
172  %gep = getelementptr i8, i8* %p, i64 -8192
173  %load = load i8, i8* %gep, align 4
174  ret i8 %load
175}
176
; Loads one byte from %p + 4095 (0xfff); the IR body uses the same constant
; as flat_inst_valu_offset_12bit_max.
; The gfx900 checks expect it folded as offset:4095; the gfx1010 checks
; expect a 64-bit add of 0xfff and a load with no immediate offset.
177define i8 @flat_inst_valu_offset_2x_11bit_max(i8* %p) {
178; GFX9-LABEL: flat_inst_valu_offset_2x_11bit_max:
179; GFX9:       ; %bb.0:
180; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
182; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
183; GFX9-NEXT:    s_setpc_b64 s[30:31]
184;
185; GFX10-LABEL: flat_inst_valu_offset_2x_11bit_max:
186; GFX10:       ; %bb.0:
187; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
188; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
189; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0
190; GFX10-NEXT:    ; implicit-def: $vcc_hi
191; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
192; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
193; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
194; GFX10-NEXT:    s_setpc_b64 s[30:31]
195  %gep = getelementptr i8, i8* %p, i64 4095
196  %load = load i8, i8* %gep, align 4
197  ret i8 %load
198}
199
; Loads one byte from %p + 8191 (0x1fff); the IR body uses the same constant
; as flat_inst_valu_offset_13bit_max.
; The gfx900 checks expect a split into an add of 0x1000 plus offset:4095;
; the gfx1010 checks expect the whole 0x1fff added and no immediate offset.
200define i8 @flat_inst_valu_offset_2x_12bit_max(i8* %p) {
201; GFX9-LABEL: flat_inst_valu_offset_2x_12bit_max:
202; GFX9:       ; %bb.0:
203; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
204; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
205; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
206; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
207; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
208; GFX9-NEXT:    s_setpc_b64 s[30:31]
209;
210; GFX10-LABEL: flat_inst_valu_offset_2x_12bit_max:
211; GFX10:       ; %bb.0:
212; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
213; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
214; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0
215; GFX10-NEXT:    ; implicit-def: $vcc_hi
216; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
217; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
218; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
219; GFX10-NEXT:    s_setpc_b64 s[30:31]
220  %gep = getelementptr i8, i8* %p, i64 8191
221  %load = load i8, i8* %gep, align 4
222  ret i8 %load
223}
224
; Loads one byte from %p + 16383 (0x3fff).
; The gfx900 checks expect a split into an add of 0x3000 plus offset:4095;
; the gfx1010 checks expect the whole 0x3fff added and no immediate offset.
225define i8 @flat_inst_valu_offset_2x_13bit_max(i8* %p) {
226; GFX9-LABEL: flat_inst_valu_offset_2x_13bit_max:
227; GFX9:       ; %bb.0:
228; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
230; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
231; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
232; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
233; GFX9-NEXT:    s_setpc_b64 s[30:31]
234;
235; GFX10-LABEL: flat_inst_valu_offset_2x_13bit_max:
236; GFX10:       ; %bb.0:
237; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
239; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x3fff, v0
240; GFX10-NEXT:    ; implicit-def: $vcc_hi
241; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
242; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
243; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
244; GFX10-NEXT:    s_setpc_b64 s[30:31]
245  %gep = getelementptr i8, i8* %p, i64 16383
246  %load = load i8, i8* %gep, align 4
247  ret i8 %load
248}
249
; Loads one byte from %p - 4096; the IR body uses the same constant as
; flat_inst_valu_offset_neg_12bit_max.
; Both the gfx900 and gfx1010 checks expect a 64-bit add (low word
; 0xfffff000, high word -1) and a load with no immediate offset.
250define i8 @flat_inst_valu_offset_2x_neg_11bit_max(i8* %p) {
251; GFX9-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
252; GFX9:       ; %bb.0:
253; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
255; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
256; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
257; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
258; GFX9-NEXT:    s_setpc_b64 s[30:31]
259;
260; GFX10-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
261; GFX10:       ; %bb.0:
262; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
264; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0
265; GFX10-NEXT:    ; implicit-def: $vcc_hi
266; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
267; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
268; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
269; GFX10-NEXT:    s_setpc_b64 s[30:31]
270  %gep = getelementptr i8, i8* %p, i64 -4096
271  %load = load i8, i8* %gep, align 4
272  ret i8 %load
273}
274
; Loads one byte from %p - 8192; the IR body uses the same constant as
; flat_inst_valu_offset_neg_13bit_max.
; Both the gfx900 and gfx1010 checks expect a 64-bit add (low word
; 0xffffe000, high word -1) and a load with no immediate offset.
275define i8 @flat_inst_valu_offset_2x_neg_12bit_max(i8* %p) {
276; GFX9-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
277; GFX9:       ; %bb.0:
278; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
280; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
281; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
282; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
283; GFX9-NEXT:    s_setpc_b64 s[30:31]
284;
285; GFX10-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
286; GFX10:       ; %bb.0:
287; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
289; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0
290; GFX10-NEXT:    ; implicit-def: $vcc_hi
291; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
292; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
293; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
294; GFX10-NEXT:    s_setpc_b64 s[30:31]
295  %gep = getelementptr i8, i8* %p, i64 -8192
296  %load = load i8, i8* %gep, align 4
297  ret i8 %load
298}
299
; Loads one byte from %p - 16384.
; Both the gfx900 and gfx1010 checks expect a 64-bit add (low word
; 0xffffc000, high word -1) and a load with no immediate offset.
300define i8 @flat_inst_valu_offset_2x_neg_13bit_max(i8* %p) {
301; GFX9-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
302; GFX9:       ; %bb.0:
303; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
305; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
306; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
307; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
308; GFX9-NEXT:    s_setpc_b64 s[30:31]
309;
310; GFX10-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
311; GFX10:       ; %bb.0:
312; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
313; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
314; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xffffc000, v0
315; GFX10-NEXT:    ; implicit-def: $vcc_hi
316; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
317; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
318; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
319; GFX10-NEXT:    s_setpc_b64 s[30:31]
320  %gep = getelementptr i8, i8* %p, i64 -16384
321  %load = load i8, i8* %gep, align 4
322  ret i8 %load
323}
324
325; Fill 11-bit low-bits (1ull << 33) | 2047
; GEP constant 8589936639 = 2^33 + 2047: high word 2, low word 0x7ff.
; The gfx900 checks expect the high word added to the pointer (low add of 0,
; carry add of 2) with the low 2047 kept as offset:2047. The gfx1010 checks
; expect both words added and a load with no immediate offset.
326define i8 @flat_inst_valu_offset_64bit_11bit_split0(i8* %p) {
327; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
328; GFX9:       ; %bb.0:
329; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
331; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
332; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2047
333; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
334; GFX9-NEXT:    s_setpc_b64 s[30:31]
335;
336; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
337; GFX10:       ; %bb.0:
338; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
340; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0
341; GFX10-NEXT:    ; implicit-def: $vcc_hi
342; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
343; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
344; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
345; GFX10-NEXT:    s_setpc_b64 s[30:31]
346  %gep = getelementptr i8, i8* %p, i64 8589936639
347  %load = load i8, i8* %gep, align 4
348  ret i8 %load
349}
350
351; Fill 11-bit low-bits (1ull << 33) | 2048
; GEP constant 8589936640 = 2^33 + 2048: high word 2, low word 0x800.
; The gfx900 checks expect the high word added to the pointer (low add of 0,
; carry add of 2) with the low 2048 kept as offset:2048. The gfx1010 checks
; expect both words added and a load with no immediate offset.
352define i8 @flat_inst_valu_offset_64bit_11bit_split1(i8* %p) {
353; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
354; GFX9:       ; %bb.0:
355; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
356; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
357; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
358; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2048
359; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
360; GFX9-NEXT:    s_setpc_b64 s[30:31]
361;
362; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
363; GFX10:       ; %bb.0:
364; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
365; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
366; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
367; GFX10-NEXT:    ; implicit-def: $vcc_hi
368; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
369; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
370; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
371; GFX10-NEXT:    s_setpc_b64 s[30:31]
372  %gep = getelementptr i8, i8* %p, i64 8589936640
373  %load = load i8, i8* %gep, align 4
374  ret i8 %load
375}
376
377; Fill 12-bit low-bits (1ull << 33) | 4095
; GEP constant 8589938687 = 2^33 + 4095: high word 2, low word 0xfff.
; The gfx900 checks expect the high word added to the pointer (low add of 0,
; carry add of 2) with the low 4095 kept as offset:4095. The gfx1010 checks
; expect both words added and a load with no immediate offset.
378define i8 @flat_inst_valu_offset_64bit_12bit_split0(i8* %p) {
379; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
380; GFX9:       ; %bb.0:
381; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
382; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
383; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
384; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
385; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
386; GFX9-NEXT:    s_setpc_b64 s[30:31]
387;
388; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
389; GFX10:       ; %bb.0:
390; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
392; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0
393; GFX10-NEXT:    ; implicit-def: $vcc_hi
394; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
395; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
396; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
397; GFX10-NEXT:    s_setpc_b64 s[30:31]
398  %gep = getelementptr i8, i8* %p, i64 8589938687
399  %load = load i8, i8* %gep, align 4
400  ret i8 %load
401}
402
403; Fill 12-bit low-bits (1ull << 33) | 4096
; GEP constant 8589938688 = 2^33 + 4096: high word 2, low word 0x1000.
; Both the gfx900 and gfx1010 checks expect the full constant added to the
; pointer (low add of 0x1000, carry add of 2) and a load with no immediate
; offset.
404define i8 @flat_inst_valu_offset_64bit_12bit_split1(i8* %p) {
405; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
406; GFX9:       ; %bb.0:
407; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
409; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
410; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
411; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
412; GFX9-NEXT:    s_setpc_b64 s[30:31]
413;
414; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
415; GFX10:       ; %bb.0:
416; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
417; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
418; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
419; GFX10-NEXT:    ; implicit-def: $vcc_hi
420; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
421; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
422; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
423; GFX10-NEXT:    s_setpc_b64 s[30:31]
424  %gep = getelementptr i8, i8* %p, i64 8589938688
425  %load = load i8, i8* %gep, align 4
426  ret i8 %load
427}
428
429; Fill 13-bit low-bits (1ull << 33) | 8191
; GEP constant 8589942783 = 2^33 + 8191: high word 2, low word 0x1fff.
; The gfx900 checks expect 0x1000 and the high word 2 added to the pointer,
; with the remaining 4095 kept as offset:4095. The gfx1010 checks expect the
; full constant added (low 0x1fff, high 2) and no immediate offset.
430define i8 @flat_inst_valu_offset_64bit_13bit_split0(i8* %p) {
431; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
432; GFX9:       ; %bb.0:
433; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
435; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
436; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
437; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
438; GFX9-NEXT:    s_setpc_b64 s[30:31]
439;
440; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
441; GFX10:       ; %bb.0:
442; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
444; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0
445; GFX10-NEXT:    ; implicit-def: $vcc_hi
446; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
447; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
448; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
449; GFX10-NEXT:    s_setpc_b64 s[30:31]
450  %gep = getelementptr i8, i8* %p, i64 8589942783
451  %load = load i8, i8* %gep, align 4
452  ret i8 %load
453}
454
455; Fill 13-bit low-bits (1ull << 33) | 8192
; GEP constant 8589942784 = 2^33 + 8192: high word 2, low word 0x2000.
; Both the gfx900 and gfx1010 checks expect the full constant added to the
; pointer (low add of 0x2000, carry add of 2) and a load with no immediate
; offset.
456define i8 @flat_inst_valu_offset_64bit_13bit_split1(i8* %p) {
457; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
458; GFX9:       ; %bb.0:
459; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
460; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
461; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
462; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
463; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
464; GFX9-NEXT:    s_setpc_b64 s[30:31]
465;
466; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
467; GFX10:       ; %bb.0:
468; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
469; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
470; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
471; GFX10-NEXT:    ; implicit-def: $vcc_hi
472; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
473; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
474; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
475; GFX10-NEXT:    s_setpc_b64 s[30:31]
476  %gep = getelementptr i8, i8* %p, i64 8589942784
477  %load = load i8, i8* %gep, align 4
478  ret i8 %load
479}
480
481; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; GEP constant -9223372036854773761 = -(2^63) + 2047: only the sign bit set
; in the high word, low word 0x7ff.
; Neither target's checks use an immediate offset. The gfx900 checks add
; 0x7ff to the low word and add the high word from v2, which is set by
; v_bfrev_b32 v2, 1; the gfx1010 checks use the literal 0x80000000 instead.
482define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(i8* %p) {
483; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
484; GFX9:       ; %bb.0:
485; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
486; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff, v0
487; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
488; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
489; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
490; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
491; GFX9-NEXT:    s_setpc_b64 s[30:31]
492;
493; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
494; GFX10:       ; %bb.0:
495; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
497; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x7ff, v0
498; GFX10-NEXT:    ; implicit-def: $vcc_hi
499; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
500; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
501; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
502; GFX10-NEXT:    s_setpc_b64 s[30:31]
503  %gep = getelementptr i8, i8* %p, i64 -9223372036854773761
504  %load = load i8, i8* %gep, align 4
505  ret i8 %load
506}
507
508; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; GEP constant -9223372036854773760 = -(2^63) + 2048: only the sign bit set
; in the high word, low word 0x800.
; Neither target's checks use an immediate offset. The gfx900 checks add
; 0x800 to the low word and add the high word from v2, which is set by
; v_bfrev_b32 v2, 1; the gfx1010 checks use the literal 0x80000000 instead.
509define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(i8* %p) {
510; GFX9-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
511; GFX9:       ; %bb.0:
512; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
513; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x800, v0
514; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
515; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
516; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
517; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
518; GFX9-NEXT:    s_setpc_b64 s[30:31]
519;
520; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
521; GFX10:       ; %bb.0:
522; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
523; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
524; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
525; GFX10-NEXT:    ; implicit-def: $vcc_hi
526; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
527; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
528; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
529; GFX10-NEXT:    s_setpc_b64 s[30:31]
530  %gep = getelementptr i8, i8* %p, i64 -9223372036854773760
531  %load = load i8, i8* %gep, align 4
532  ret i8 %load
533}
534
535; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; GEP constant -9223372036854771713 = -(2^63) + 4095: only the sign bit set
; in the high word, low word 0xfff.
; Neither target's checks use an immediate offset. The gfx900 checks add
; 0xfff to the low word and add the high word from v2, which is set by
; v_bfrev_b32 v2, 1; the gfx1010 checks use the literal 0x80000000 instead.
536define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(i8* %p) {
537; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
538; GFX9:       ; %bb.0:
539; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
540; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
541; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
542; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
543; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
544; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
545; GFX9-NEXT:    s_setpc_b64 s[30:31]
546;
547; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
548; GFX10:       ; %bb.0:
549; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
550; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
551; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xfff, v0
552; GFX10-NEXT:    ; implicit-def: $vcc_hi
553; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
554; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
555; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
556; GFX10-NEXT:    s_setpc_b64 s[30:31]
557  %gep = getelementptr i8, i8* %p, i64 -9223372036854771713
558  %load = load i8, i8* %gep, align 4
559  ret i8 %load
560}
561
562; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; GEP constant -9223372036854771712 = -(2^63) + 4096: only the sign bit set
; in the high word, low word 0x1000.
; Neither target's checks use an immediate offset. The gfx900 checks add
; 0x1000 to the low word and add the high word from v2, which is set by
; v_bfrev_b32 v2, 1; the gfx1010 checks use the literal 0x80000000 instead.
563define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(i8* %p) {
564; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
565; GFX9:       ; %bb.0:
566; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
567; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
568; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
569; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
570; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
571; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
572; GFX9-NEXT:    s_setpc_b64 s[30:31]
573;
574; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
575; GFX10:       ; %bb.0:
576; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
577; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
578; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
579; GFX10-NEXT:    ; implicit-def: $vcc_hi
580; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
581; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
582; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
583; GFX10-NEXT:    s_setpc_b64 s[30:31]
584  %gep = getelementptr i8, i8* %p, i64 -9223372036854771712
585  %load = load i8, i8* %gep, align 4
586  ret i8 %load
587}
588
589; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; GEP constant -9223372036854767617 = -(2^63) + 8191: only the sign bit set
; in the high word, low word 0x1fff.
; Neither target's checks use an immediate offset. The gfx900 checks add
; 0x1fff to the low word and add the high word from v2, which is set by
; v_bfrev_b32 v2, 1; the gfx1010 checks use the literal 0x80000000 instead.
590define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(i8* %p) {
591; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
592; GFX9:       ; %bb.0:
593; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
594; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
595; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
596; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
597; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
598; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
599; GFX9-NEXT:    s_setpc_b64 s[30:31]
600;
601; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
602; GFX10:       ; %bb.0:
603; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
605; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1fff, v0
606; GFX10-NEXT:    ; implicit-def: $vcc_hi
607; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
608; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
609; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
610; GFX10-NEXT:    s_setpc_b64 s[30:31]
611  %gep = getelementptr i8, i8* %p, i64 -9223372036854767617
612  %load = load i8, i8* %gep, align 4
613  ret i8 %load
614}
615
616; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; GEP constant -9223372036854767616 = -(2^63) + 8192: only the sign bit set
; in the high word, low word 0x2000.
; Neither target's checks use an immediate offset. The gfx900 checks add
; 0x2000 to the low word and add the high word from v2, which is set by
; v_bfrev_b32 v2, 1; the gfx1010 checks use the literal 0x80000000 instead.
617define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(i8* %p) {
618; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
619; GFX9:       ; %bb.0:
620; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
622; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
623; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
624; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
625; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
626; GFX9-NEXT:    s_setpc_b64 s[30:31]
627;
628; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
629; GFX10:       ; %bb.0:
630; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
632; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
633; GFX10-NEXT:    ; implicit-def: $vcc_hi
634; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
635; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
636; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
637; GFX10-NEXT:    s_setpc_b64 s[30:31]
638  %gep = getelementptr i8, i8* %p, i64 -9223372036854767616
639  %load = load i8, i8* %gep, align 4
640  ret i8 %load
641}
642
; Kernel variant: %p is a kernel argument, fetched by s_load_dwordx2 into
; s[0:1]. Does a volatile byte load from %p + 1 and stores the result
; through an undef pointer.
; The gfx900 checks expect the pointer copied to v[0:1] and the constant
; folded as offset:1; the gfx1010 checks expect a scalar 64-bit add
; (s_add_u32 / s_addc_u32) before the copy and no immediate offset.
643define amdgpu_kernel void @flat_inst_salu_offset_1(i8* %p) {
644; GFX9-LABEL: flat_inst_salu_offset_1:
645; GFX9:       ; %bb.0:
646; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
647; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
648; GFX9-NEXT:    v_mov_b32_e32 v0, s0
649; GFX9-NEXT:    v_mov_b32_e32 v1, s1
650; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:1
651; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
652; GFX9-NEXT:    flat_store_byte v[0:1], v0
653; GFX9-NEXT:    s_endpgm
654;
655; GFX10-LABEL: flat_inst_salu_offset_1:
656; GFX10:       ; %bb.0:
657; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
658; GFX10-NEXT:    ; implicit-def: $vcc_hi
659; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
660; GFX10-NEXT:    s_add_u32 s0, s0, 1
661; GFX10-NEXT:    s_addc_u32 s1, s1, 0
662; GFX10-NEXT:    v_mov_b32_e32 v0, s0
663; GFX10-NEXT:    v_mov_b32_e32 v1, s1
664; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
665; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
666; GFX10-NEXT:    flat_store_byte v[0:1], v0
667; GFX10-NEXT:    s_endpgm
668  %gep = getelementptr i8, i8* %p, i64 1
669  %load = load volatile i8, i8* %gep, align 1
670  store i8 %load, i8* undef
671  ret void
672}
673
; Kernel variant: %p is a kernel argument, fetched by s_load_dwordx2 into
; s[0:1]. Does a volatile byte load from %p + 2047 (0x7ff) and stores the
; result through an undef pointer.
; The gfx900 checks expect the pointer copied to v[0:1] and the constant
; folded as offset:2047; the gfx1010 checks expect a scalar 64-bit add of
; 0x7ff before the copy and no immediate offset.
674define amdgpu_kernel void @flat_inst_salu_offset_11bit_max(i8* %p) {
675; GFX9-LABEL: flat_inst_salu_offset_11bit_max:
676; GFX9:       ; %bb.0:
677; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
678; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
679; GFX9-NEXT:    v_mov_b32_e32 v0, s0
680; GFX9-NEXT:    v_mov_b32_e32 v1, s1
681; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2047
682; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
683; GFX9-NEXT:    flat_store_byte v[0:1], v0
684; GFX9-NEXT:    s_endpgm
685;
686; GFX10-LABEL: flat_inst_salu_offset_11bit_max:
687; GFX10:       ; %bb.0:
688; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
689; GFX10-NEXT:    ; implicit-def: $vcc_hi
690; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
691; GFX10-NEXT:    s_add_u32 s0, s0, 0x7ff
692; GFX10-NEXT:    s_addc_u32 s1, s1, 0
693; GFX10-NEXT:    v_mov_b32_e32 v0, s0
694; GFX10-NEXT:    v_mov_b32_e32 v1, s1
695; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
696; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
697; GFX10-NEXT:    flat_store_byte v[0:1], v0
698; GFX10-NEXT:    s_endpgm
699  %gep = getelementptr i8, i8* %p, i64 2047
700  %load = load volatile i8, i8* %gep, align 1
701  store i8 %load, i8* undef
702  ret void
703}
704
; Kernel variant: %p is a kernel argument, fetched by s_load_dwordx2 into
; s[0:1]. Does a volatile byte load from %p + 4095 (0xfff) and stores the
; result through an undef pointer.
; The gfx900 checks expect the pointer copied to v[0:1] and the constant
; folded as offset:4095; the gfx1010 checks expect a scalar 64-bit add of
; 0xfff before the copy and no immediate offset.
705define amdgpu_kernel void @flat_inst_salu_offset_12bit_max(i8* %p) {
706; GFX9-LABEL: flat_inst_salu_offset_12bit_max:
707; GFX9:       ; %bb.0:
708; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
709; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
710; GFX9-NEXT:    v_mov_b32_e32 v0, s0
711; GFX9-NEXT:    v_mov_b32_e32 v1, s1
712; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
713; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
714; GFX9-NEXT:    flat_store_byte v[0:1], v0
715; GFX9-NEXT:    s_endpgm
716;
717; GFX10-LABEL: flat_inst_salu_offset_12bit_max:
718; GFX10:       ; %bb.0:
719; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
720; GFX10-NEXT:    ; implicit-def: $vcc_hi
721; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
722; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
723; GFX10-NEXT:    s_addc_u32 s1, s1, 0
724; GFX10-NEXT:    v_mov_b32_e32 v0, s0
725; GFX10-NEXT:    v_mov_b32_e32 v1, s1
726; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
727; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
728; GFX10-NEXT:    flat_store_byte v[0:1], v0
729; GFX10-NEXT:    s_endpgm
730  %gep = getelementptr i8, i8* %p, i64 4095
731  %load = load volatile i8, i8* %gep, align 1
732  store i8 %load, i8* undef
733  ret void
734}
735
; Kernel variant: %p is a kernel argument, fetched by s_load_dwordx2 into
; s[0:1]. Does a volatile byte load from %p + 8191 (0x1fff) and stores the
; result through an undef pointer.
; The gfx900 checks expect the pointer copied to v[0:1], then a vector add
; of 0x1000 with the remaining 4095 kept as offset:4095. The gfx1010 checks
; expect a scalar 64-bit add of 0x1fff before the copy and no immediate
; offset.
736define amdgpu_kernel void @flat_inst_salu_offset_13bit_max(i8* %p) {
737; GFX9-LABEL: flat_inst_salu_offset_13bit_max:
738; GFX9:       ; %bb.0:
739; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
740; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
741; GFX9-NEXT:    v_mov_b32_e32 v0, s0
742; GFX9-NEXT:    v_mov_b32_e32 v1, s1
743; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
744; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
745; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
746; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
747; GFX9-NEXT:    flat_store_byte v[0:1], v0
748; GFX9-NEXT:    s_endpgm
749;
750; GFX10-LABEL: flat_inst_salu_offset_13bit_max:
751; GFX10:       ; %bb.0:
752; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
753; GFX10-NEXT:    ; implicit-def: $vcc_hi
754; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
755; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
756; GFX10-NEXT:    s_addc_u32 s1, s1, 0
757; GFX10-NEXT:    v_mov_b32_e32 v0, s0
758; GFX10-NEXT:    v_mov_b32_e32 v1, s1
759; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
760; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
761; GFX10-NEXT:    flat_store_byte v[0:1], v0
762; GFX10-NEXT:    s_endpgm
763  %gep = getelementptr i8, i8* %p, i64 8191
764  %load = load volatile i8, i8* %gep, align 1
765  store i8 %load, i8* undef
766  ret void
767}
768
; Kernel: volatile i8 load from %p - 2048 (low dword 0xfffff800, high dword -1); the byte is stored to an undef pointer.
; GFX9 checks: the negative constant is added with v_add_co_u32/v_addc_co_u32 and the load has no offset.
; GFX10 checks: the negative constant is added with s_add_u32/s_addc_u32 and the load has no offset.
769define amdgpu_kernel void @flat_inst_salu_offset_neg_11bit_max(i8* %p) {
770; GFX9-LABEL: flat_inst_salu_offset_neg_11bit_max:
771; GFX9:       ; %bb.0:
772; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
773; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
774; GFX9-NEXT:    v_mov_b32_e32 v0, s0
775; GFX9-NEXT:    v_mov_b32_e32 v1, s1
776; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
777; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
778; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
779; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
780; GFX9-NEXT:    flat_store_byte v[0:1], v0
781; GFX9-NEXT:    s_endpgm
782;
783; GFX10-LABEL: flat_inst_salu_offset_neg_11bit_max:
784; GFX10:       ; %bb.0:
785; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
786; GFX10-NEXT:    ; implicit-def: $vcc_hi
787; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
788; GFX10-NEXT:    s_add_u32 s0, s0, 0xfffff800
789; GFX10-NEXT:    s_addc_u32 s1, s1, -1
790; GFX10-NEXT:    v_mov_b32_e32 v0, s0
791; GFX10-NEXT:    v_mov_b32_e32 v1, s1
792; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
793; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
794; GFX10-NEXT:    flat_store_byte v[0:1], v0
795; GFX10-NEXT:    s_endpgm
796  %gep = getelementptr i8, i8* %p, i64 -2048
797  %load = load volatile i8, i8* %gep, align 1
798  store i8 %load, i8* undef
799  ret void
800}
801
; Kernel: volatile i8 load from %p - 4096 (low dword 0xfffff000, high dword -1); the byte is stored to an undef pointer.
; GFX9 checks: the negative constant is added with v_add_co_u32/v_addc_co_u32 and the load has no offset.
; GFX10 checks: the negative constant is added with s_add_u32/s_addc_u32 and the load has no offset.
802define amdgpu_kernel void @flat_inst_salu_offset_neg_12bit_max(i8* %p) {
803; GFX9-LABEL: flat_inst_salu_offset_neg_12bit_max:
804; GFX9:       ; %bb.0:
805; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
806; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
807; GFX9-NEXT:    v_mov_b32_e32 v0, s0
808; GFX9-NEXT:    v_mov_b32_e32 v1, s1
809; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
810; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
811; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
812; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
813; GFX9-NEXT:    flat_store_byte v[0:1], v0
814; GFX9-NEXT:    s_endpgm
815;
816; GFX10-LABEL: flat_inst_salu_offset_neg_12bit_max:
817; GFX10:       ; %bb.0:
818; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
819; GFX10-NEXT:    ; implicit-def: $vcc_hi
820; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
821; GFX10-NEXT:    s_add_u32 s0, s0, 0xfffff000
822; GFX10-NEXT:    s_addc_u32 s1, s1, -1
823; GFX10-NEXT:    v_mov_b32_e32 v0, s0
824; GFX10-NEXT:    v_mov_b32_e32 v1, s1
825; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
826; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
827; GFX10-NEXT:    flat_store_byte v[0:1], v0
828; GFX10-NEXT:    s_endpgm
829  %gep = getelementptr i8, i8* %p, i64 -4096
830  %load = load volatile i8, i8* %gep, align 1
831  store i8 %load, i8* undef
832  ret void
833}
834
; Kernel: volatile i8 load from %p - 8192 (low dword 0xffffe000, high dword -1); the byte is stored to an undef pointer.
; GFX9 checks: the negative constant is added with v_add_co_u32/v_addc_co_u32 and the load has no offset.
; GFX10 checks: the negative constant is added with s_add_u32/s_addc_u32 and the load has no offset.
835define amdgpu_kernel void @flat_inst_salu_offset_neg_13bit_max(i8* %p) {
836; GFX9-LABEL: flat_inst_salu_offset_neg_13bit_max:
837; GFX9:       ; %bb.0:
838; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
839; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
840; GFX9-NEXT:    v_mov_b32_e32 v0, s0
841; GFX9-NEXT:    v_mov_b32_e32 v1, s1
842; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
843; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
844; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
845; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
846; GFX9-NEXT:    flat_store_byte v[0:1], v0
847; GFX9-NEXT:    s_endpgm
848;
849; GFX10-LABEL: flat_inst_salu_offset_neg_13bit_max:
850; GFX10:       ; %bb.0:
851; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
852; GFX10-NEXT:    ; implicit-def: $vcc_hi
853; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
854; GFX10-NEXT:    s_add_u32 s0, s0, 0xffffe000
855; GFX10-NEXT:    s_addc_u32 s1, s1, -1
856; GFX10-NEXT:    v_mov_b32_e32 v0, s0
857; GFX10-NEXT:    v_mov_b32_e32 v1, s1
858; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
859; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
860; GFX10-NEXT:    flat_store_byte v[0:1], v0
861; GFX10-NEXT:    s_endpgm
862  %gep = getelementptr i8, i8* %p, i64 -8192
863  %load = load volatile i8, i8* %gep, align 1
864  store i8 %load, i8* undef
865  ret void
866}
867
; Kernel: volatile i8 load from %p + 4095 (0xfff); the loaded byte is stored to an undef pointer.
; GFX9 checks: the whole constant is carried by the load itself (offset:4095).
; GFX10 checks: 0xfff is added to the base with s_add_u32/s_addc_u32 and the load has no offset.
868define amdgpu_kernel void @flat_inst_salu_offset_2x_11bit_max(i8* %p) {
869; GFX9-LABEL: flat_inst_salu_offset_2x_11bit_max:
870; GFX9:       ; %bb.0:
871; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
872; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
873; GFX9-NEXT:    v_mov_b32_e32 v0, s0
874; GFX9-NEXT:    v_mov_b32_e32 v1, s1
875; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
876; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
877; GFX9-NEXT:    flat_store_byte v[0:1], v0
878; GFX9-NEXT:    s_endpgm
879;
880; GFX10-LABEL: flat_inst_salu_offset_2x_11bit_max:
881; GFX10:       ; %bb.0:
882; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
883; GFX10-NEXT:    ; implicit-def: $vcc_hi
884; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
885; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
886; GFX10-NEXT:    s_addc_u32 s1, s1, 0
887; GFX10-NEXT:    v_mov_b32_e32 v0, s0
888; GFX10-NEXT:    v_mov_b32_e32 v1, s1
889; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
890; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
891; GFX10-NEXT:    flat_store_byte v[0:1], v0
892; GFX10-NEXT:    s_endpgm
893  %gep = getelementptr i8, i8* %p, i64 4095
894  %load = load volatile i8, i8* %gep, align 1
895  store i8 %load, i8* undef
896  ret void
897}
898
; Kernel: volatile i8 load from %p + 8191 (0x1fff); the loaded byte is stored to an undef pointer.
; GFX9 checks: the constant is split into a 0x1000 add on the base plus offset:4095 on the load.
; GFX10 checks: 0x1fff is added to the base with s_add_u32/s_addc_u32 and the load has no offset.
899define amdgpu_kernel void @flat_inst_salu_offset_2x_12bit_max(i8* %p) {
900; GFX9-LABEL: flat_inst_salu_offset_2x_12bit_max:
901; GFX9:       ; %bb.0:
902; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
903; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
904; GFX9-NEXT:    v_mov_b32_e32 v0, s0
905; GFX9-NEXT:    v_mov_b32_e32 v1, s1
906; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
907; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
908; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
909; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
910; GFX9-NEXT:    flat_store_byte v[0:1], v0
911; GFX9-NEXT:    s_endpgm
912;
913; GFX10-LABEL: flat_inst_salu_offset_2x_12bit_max:
914; GFX10:       ; %bb.0:
915; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
916; GFX10-NEXT:    ; implicit-def: $vcc_hi
917; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
918; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
919; GFX10-NEXT:    s_addc_u32 s1, s1, 0
920; GFX10-NEXT:    v_mov_b32_e32 v0, s0
921; GFX10-NEXT:    v_mov_b32_e32 v1, s1
922; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
923; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
924; GFX10-NEXT:    flat_store_byte v[0:1], v0
925; GFX10-NEXT:    s_endpgm
926  %gep = getelementptr i8, i8* %p, i64 8191
927  %load = load volatile i8, i8* %gep, align 1
928  store i8 %load, i8* undef
929  ret void
930}
931
; Kernel: volatile i8 load from %p + 16383 (0x3fff); the loaded byte is stored to an undef pointer.
; GFX9 checks: the constant is split into a 0x3000 add on the base plus offset:4095 on the load.
; GFX10 checks: 0x3fff is added to the base with s_add_u32/s_addc_u32 and the load has no offset.
932define amdgpu_kernel void @flat_inst_salu_offset_2x_13bit_max(i8* %p) {
933; GFX9-LABEL: flat_inst_salu_offset_2x_13bit_max:
934; GFX9:       ; %bb.0:
935; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
936; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
937; GFX9-NEXT:    v_mov_b32_e32 v0, s0
938; GFX9-NEXT:    v_mov_b32_e32 v1, s1
939; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
940; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
941; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
942; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
943; GFX9-NEXT:    flat_store_byte v[0:1], v0
944; GFX9-NEXT:    s_endpgm
945;
946; GFX10-LABEL: flat_inst_salu_offset_2x_13bit_max:
947; GFX10:       ; %bb.0:
948; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
949; GFX10-NEXT:    ; implicit-def: $vcc_hi
950; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
951; GFX10-NEXT:    s_add_u32 s0, s0, 0x3fff
952; GFX10-NEXT:    s_addc_u32 s1, s1, 0
953; GFX10-NEXT:    v_mov_b32_e32 v0, s0
954; GFX10-NEXT:    v_mov_b32_e32 v1, s1
955; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
956; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
957; GFX10-NEXT:    flat_store_byte v[0:1], v0
958; GFX10-NEXT:    s_endpgm
959  %gep = getelementptr i8, i8* %p, i64 16383
960  %load = load volatile i8, i8* %gep, align 1
961  store i8 %load, i8* undef
962  ret void
963}
964
; Kernel: volatile i8 load from %p - 4096 (low dword 0xfffff000, high dword -1); the byte is stored to an undef pointer.
; GFX9 checks: the negative constant is added with v_add_co_u32/v_addc_co_u32 and the load has no offset.
; GFX10 checks: the negative constant is added with s_add_u32/s_addc_u32 and the load has no offset.
965define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_11bit_max(i8* %p) {
966; GFX9-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
967; GFX9:       ; %bb.0:
968; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
969; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
970; GFX9-NEXT:    v_mov_b32_e32 v0, s0
971; GFX9-NEXT:    v_mov_b32_e32 v1, s1
972; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
973; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
974; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
975; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
976; GFX9-NEXT:    flat_store_byte v[0:1], v0
977; GFX9-NEXT:    s_endpgm
978;
979; GFX10-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
980; GFX10:       ; %bb.0:
981; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
982; GFX10-NEXT:    ; implicit-def: $vcc_hi
983; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
984; GFX10-NEXT:    s_add_u32 s0, s0, 0xfffff000
985; GFX10-NEXT:    s_addc_u32 s1, s1, -1
986; GFX10-NEXT:    v_mov_b32_e32 v0, s0
987; GFX10-NEXT:    v_mov_b32_e32 v1, s1
988; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
989; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
990; GFX10-NEXT:    flat_store_byte v[0:1], v0
991; GFX10-NEXT:    s_endpgm
992  %gep = getelementptr i8, i8* %p, i64 -4096
993  %load = load volatile i8, i8* %gep, align 1
994  store i8 %load, i8* undef
995  ret void
996}
997
; Kernel: volatile i8 load from %p - 8192 (low dword 0xffffe000, high dword -1); the byte is stored to an undef pointer.
; GFX9 checks: the negative constant is added with v_add_co_u32/v_addc_co_u32 and the load has no offset.
; GFX10 checks: the negative constant is added with s_add_u32/s_addc_u32 and the load has no offset.
998define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_12bit_max(i8* %p) {
999; GFX9-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
1000; GFX9:       ; %bb.0:
1001; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1002; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1003; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1004; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1005; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
1006; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1007; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
1008; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1009; GFX9-NEXT:    flat_store_byte v[0:1], v0
1010; GFX9-NEXT:    s_endpgm
1011;
1012; GFX10-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
1013; GFX10:       ; %bb.0:
1014; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1015; GFX10-NEXT:    ; implicit-def: $vcc_hi
1016; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1017; GFX10-NEXT:    s_add_u32 s0, s0, 0xffffe000
1018; GFX10-NEXT:    s_addc_u32 s1, s1, -1
1019; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1020; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1021; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1022; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1023; GFX10-NEXT:    flat_store_byte v[0:1], v0
1024; GFX10-NEXT:    s_endpgm
1025  %gep = getelementptr i8, i8* %p, i64 -8192
1026  %load = load volatile i8, i8* %gep, align 1
1027  store i8 %load, i8* undef
1028  ret void
1029}
1030
; Kernel: volatile i8 load from %p - 16384 (low dword 0xffffc000, high dword -1); the byte is stored to an undef pointer.
; GFX9 checks: the negative constant is added with v_add_co_u32/v_addc_co_u32 and the load has no offset.
; GFX10 checks: the negative constant is added with s_add_u32/s_addc_u32 and the load has no offset.
1031define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_13bit_max(i8* %p) {
1032; GFX9-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
1033; GFX9:       ; %bb.0:
1034; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1035; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1036; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1037; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1038; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
1039; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1040; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
1041; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1042; GFX9-NEXT:    flat_store_byte v[0:1], v0
1043; GFX9-NEXT:    s_endpgm
1044;
1045; GFX10-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
1046; GFX10:       ; %bb.0:
1047; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1048; GFX10-NEXT:    ; implicit-def: $vcc_hi
1049; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1050; GFX10-NEXT:    s_add_u32 s0, s0, 0xffffc000
1051; GFX10-NEXT:    s_addc_u32 s1, s1, -1
1052; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1053; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1054; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1055; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1056; GFX10-NEXT:    flat_store_byte v[0:1], v0
1057; GFX10-NEXT:    s_endpgm
1058  %gep = getelementptr i8, i8* %p, i64 -16384
1059  %load = load volatile i8, i8* %gep, align 1
1060  store i8 %load, i8* undef
1061  ret void
1062}
1063
1064; Fill 11-bit low-bits (1ull << 33) | 2047
; Kernel: volatile i8 load from %p + 8589936639; the loaded byte is stored to an undef pointer.
; GFX9 checks: only the high dword is adjusted (carry add of 2) and the low 2047 stays on the load as offset:2047.
; GFX10 checks: the full constant is added with s_add_u32 0x7ff / s_addc_u32 2 and the load has no offset.
1065define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split0(i8* %p) {
1066; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
1067; GFX9:       ; %bb.0:
1068; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1069; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1070; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1071; GFX9-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
1072; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1073; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2047
1074; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1075; GFX9-NEXT:    flat_store_byte v[0:1], v0
1076; GFX9-NEXT:    s_endpgm
1077;
1078; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
1079; GFX10:       ; %bb.0:
1080; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1081; GFX10-NEXT:    ; implicit-def: $vcc_hi
1082; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1083; GFX10-NEXT:    s_add_u32 s0, s0, 0x7ff
1084; GFX10-NEXT:    s_addc_u32 s1, s1, 2
1085; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1086; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1087; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1088; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1089; GFX10-NEXT:    flat_store_byte v[0:1], v0
1090; GFX10-NEXT:    s_endpgm
1091  %gep = getelementptr i8, i8* %p, i64 8589936639
1092  %load = load volatile i8, i8* %gep, align 1
1093  store i8 %load, i8* undef
1094  ret void
1095}
1096
1097; Fill 11-bit low-bits (1ull << 33) | 2048
; Kernel: volatile i8 load from %p + 8589936640; the loaded byte is stored to an undef pointer.
; GFX9 checks: only the high dword is adjusted (carry add of 2) and the low 2048 stays on the load as offset:2048.
; GFX10 checks: the full constant is added with s_add_u32 0x800 / s_addc_u32 2 and the load has no offset.
1098define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split1(i8* %p) {
1099; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
1100; GFX9:       ; %bb.0:
1101; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1102; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1103; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1104; GFX9-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
1105; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1106; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:2048
1107; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1108; GFX9-NEXT:    flat_store_byte v[0:1], v0
1109; GFX9-NEXT:    s_endpgm
1110;
1111; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
1112; GFX10:       ; %bb.0:
1113; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1114; GFX10-NEXT:    ; implicit-def: $vcc_hi
1115; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1116; GFX10-NEXT:    s_add_u32 s0, s0, 0x800
1117; GFX10-NEXT:    s_addc_u32 s1, s1, 2
1118; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1119; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1120; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1121; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1122; GFX10-NEXT:    flat_store_byte v[0:1], v0
1123; GFX10-NEXT:    s_endpgm
1124  %gep = getelementptr i8, i8* %p, i64 8589936640
1125  %load = load volatile i8, i8* %gep, align 1
1126  store i8 %load, i8* undef
1127  ret void
1128}
1129
1130; Fill 12-bit low-bits (1ull << 33) | 4095
; Kernel: volatile i8 load from %p + 8589938687; the loaded byte is stored to an undef pointer.
; GFX9 checks: only the high dword is adjusted (carry add of 2) and the low 4095 stays on the load as offset:4095.
; GFX10 checks: the full constant is added with s_add_u32 0xfff / s_addc_u32 2 and the load has no offset.
1131define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split0(i8* %p) {
1132; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
1133; GFX9:       ; %bb.0:
1134; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1135; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1136; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1137; GFX9-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
1138; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1139; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
1140; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1141; GFX9-NEXT:    flat_store_byte v[0:1], v0
1142; GFX9-NEXT:    s_endpgm
1143;
1144; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
1145; GFX10:       ; %bb.0:
1146; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1147; GFX10-NEXT:    ; implicit-def: $vcc_hi
1148; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1149; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
1150; GFX10-NEXT:    s_addc_u32 s1, s1, 2
1151; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1152; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1153; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1154; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1155; GFX10-NEXT:    flat_store_byte v[0:1], v0
1156; GFX10-NEXT:    s_endpgm
1157  %gep = getelementptr i8, i8* %p, i64 8589938687
1158  %load = load volatile i8, i8* %gep, align 1
1159  store i8 %load, i8* undef
1160  ret void
1161}
1162
1163; Fill 12-bit low-bits (1ull << 33) | 4096
; Kernel: volatile i8 load from %p + 8589938688; the loaded byte is stored to an undef pointer.
; GFX9 checks: the full constant is added with v_add_co_u32 0x1000 / v_addc_co_u32 2 and the load has no offset.
; GFX10 checks: the full constant is added with s_add_u32 0x1000 / s_addc_u32 2 and the load has no offset.
1164define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split1(i8* %p) {
1165; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
1166; GFX9:       ; %bb.0:
1167; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1168; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1169; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1170; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1171; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1172; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1173; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
1174; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1175; GFX9-NEXT:    flat_store_byte v[0:1], v0
1176; GFX9-NEXT:    s_endpgm
1177;
1178; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
1179; GFX10:       ; %bb.0:
1180; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1181; GFX10-NEXT:    ; implicit-def: $vcc_hi
1182; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1183; GFX10-NEXT:    s_add_u32 s0, s0, 0x1000
1184; GFX10-NEXT:    s_addc_u32 s1, s1, 2
1185; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1186; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1187; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1188; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1189; GFX10-NEXT:    flat_store_byte v[0:1], v0
1190; GFX10-NEXT:    s_endpgm
1191  %gep = getelementptr i8, i8* %p, i64 8589938688
1192  %load = load volatile i8, i8* %gep, align 1
1193  store i8 %load, i8* undef
1194  ret void
1195}
1196
1197; Fill 13-bit low-bits (1ull << 33) | 8191
; Kernel: volatile i8 load from %p + 8589942783; the loaded byte is stored to an undef pointer.
; GFX9 checks: the base gets v_add_co_u32 0x1000 / v_addc_co_u32 2 and the remaining 4095 stays on the load as offset:4095.
; GFX10 checks: the full constant is added with s_add_u32 0x1fff / s_addc_u32 2 and the load has no offset.
1198define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split0(i8* %p) {
1199; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
1200; GFX9:       ; %bb.0:
1201; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1202; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1203; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1204; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1205; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1206; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1207; GFX9-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095
1208; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1209; GFX9-NEXT:    flat_store_byte v[0:1], v0
1210; GFX9-NEXT:    s_endpgm
1211;
1212; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
1213; GFX10:       ; %bb.0:
1214; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1215; GFX10-NEXT:    ; implicit-def: $vcc_hi
1216; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1217; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
1218; GFX10-NEXT:    s_addc_u32 s1, s1, 2
1219; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1220; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1221; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1222; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1223; GFX10-NEXT:    flat_store_byte v[0:1], v0
1224; GFX10-NEXT:    s_endpgm
1225  %gep = getelementptr i8, i8* %p, i64 8589942783
1226  %load = load volatile i8, i8* %gep, align 1
1227  store i8 %load, i8* undef
1228  ret void
1229}
1230
1231; Fill 13-bit low-bits (1ull << 33) | 8192
; Kernel: volatile i8 load from %p + 8589942784; the loaded byte is stored to an undef pointer.
; GFX9 checks: the full constant is added with v_add_co_u32 0x2000 / v_addc_co_u32 2 and the load has no offset.
; GFX10 checks: the full constant is added with s_add_u32 0x2000 / s_addc_u32 2 and the load has no offset.
1232define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split1(i8* %p) {
1233; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
1234; GFX9:       ; %bb.0:
1235; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1236; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1237; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1238; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1239; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
1240; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1241; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
1242; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1243; GFX9-NEXT:    flat_store_byte v[0:1], v0
1244; GFX9-NEXT:    s_endpgm
1245;
1246; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
1247; GFX10:       ; %bb.0:
1248; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1249; GFX10-NEXT:    ; implicit-def: $vcc_hi
1250; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1251; GFX10-NEXT:    s_add_u32 s0, s0, 0x2000
1252; GFX10-NEXT:    s_addc_u32 s1, s1, 2
1253; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1254; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1255; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1256; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1257; GFX10-NEXT:    flat_store_byte v[0:1], v0
1258; GFX10-NEXT:    s_endpgm
1259  %gep = getelementptr i8, i8* %p, i64 8589942784
1260  %load = load volatile i8, i8* %gep, align 1
1261  store i8 %load, i8* undef
1262  ret void
1263}
1264
1265; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; Kernel: volatile i8 load from %p - 9223372036854773761; the loaded byte is stored to an undef pointer.
; GFX9 checks: 0x7ff is added to the low dword, the high-dword addend comes from v_bfrev_b32_e32 v1, 1, and the load has no offset.
; GFX10 checks: the constant is added with s_add_u32 0x7ff / s_addc_u32 0x80000000 and the load has no offset.
1266define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(i8* %p) {
1267; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
1268; GFX9:       ; %bb.0:
1269; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1270; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1271; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1272; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1273; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1274; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff, v0
1275; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1276; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
1277; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1278; GFX9-NEXT:    flat_store_byte v[0:1], v0
1279; GFX9-NEXT:    s_endpgm
1280;
1281; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
1282; GFX10:       ; %bb.0:
1283; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1284; GFX10-NEXT:    ; implicit-def: $vcc_hi
1285; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1286; GFX10-NEXT:    s_add_u32 s0, s0, 0x7ff
1287; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
1288; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1289; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1290; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1291; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1292; GFX10-NEXT:    flat_store_byte v[0:1], v0
1293; GFX10-NEXT:    s_endpgm
1294  %gep = getelementptr i8, i8* %p, i64 -9223372036854773761
1295  %load = load volatile i8, i8* %gep, align 1
1296  store i8 %load, i8* undef
1297  ret void
1298}
1299
1300; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; Kernel: volatile i8 load from %p - 9223372036854773760; the loaded byte is stored to an undef pointer.
; GFX9 checks: 0x800 is added to the low dword, the high-dword addend comes from v_bfrev_b32_e32 v1, 1, and the load has no offset.
; GFX10 checks: the constant is added with s_add_u32 0x800 / s_addc_u32 0x80000000 and the load has no offset.
1301define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(i8* %p) {
1302; GFX9-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
1303; GFX9:       ; %bb.0:
1304; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1305; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1306; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1307; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1308; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1309; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x800, v0
1310; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1311; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
1312; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1313; GFX9-NEXT:    flat_store_byte v[0:1], v0
1314; GFX9-NEXT:    s_endpgm
1315;
1316; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
1317; GFX10:       ; %bb.0:
1318; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1319; GFX10-NEXT:    ; implicit-def: $vcc_hi
1320; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1321; GFX10-NEXT:    s_add_u32 s0, s0, 0x800
1322; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
1323; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1324; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1325; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1326; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1327; GFX10-NEXT:    flat_store_byte v[0:1], v0
1328; GFX10-NEXT:    s_endpgm
1329  %gep = getelementptr i8, i8* %p, i64 -9223372036854773760
1330  %load = load volatile i8, i8* %gep, align 1
1331  store i8 %load, i8* undef
1332  ret void
1333}
1334
1335; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; Kernel: volatile i8 load from %p - 9223372036854771713; the loaded byte is stored to an undef pointer.
; GFX9 checks: 0xfff is added to the low dword, the high-dword addend comes from v_bfrev_b32_e32 v1, 1, and the load has no offset.
; GFX10 checks: the constant is added with s_add_u32 0xfff / s_addc_u32 0x80000000 and the load has no offset.
1336define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(i8* %p) {
1337; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
1338; GFX9:       ; %bb.0:
1339; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1340; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1341; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1342; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1343; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1344; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
1345; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1346; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
1347; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1348; GFX9-NEXT:    flat_store_byte v[0:1], v0
1349; GFX9-NEXT:    s_endpgm
1350;
1351; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
1352; GFX10:       ; %bb.0:
1353; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1354; GFX10-NEXT:    ; implicit-def: $vcc_hi
1355; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1356; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
1357; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
1358; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1359; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1360; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1361; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1362; GFX10-NEXT:    flat_store_byte v[0:1], v0
1363; GFX10-NEXT:    s_endpgm
1364  %gep = getelementptr i8, i8* %p, i64 -9223372036854771713
1365  %load = load volatile i8, i8* %gep, align 1
1366  store i8 %load, i8* undef
1367  ret void
1368}
1369
1370; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; Kernel: volatile i8 load from %p - 9223372036854771712; the loaded byte is stored to an undef pointer.
; GFX9 checks: 0x1000 is added to the low dword, the high-dword addend comes from v_bfrev_b32_e32 v1, 1, and the load has no offset.
; GFX10 checks: the constant is added with s_add_u32 0x1000 / s_addc_u32 0x80000000 and the load has no offset.
1371define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split1(i8* %p) {
1372; GFX9-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
1373; GFX9:       ; %bb.0:
1374; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1375; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1376; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1377; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1378; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1379; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1380; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1381; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
1382; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1383; GFX9-NEXT:    flat_store_byte v[0:1], v0
1384; GFX9-NEXT:    s_endpgm
1385;
1386; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
1387; GFX10:       ; %bb.0:
1388; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1389; GFX10-NEXT:    ; implicit-def: $vcc_hi
1390; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1391; GFX10-NEXT:    s_add_u32 s0, s0, 0x1000
1392; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
1393; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1394; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1395; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1396; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1397; GFX10-NEXT:    flat_store_byte v[0:1], v0
1398; GFX10-NEXT:    s_endpgm
1399  %gep = getelementptr i8, i8* %p, i64 -9223372036854771712
1400  %load = load volatile i8, i8* %gep, align 1
1401  store i8 %load, i8* undef
1402  ret void
1403}
1404
1405; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; Kernel: volatile i8 load from %p - 9223372036854767617; the loaded byte is stored to an undef pointer.
; GFX9 checks: 0x1fff is added to the low dword, the high-dword addend comes from v_bfrev_b32_e32 v1, 1, and the load has no offset.
; GFX10 checks: the constant is added with s_add_u32 0x1fff / s_addc_u32 0x80000000 and the load has no offset.
1406define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(i8* %p) {
1407; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
1408; GFX9:       ; %bb.0:
1409; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1410; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1411; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1412; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1413; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1414; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
1415; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1416; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
1417; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1418; GFX9-NEXT:    flat_store_byte v[0:1], v0
1419; GFX9-NEXT:    s_endpgm
1420;
1421; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
1422; GFX10:       ; %bb.0:
1423; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1424; GFX10-NEXT:    ; implicit-def: $vcc_hi
1425; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1426; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
1427; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
1428; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1429; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1430; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1431; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1432; GFX10-NEXT:    flat_store_byte v[0:1], v0
1433; GFX10-NEXT:    s_endpgm
1434  %gep = getelementptr i8, i8* %p, i64 -9223372036854767617
1435  %load = load volatile i8, i8* %gep, align 1
1436  store i8 %load, i8* undef
1437  ret void
1438}
1439
1440; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; Kernel: volatile i8 load from %p - 9223372036854767616; the loaded byte is stored to an undef pointer.
; GFX9 checks: 0x2000 is added to the low dword, the high-dword addend comes from v_bfrev_b32_e32 v1, 1, and the load has no offset.
; GFX10 checks: the constant is added with s_add_u32 0x2000 / s_addc_u32 0x80000000 and the load has no offset.
1441define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(i8* %p) {
1442; GFX9-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
1443; GFX9:       ; %bb.0:
1444; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1445; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1446; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1447; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1448; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1449; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
1450; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1451; GFX9-NEXT:    flat_load_ubyte v0, v[0:1]
1452; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1453; GFX9-NEXT:    flat_store_byte v[0:1], v0
1454; GFX9-NEXT:    s_endpgm
1455;
1456; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
1457; GFX10:       ; %bb.0:
1458; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1459; GFX10-NEXT:    ; implicit-def: $vcc_hi
1460; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1461; GFX10-NEXT:    s_add_u32 s0, s0, 0x2000
1462; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
1463; GFX10-NEXT:    v_mov_b32_e32 v0, s0
1464; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1465; GFX10-NEXT:    flat_load_ubyte v0, v[0:1]
1466; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1467; GFX10-NEXT:    flat_store_byte v[0:1], v0
1468; GFX10-NEXT:    s_endpgm
1469  %gep = getelementptr i8, i8* %p, i64 -9223372036854767616
1470  %load = load volatile i8, i8* %gep, align 1
1471  store i8 %load, i8* undef
1472  ret void
1473}
1474