; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s

; Test splitting flat instruction offsets into the low and high bits
; when the offset doesn't fit in the offset field.

; Byte load at %p + 1. Every target is expected to fold the constant into the
; load's immediate offset field (offset:1) with no separate address add.
define i8 @global_inst_valu_offset_1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:1
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Byte load at %p + 2047. Every target is expected to fold the constant into
; the load's immediate offset field (offset:2047) with no separate address add.
define i8 @global_inst_valu_offset_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_11bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:2047
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Byte load at %p + 4095. The GFX9 and GFX11 checks fold the whole constant
; into the immediate (offset:4095). The GFX10 checks instead add 0x800 to the
; 64-bit address and keep the remaining 2047 in the immediate.
define i8 @global_inst_valu_offset_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_12bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Byte load at %p + 8191. No target folds the whole constant: the GFX9 and
; GFX11 checks add 0x1000 to the address and use offset:4095, while the GFX10
; checks add 0x1800 and use offset:2047.
define i8 @global_inst_valu_offset_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_13bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Byte load at %p - 2048. Every target is expected to fold the negative
; constant into the immediate (offset:-2048) with no separate address add.
define i8 @global_inst_valu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:-2048
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Byte load at %p - 4096. The GFX9 and GFX11 checks fold the constant into the
; immediate (offset:-4096). The GFX10 checks instead add 0xfffff000 / -1 to
; the low / high address halves and load with no immediate offset.
define i8 @global_inst_valu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:-4096
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Byte load at %p - 8192. On all three targets the checks add 0xffffe000 / -1
; to the low / high address halves and load with no immediate offset.
define i8 @global_inst_valu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Byte load at %p + 4095. The GFX9 and GFX11 checks fold the whole constant
; into the immediate (offset:4095). The GFX10 checks add 0x800 to the 64-bit
; address and keep the remaining 2047 in the immediate.
define i8 @global_inst_valu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Byte load at %p + 8191. The GFX9 and GFX11 checks add 0x1000 to the address
; and use offset:4095; the GFX10 checks add 0x1800 and use offset:2047.
define i8 @global_inst_valu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Byte load at %p + 16383. The GFX9 and GFX11 checks add 0x3000 to the address
; and use offset:4095; the GFX10 checks add 0x3800 and use offset:2047.
define i8 @global_inst_valu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Byte load at %p - 4096. The GFX9 and GFX11 checks fold the constant into the
; immediate (offset:-4096). The GFX10 checks add 0xfffff000 / -1 to the
; low / high address halves and load with no immediate offset.
define i8 @global_inst_valu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:-4096
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Byte load at %p - 8192. On all three targets the checks add 0xffffe000 / -1
; to the low / high address halves and load with no immediate offset.
define i8 @global_inst_valu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Byte load at %p - 16384. On all three targets the checks add 0xffffc000 / -1
; to the low / high address halves and load with no immediate offset.
define i8 @global_inst_valu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 11-bit low-bits (1ull << 33) | 2047
; Byte load at %p + 8589936639. On all three targets the checks add 0 / 2 to
; the low / high address halves (carry chained through vcc) and leave the low
; 2047 in the immediate offset.
define i8 @global_inst_valu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:2047
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 11-bit low-bits (1ull << 33) | 2048
; Byte load at %p + 8589936640. The GFX9 and GFX11 checks add 0 / 2 to the
; low / high address halves and use offset:2048. The GFX10 checks add
; 0x800 / 2 instead and load with no immediate offset.
define i8 @global_inst_valu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2048
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:2048
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits (1ull << 33) | 4095
; Byte load at %p + 8589938687. The GFX9 and GFX11 checks add 0 / 2 to the
; low / high address halves and use offset:4095. The GFX10 checks add
; 0x800 / 2 and use offset:2047.
define i8 @global_inst_valu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits (1ull << 33) | 4096
; Byte load at %p + 8589938688. On all three targets the checks add
; 0x1000 / 2 to the low / high address halves and load with no immediate
; offset.
define i8 @global_inst_valu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits (1ull << 33) | 8191
; Byte load at %p + 8589942783. The GFX9 and GFX11 checks add 0x1000 / 2 to
; the low / high address halves and use offset:4095. The GFX10 checks add
; 0x1800 / 2 and use offset:2047.
define i8 @global_inst_valu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4095
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 13-bit low-bits (1ull << 33) | 8192
; Byte load at %p + 8589942784. On all three targets the checks add
; 0x2000 / 2 to the low / high address halves and load with no immediate
; offset.
define i8 @global_inst_valu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; Byte load at %p - 9223372036854773761. The GFX9 and GFX11 checks add 0x1000
; to the low address half and compensate with offset:-2049; the GFX10 checks
; add 0x800 and use offset:-1. The high-half addend is the literal 0x80000000
; on GFX10 and GFX11, whereas the GFX9 checks first build it in v2 with
; v_bfrev_b32.
define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2049
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:-2049
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; Byte load at %p - 9223372036854773760. The GFX9 and GFX11 checks add 0x1000
; to the low address half and compensate with offset:-2048; the GFX10 checks
; add 0x800 and load with no immediate offset. The high-half addend is the
; literal 0x80000000 on GFX10 and GFX11, whereas the GFX9 checks first build
; it in v2 with v_bfrev_b32.
define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:-2048
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; Byte load at %p - 9223372036854771713. On all three targets the checks add
; 0x1000 to the low address half and compensate with offset:-1. The high-half
; addend is the literal 0x80000000 on GFX10 and GFX11, whereas the GFX9 checks
; first build it in v2 with v_bfrev_b32.
define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:-1
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
;
; Byte load through a VGPR pointer at offset -9223372036854771712 (bit 63 set
; plus 4096). Every target is expected to fold the whole offset into the
; 64-bit add (0x1000 low, 0x80000000 plus carry high) and issue the load with
; no immediate offset. gfx900 builds 0x80000000 in v2 with v_bfrev_b32 of 1.
define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
770
; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
;
; Byte load through a VGPR pointer at offset -9223372036854767617 (bit 63 set,
; low 13 bits all ones). Every target is expected to add 0x2000 to the low
; dword and 0x80000000 plus carry to the high dword, then load with an
; immediate offset of -1. gfx900 builds 0x80000000 in v2 with v_bfrev_b32 of 1.
define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:-1
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
806
; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
;
; Byte load through a VGPR pointer at offset -9223372036854767616 (bit 63 set
; plus 8192). Every target is expected to fold the whole offset into the
; 64-bit add (0x2000 low, 0x80000000 plus carry high) and issue the load with
; no immediate offset. gfx900 builds 0x80000000 in v2 with v_bfrev_b32 of 1.
define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, v0
; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
842
; Kernel variant: the pointer is a kernel argument, so the base address is
; expected in SGPRs (loaded with s_load from offset 0x24). Volatile byte load
; at offset 1; the loaded value is stored to an undef pointer.
; All three targets are expected to use the SGPR-base form of the load with a
; zero VGPR offset and the constant 1 in the immediate offset field.
define amdgpu_kernel void @global_inst_salu_offset_1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:1 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:1 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:1 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
879
; Kernel-argument (SGPR base) pointer, volatile byte load at offset 2047, the
; largest positive value representable in 11 bits.
; All three targets are expected to encode 2047 directly in the immediate
; offset field with a zero VGPR offset.
define amdgpu_kernel void @global_inst_salu_offset_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_11bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:2047 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
916
; Kernel-argument (SGPR base) pointer, volatile byte load at offset 4095, the
; largest positive value representable in 12 bits.
; gfx900 and gfx1100 are expected to encode 4095 directly as the immediate
; offset. gfx1010 is expected to split it: 0x800 goes into the VGPR offset
; and the remaining 2047 into the immediate field.
define amdgpu_kernel void @global_inst_salu_offset_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x800
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_12bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
953
; Kernel-argument (SGPR base) pointer, volatile byte load at offset 8191, the
; largest positive value representable in 13 bits.
; No target is expected to encode 8191 directly. gfx900 and gfx1100 put
; 0x1000 in the VGPR offset and 4095 in the immediate field; gfx1010 puts
; 0x1800 in the VGPR offset and 2047 in the immediate field.
define amdgpu_kernel void @global_inst_salu_offset_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1800
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_13bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
990
; Kernel-argument (SGPR base) pointer, volatile byte load at offset -2048.
; All three targets are expected to encode -2048 directly in the immediate
; offset field with a zero VGPR offset.
define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-2048 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-2048 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_neg_11bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:-2048 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1027
; Kernel-argument (SGPR base) pointer, volatile byte load at offset -4096.
; gfx900 and gfx1100 are expected to encode -4096 directly as the immediate
; offset. gfx1010 is expected to compute the full 64-bit address in VGPRs
; (adding 0xfffff000 to the low half and -1 plus carry to the high half) and
; load through v[0:1] with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-4096 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_neg_12bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1065
; Kernel-argument (SGPR base) pointer, volatile byte load at offset -8192.
; No target is expected to encode -8192 as an immediate. gfx900 adjusts the
; SGPR base with s_add_u32/s_addc_u32 (0xffffe000, -1) and keeps the
; SGPR-base load form. gfx1010 and gfx1100 compute the address in VGPRs with
; a 64-bit VALU add and load through v[0:1] with no immediate offset; the
; gfx1100 sequence also carries an s_delay_alu between the two adds.
define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0xffffe000
; GFX9-NEXT:    s_addc_u32 s1, s1, -1
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_neg_13bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1107
; Kernel-argument (SGPR base) pointer, volatile byte load at offset 4095
; (2 * 2048 - 1). This is the same constant used by
; @global_inst_salu_offset_12bit_max and the expected code is the same:
; gfx900 and gfx1100 encode 4095 as the immediate offset, while gfx1010
; splits it into 0x800 in the VGPR offset plus an immediate of 2047.
define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x800
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_2x_11bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1144
; Kernel-argument (SGPR base) pointer, volatile byte load at offset 8191
; (2 * 4096 - 1). This is the same constant used by
; @global_inst_salu_offset_13bit_max and the expected code is the same:
; gfx900 and gfx1100 use 0x1000 in the VGPR offset plus an immediate of
; 4095; gfx1010 uses 0x1800 in the VGPR offset plus an immediate of 2047.
define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1800
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_2x_12bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1181
; Kernel-argument (SGPR base) pointer, volatile byte load at offset 16383
; (2 * 8192 - 1).
; Every target is expected to split the constant between the VGPR offset and
; the immediate field: gfx900 and gfx1100 use 0x3000 + 4095, gfx1010 uses
; 0x3800 + 2047.
define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3000
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3800
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_2x_13bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3000
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1218
; Kernel-argument (SGPR base) pointer, volatile byte load at offset -4096
; (2 * -2048). This is the same constant used by
; @global_inst_salu_offset_neg_12bit_max and the expected code is the same:
; gfx900 and gfx1100 encode -4096 as the immediate offset, while gfx1010
; computes the address in VGPRs (0xfffff000 low, -1 plus carry high) and
; loads through v[0:1] with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-4096 glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1256
; Kernel-argument (SGPR base) pointer, volatile byte load at offset -8192
; (2 * -4096). This is the same constant used by
; @global_inst_salu_offset_neg_13bit_max and the expected code is the same:
; gfx900 adjusts the SGPR base with s_add_u32/s_addc_u32 (0xffffe000, -1);
; gfx1010 and gfx1100 compute the address in VGPRs with a 64-bit VALU add and
; load through v[0:1] with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0xffffe000
; GFX9-NEXT:    s_addc_u32 s1, s1, -1
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1298
; Kernel-argument (SGPR base) pointer, volatile byte load at offset -16384
; (2 * -8192).
; No target is expected to use an immediate offset. gfx900 adjusts the SGPR
; base with s_add_u32/s_addc_u32 (0xffffc000, -1); gfx1010 and gfx1100
; compute the address in VGPRs with a 64-bit VALU add and load through
; v[0:1].
define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0xffffc000
; GFX9-NEXT:    s_addc_u32 s1, s1, -1
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0xffffc000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_add_co_u32 v0, s0, 0xffffc000, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1340
; Fill 11-bit low-bits (1ull << 33) | 2047
;
; Kernel-argument (SGPR base) pointer, volatile byte load at offset
; 8589936639: high dword 2, low dword 0x7ff.
; gfx900 is expected to add the whole constant to the SGPR base
; (s_add_u32 0x7ff, s_addc_u32 2) and load with no immediate offset.
; gfx1010 and gfx1100 are expected to add only the high part in VGPRs
; (0 to the low half, 2 plus carry to the high half) and keep 2047 as the
; immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x7ff
; GFX9-NEXT:    s_addc_u32 s1, s1, 2
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_add_co_u32 v0, s0, 0, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:2047 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1383
; Fill 11-bit low-bits (1ull << 33) | 2048
;
; Kernel-argument (SGPR base) pointer, volatile byte load at offset
; 8589936640: high dword 2, low dword 0x800.
; gfx900 is expected to add the whole constant to the SGPR base
; (s_add_u32 0x800, s_addc_u32 2). gfx1010 is expected to add the whole
; constant in VGPRs (0x800 low, 2 plus carry high) and load with no
; immediate offset. gfx1100 is expected to add only the high part in VGPRs
; and keep 2048 as the immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x800
; GFX9-NEXT:    s_addc_u32 s1, s1, 2
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0x800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_add_co_u32 v0, s0, 0, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:2048 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1426
; Fill 12-bit low-bits (1ull << 33) | 4095
;
; Kernel-argument (SGPR base) pointer, volatile byte load at offset
; 8589938687: high dword 2, low dword 0xfff.
; gfx900 is expected to add the whole constant to the SGPR base
; (s_add_u32 0xfff, s_addc_u32 2). gfx1010 is expected to add 0x800 low and
; 2 plus carry high in VGPRs and keep 2047 as the immediate offset. gfx1100
; is expected to add only the high part in VGPRs and keep 4095 as the
; immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0xfff
; GFX9-NEXT:    s_addc_u32 s1, s1, 2
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0x800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_add_co_u32 v0, s0, 0, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4095 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1469
; One past the 12-bit low-bits range: (1ull << 33) | 4096
; The byte offset 8589938688 has bit 33 and bit 12 set; its low 12 bits are
; all zero.
; Lowering pinned by the checks below:
;   gfx900  - the whole offset is added with s_add_u32/s_addc_u32 (0x1000 into
;             the low dword, 2 into the high dword); the load has no
;             immediate offset.
;   gfx1010 - 0x1000 (low) and 2 (high) are added with VALU adds; the load has
;             no immediate offset.
;   gfx1100 - same split as gfx1010 (0x1000 low, 2 high, no immediate offset).
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x1000
; GFX9-NEXT:    s_addc_u32 s1, s1, 2
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1512
; Fill 13-bit low-bits (1ull << 33) | 8191
; The byte offset 8589942783 has bit 33 set and its low 13 bits all ones.
; Lowering pinned by the checks below:
;   gfx900  - the whole offset is added with s_add_u32/s_addc_u32 (0x1fff into
;             the low dword, 2 into the high dword); the load has no
;             immediate offset.
;   gfx1010 - 0x1800 (low) and 2 (high) are added with VALU adds and the
;             remaining 2047 is placed in the load's offset field.
;   gfx1100 - 0x1000 (low) and 2 (high) are added with VALU adds and the
;             remaining 4095 is placed in the load's offset field.
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX9-NEXT:    s_addc_u32 s1, s1, 2
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0x1800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off offset:4095 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1555
; One past the 13-bit low-bits range: (1ull << 33) | 8192
; The byte offset 8589942784 has bit 33 and bit 13 set; its low 13 bits are
; all zero.
; Lowering pinned by the checks below:
;   gfx900  - the whole offset is added with s_add_u32/s_addc_u32 (0x2000 into
;             the low dword, 2 into the high dword); the load has no
;             immediate offset.
;   gfx1010 - 0x2000 (low) and 2 (high) are added with VALU adds; the load has
;             no immediate offset.
;   gfx1100 - same split as gfx1010 (0x2000 low, 2 high, no immediate offset).
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX9-NEXT:    s_addc_u32 s1, s1, 2
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32 v0, s0, 0x2000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_add_co_u32 v0, s0, 0x2000, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX11-NEXT:    global_load_u8 v0, v[0:1], off glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1598
; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; The offset is written in the IR as the signed i64 -9223372036854773761:
; bit 63 is set and the low 11 bits are all ones.
; In the checks below all three targets (gfx900, gfx1010, gfx1100) add the
; whole offset with s_add_u32/s_addc_u32 (0x7ff into the low dword, 0x80000000
; into the high dword) and the load has no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x7ff
; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x7ff
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_add_u32 s0, s0, 0x7ff
; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1642
; One past the 11-bit low-bits range, negative high bits: (1ull << 63) | 2048
; The offset is written in the IR as the signed i64 -9223372036854773760:
; bit 63 and bit 11 are set and the low 11 bits are all zero.
; In the checks below all three targets (gfx900, gfx1010, gfx1100) add the
; whole offset with s_add_u32/s_addc_u32 (0x800 into the low dword, 0x80000000
; into the high dword) and the load has no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x800
; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x800
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_add_u32 s0, s0, 0x800
; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1686
; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; The offset is written in the IR as the signed i64 -9223372036854771713:
; bit 63 is set and the low 12 bits are all ones.
; In the checks below all three targets (gfx900, gfx1010, gfx1100) add the
; whole offset with s_add_u32/s_addc_u32 (0xfff into the low dword, 0x80000000
; into the high dword) and the load has no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0xfff
; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_add_u32 s0, s0, 0xfff
; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1730
; One past the 12-bit low-bits range, negative high bits: (1ull << 63) | 4096
; The offset is written in the IR as the signed i64 -9223372036854771712:
; bit 63 and bit 12 are set and the low 12 bits are all zero.
; In the checks below all three targets (gfx900, gfx1010, gfx1100) add the
; whole offset with s_add_u32/s_addc_u32 (0x1000 into the low dword,
; 0x80000000 into the high dword) and the load has no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x1000
; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x1000
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_add_u32 s0, s0, 0x1000
; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1774
; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; The offset is written in the IR as the signed i64 -9223372036854767617:
; bit 63 is set and the low 13 bits are all ones.
; In the checks below all three targets (gfx900, gfx1010, gfx1100) add the
; whole offset with s_add_u32/s_addc_u32 (0x1fff into the low dword,
; 0x80000000 into the high dword) and the load has no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1818
; One past the 13-bit low-bits range, negative high bits: (1ull << 63) | 8192
; The offset is written in the IR as the signed i64 -9223372036854767616:
; bit 63 and bit 13 are set and the low 13 bits are all zero.
; In the checks below all three targets (gfx900, gfx1010, gfx1100) add the
; whole offset with s_add_u32/s_addc_u32 (0x2000 into the low dword,
; 0x80000000 into the high dword) and the load has no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX11-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT:    global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1862