1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
4
5; Test splitting global instruction offsets into the low and high bits
6; when the offset doesn't fit in the offset field.
7
; Byte load at constant offset 1 from a pointer passed in v[0:1].
; The checks for both gfx900 and gfx1010 expect the constant to be folded into
; the load's immediate field (offset:1) with no separate address add.
8define i8 @global_inst_valu_offset_1(i8 addrspace(1)* %p) {
9; GFX9-LABEL: global_inst_valu_offset_1:
10; GFX9:       ; %bb.0:
11; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
13; GFX9-NEXT:    s_waitcnt vmcnt(0)
14; GFX9-NEXT:    s_setpc_b64 s[30:31]
15;
16; GFX10-LABEL: global_inst_valu_offset_1:
17; GFX10:       ; %bb.0:
18; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
20; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
21; GFX10-NEXT:    ; implicit-def: $vcc_hi
22; GFX10-NEXT:    s_waitcnt vmcnt(0)
23; GFX10-NEXT:    s_setpc_b64 s[30:31]
24  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
25  %load = load i8, i8 addrspace(1)* %gep, align 4
26  ret i8 %load
27}
28
; Byte load at constant offset 2047 from a pointer passed in v[0:1].
; The checks for both gfx900 and gfx1010 expect the whole constant in the
; load's immediate field (offset:2047) with no separate address add.
29define i8 @global_inst_valu_offset_11bit_max(i8 addrspace(1)* %p) {
30; GFX9-LABEL: global_inst_valu_offset_11bit_max:
31; GFX9:       ; %bb.0:
32; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
34; GFX9-NEXT:    s_waitcnt vmcnt(0)
35; GFX9-NEXT:    s_setpc_b64 s[30:31]
36;
37; GFX10-LABEL: global_inst_valu_offset_11bit_max:
38; GFX10:       ; %bb.0:
39; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
41; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
42; GFX10-NEXT:    ; implicit-def: $vcc_hi
43; GFX10-NEXT:    s_waitcnt vmcnt(0)
44; GFX10-NEXT:    s_setpc_b64 s[30:31]
45  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
46  %load = load i8, i8 addrspace(1)* %gep, align 4
47  ret i8 %load
48}
49
; Byte load at constant offset 4095 from a pointer passed in v[0:1].
; gfx900 checks expect the constant folded directly (offset:4095).
; gfx1010 checks expect a split: a 64-bit add of 0x800 to the pointer
; (add + add-with-carry of 0) followed by the load with offset:2047.
50define i8 @global_inst_valu_offset_12bit_max(i8 addrspace(1)* %p) {
51; GFX9-LABEL: global_inst_valu_offset_12bit_max:
52; GFX9:       ; %bb.0:
53; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
55; GFX9-NEXT:    s_waitcnt vmcnt(0)
56; GFX9-NEXT:    s_setpc_b64 s[30:31]
57;
58; GFX10-LABEL: global_inst_valu_offset_12bit_max:
59; GFX10:       ; %bb.0:
60; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
62; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
63; GFX10-NEXT:    ; implicit-def: $vcc_hi
64; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
65; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
66; GFX10-NEXT:    s_waitcnt vmcnt(0)
67; GFX10-NEXT:    s_setpc_b64 s[30:31]
68  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
69  %load = load i8, i8 addrspace(1)* %gep, align 4
70  ret i8 %load
71}
72
; Byte load at constant offset 8191 from a pointer passed in v[0:1].
; Both targets are expected to split the constant into a 64-bit pointer add
; plus an immediate offset on the load:
;   gfx900  checks: add 0x1000, then offset:4095
;   gfx1010 checks: add 0x1800, then offset:2047
73define i8 @global_inst_valu_offset_13bit_max(i8 addrspace(1)* %p) {
74; GFX9-LABEL: global_inst_valu_offset_13bit_max:
75; GFX9:       ; %bb.0:
76; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
78; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
79; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
80; GFX9-NEXT:    s_waitcnt vmcnt(0)
81; GFX9-NEXT:    s_setpc_b64 s[30:31]
82;
83; GFX10-LABEL: global_inst_valu_offset_13bit_max:
84; GFX10:       ; %bb.0:
85; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
87; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
88; GFX10-NEXT:    ; implicit-def: $vcc_hi
89; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
90; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
91; GFX10-NEXT:    s_waitcnt vmcnt(0)
92; GFX10-NEXT:    s_setpc_b64 s[30:31]
93  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
94  %load = load i8, i8 addrspace(1)* %gep, align 4
95  ret i8 %load
96}
97
; Byte load at constant offset -2048 from a pointer passed in v[0:1].
; The checks for both gfx900 and gfx1010 expect the negative constant folded
; into the load's immediate field (offset:-2048) with no separate address add.
98define i8 @global_inst_valu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
99; GFX9-LABEL: global_inst_valu_offset_neg_11bit_max:
100; GFX9:       ; %bb.0:
101; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
103; GFX9-NEXT:    s_waitcnt vmcnt(0)
104; GFX9-NEXT:    s_setpc_b64 s[30:31]
105;
106; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max:
107; GFX10:       ; %bb.0:
108; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
110; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
111; GFX10-NEXT:    ; implicit-def: $vcc_hi
112; GFX10-NEXT:    s_waitcnt vmcnt(0)
113; GFX10-NEXT:    s_setpc_b64 s[30:31]
114  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
115  %load = load i8, i8 addrspace(1)* %gep, align 4
116  ret i8 %load
117}
118
; Byte load at constant offset -4096 from a pointer passed in v[0:1].
; gfx900 checks expect the constant folded directly (offset:-4096).
; gfx1010 checks expect the whole constant applied as a 64-bit pointer add
; (low word 0xfffff000, high word -1 with carry) and a load with no immediate
; offset.
119define i8 @global_inst_valu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
120; GFX9-LABEL: global_inst_valu_offset_neg_12bit_max:
121; GFX9:       ; %bb.0:
122; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
124; GFX9-NEXT:    s_waitcnt vmcnt(0)
125; GFX9-NEXT:    s_setpc_b64 s[30:31]
126;
127; GFX10-LABEL: global_inst_valu_offset_neg_12bit_max:
128; GFX10:       ; %bb.0:
129; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
131; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0
132; GFX10-NEXT:    ; implicit-def: $vcc_hi
133; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
134; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
135; GFX10-NEXT:    s_waitcnt vmcnt(0)
136; GFX10-NEXT:    s_setpc_b64 s[30:31]
137  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
138  %load = load i8, i8 addrspace(1)* %gep, align 4
139  ret i8 %load
140}
141
; Byte load at constant offset -8192 from a pointer passed in v[0:1].
; The checks for both gfx900 and gfx1010 expect the whole constant applied as
; a 64-bit pointer add (low word 0xffffe000, high word -1 with carry) and a
; load with no immediate offset.
142define i8 @global_inst_valu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
143; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max:
144; GFX9:       ; %bb.0:
145; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
147; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
148; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
149; GFX9-NEXT:    s_waitcnt vmcnt(0)
150; GFX9-NEXT:    s_setpc_b64 s[30:31]
151;
152; GFX10-LABEL: global_inst_valu_offset_neg_13bit_max:
153; GFX10:       ; %bb.0:
154; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
156; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0
157; GFX10-NEXT:    ; implicit-def: $vcc_hi
158; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
159; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
160; GFX10-NEXT:    s_waitcnt vmcnt(0)
161; GFX10-NEXT:    s_setpc_b64 s[30:31]
162  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
163  %load = load i8, i8 addrspace(1)* %gep, align 4
164  ret i8 %load
165}
166
; Byte load at constant offset 4095 from a pointer passed in v[0:1].
; gfx900 checks expect the constant folded directly (offset:4095).
; gfx1010 checks expect a split: a 64-bit add of 0x800 to the pointer followed
; by the load with offset:2047.
167define i8 @global_inst_valu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
168; GFX9-LABEL: global_inst_valu_offset_2x_11bit_max:
169; GFX9:       ; %bb.0:
170; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
172; GFX9-NEXT:    s_waitcnt vmcnt(0)
173; GFX9-NEXT:    s_setpc_b64 s[30:31]
174;
175; GFX10-LABEL: global_inst_valu_offset_2x_11bit_max:
176; GFX10:       ; %bb.0:
177; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
179; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
180; GFX10-NEXT:    ; implicit-def: $vcc_hi
181; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
182; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
183; GFX10-NEXT:    s_waitcnt vmcnt(0)
184; GFX10-NEXT:    s_setpc_b64 s[30:31]
185  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
186  %load = load i8, i8 addrspace(1)* %gep, align 4
187  ret i8 %load
188}
189
; Byte load at constant offset 8191 from a pointer passed in v[0:1].
; Both targets are expected to split the constant into a 64-bit pointer add
; plus an immediate offset on the load:
;   gfx900  checks: add 0x1000, then offset:4095
;   gfx1010 checks: add 0x1800, then offset:2047
190define i8 @global_inst_valu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
191; GFX9-LABEL: global_inst_valu_offset_2x_12bit_max:
192; GFX9:       ; %bb.0:
193; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
195; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
196; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
197; GFX9-NEXT:    s_waitcnt vmcnt(0)
198; GFX9-NEXT:    s_setpc_b64 s[30:31]
199;
200; GFX10-LABEL: global_inst_valu_offset_2x_12bit_max:
201; GFX10:       ; %bb.0:
202; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
204; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
205; GFX10-NEXT:    ; implicit-def: $vcc_hi
206; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
207; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
208; GFX10-NEXT:    s_waitcnt vmcnt(0)
209; GFX10-NEXT:    s_setpc_b64 s[30:31]
210  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
211  %load = load i8, i8 addrspace(1)* %gep, align 4
212  ret i8 %load
213}
214
; Byte load at constant offset 16383 from a pointer passed in v[0:1].
; Both targets are expected to split the constant into a 64-bit pointer add
; plus an immediate offset on the load:
;   gfx900  checks: add 0x3000, then offset:4095
;   gfx1010 checks: add 0x3800, then offset:2047
215define i8 @global_inst_valu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
216; GFX9-LABEL: global_inst_valu_offset_2x_13bit_max:
217; GFX9:       ; %bb.0:
218; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
220; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
221; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
222; GFX9-NEXT:    s_waitcnt vmcnt(0)
223; GFX9-NEXT:    s_setpc_b64 s[30:31]
224;
225; GFX10-LABEL: global_inst_valu_offset_2x_13bit_max:
226; GFX10:       ; %bb.0:
227; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
229; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x3800, v0
230; GFX10-NEXT:    ; implicit-def: $vcc_hi
231; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
232; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
233; GFX10-NEXT:    s_waitcnt vmcnt(0)
234; GFX10-NEXT:    s_setpc_b64 s[30:31]
235  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
236  %load = load i8, i8 addrspace(1)* %gep, align 4
237  ret i8 %load
238}
239
; Byte load at constant offset -4096 from a pointer passed in v[0:1].
; gfx900 checks expect the constant folded directly (offset:-4096).
; gfx1010 checks expect the whole constant applied as a 64-bit pointer add
; (low word 0xfffff000, high word -1 with carry) and a load with no immediate
; offset.
240define i8 @global_inst_valu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
241; GFX9-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
242; GFX9:       ; %bb.0:
243; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
245; GFX9-NEXT:    s_waitcnt vmcnt(0)
246; GFX9-NEXT:    s_setpc_b64 s[30:31]
247;
248; GFX10-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
249; GFX10:       ; %bb.0:
250; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
251; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
252; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0
253; GFX10-NEXT:    ; implicit-def: $vcc_hi
254; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
255; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
256; GFX10-NEXT:    s_waitcnt vmcnt(0)
257; GFX10-NEXT:    s_setpc_b64 s[30:31]
258  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
259  %load = load i8, i8 addrspace(1)* %gep, align 4
260  ret i8 %load
261}
262
; Byte load at constant offset -8192 from a pointer passed in v[0:1].
; The checks for both gfx900 and gfx1010 expect the whole constant applied as
; a 64-bit pointer add (low word 0xffffe000, high word -1 with carry) and a
; load with no immediate offset.
263define i8 @global_inst_valu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
264; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
265; GFX9:       ; %bb.0:
266; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
268; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
269; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
270; GFX9-NEXT:    s_waitcnt vmcnt(0)
271; GFX9-NEXT:    s_setpc_b64 s[30:31]
272;
273; GFX10-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
274; GFX10:       ; %bb.0:
275; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
276; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
277; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0
278; GFX10-NEXT:    ; implicit-def: $vcc_hi
279; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
280; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
281; GFX10-NEXT:    s_waitcnt vmcnt(0)
282; GFX10-NEXT:    s_setpc_b64 s[30:31]
283  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
284  %load = load i8, i8 addrspace(1)* %gep, align 4
285  ret i8 %load
286}
287
; Byte load at constant offset -16384 from a pointer passed in v[0:1].
; The checks for both gfx900 and gfx1010 expect the whole constant applied as
; a 64-bit pointer add (low word 0xffffc000, high word -1 with carry) and a
; load with no immediate offset.
288define i8 @global_inst_valu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
289; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
290; GFX9:       ; %bb.0:
291; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
293; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
294; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
295; GFX9-NEXT:    s_waitcnt vmcnt(0)
296; GFX9-NEXT:    s_setpc_b64 s[30:31]
297;
298; GFX10-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
299; GFX10:       ; %bb.0:
300; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
302; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xffffc000, v0
303; GFX10-NEXT:    ; implicit-def: $vcc_hi
304; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
305; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
306; GFX10-NEXT:    s_waitcnt vmcnt(0)
307; GFX10-NEXT:    s_setpc_b64 s[30:31]
308  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384
309  %load = load i8, i8 addrspace(1)* %gep, align 4
310  ret i8 %load
311}
312
313; Fill 11-bit low-bits (1ull << 33) | 2047
; GEP constant is 8589936639 (bit 33 set, low bits 2047).
; The checks for both gfx900 and gfx1010 expect a 64-bit pointer add with low
; word 0 and high word 2, with the remaining 2047 carried in the load's
; immediate field (offset:2047).
314define i8 @global_inst_valu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
315; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split0:
316; GFX9:       ; %bb.0:
317; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
319; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
320; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
321; GFX9-NEXT:    s_waitcnt vmcnt(0)
322; GFX9-NEXT:    s_setpc_b64 s[30:31]
323;
324; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split0:
325; GFX10:       ; %bb.0:
326; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
327; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
328; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0, v0
329; GFX10-NEXT:    ; implicit-def: $vcc_hi
330; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
331; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
332; GFX10-NEXT:    s_waitcnt vmcnt(0)
333; GFX10-NEXT:    s_setpc_b64 s[30:31]
334  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639
335  %load = load i8, i8 addrspace(1)* %gep, align 4
336  ret i8 %load
337}
338
339; Fill 11-bit low-bits (1ull << 33) | 2048
; GEP constant is 8589936640 (bit 33 set, low bits 2048).
; Both targets are expected to add 2 to the high pointer word; the low 2048
; is placed differently:
;   gfx900  checks: low-word add of 0, load with offset:2048
;   gfx1010 checks: low-word add of 0x800, load with no immediate offset
340define i8 @global_inst_valu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
341; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split1:
342; GFX9:       ; %bb.0:
343; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
345; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
346; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2048
347; GFX9-NEXT:    s_waitcnt vmcnt(0)
348; GFX9-NEXT:    s_setpc_b64 s[30:31]
349;
350; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1:
351; GFX10:       ; %bb.0:
352; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
354; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
355; GFX10-NEXT:    ; implicit-def: $vcc_hi
356; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
357; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
358; GFX10-NEXT:    s_waitcnt vmcnt(0)
359; GFX10-NEXT:    s_setpc_b64 s[30:31]
360  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640
361  %load = load i8, i8 addrspace(1)* %gep, align 4
362  ret i8 %load
363}
364
365; Fill 12-bit low-bits (1ull << 33) | 4095
; GEP constant is 8589938687 (bit 33 set, low bits 4095).
; Both targets are expected to add 2 to the high pointer word; the low 4095
; is placed differently:
;   gfx900  checks: low-word add of 0, load with offset:4095
;   gfx1010 checks: low-word add of 0x800, load with offset:2047
366define i8 @global_inst_valu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
367; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split0:
368; GFX9:       ; %bb.0:
369; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
371; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
372; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
373; GFX9-NEXT:    s_waitcnt vmcnt(0)
374; GFX9-NEXT:    s_setpc_b64 s[30:31]
375;
376; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split0:
377; GFX10:       ; %bb.0:
378; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
380; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
381; GFX10-NEXT:    ; implicit-def: $vcc_hi
382; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
383; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
384; GFX10-NEXT:    s_waitcnt vmcnt(0)
385; GFX10-NEXT:    s_setpc_b64 s[30:31]
386  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687
387  %load = load i8, i8 addrspace(1)* %gep, align 4
388  ret i8 %load
389}
390
391; Fill 12-bit low-bits (1ull << 33) | 4096
; GEP constant is 8589938688 (bit 33 set, low bits 4096).
; The checks for both gfx900 and gfx1010 expect the whole constant applied as
; a 64-bit pointer add (low word 0x1000, high word 2 with carry) and a load
; with no immediate offset.
392define i8 @global_inst_valu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
393; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1:
394; GFX9:       ; %bb.0:
395; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
397; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
398; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
399; GFX9-NEXT:    s_waitcnt vmcnt(0)
400; GFX9-NEXT:    s_setpc_b64 s[30:31]
401;
402; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1:
403; GFX10:       ; %bb.0:
404; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
405; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
406; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
407; GFX10-NEXT:    ; implicit-def: $vcc_hi
408; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
409; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
410; GFX10-NEXT:    s_waitcnt vmcnt(0)
411; GFX10-NEXT:    s_setpc_b64 s[30:31]
412  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688
413  %load = load i8, i8 addrspace(1)* %gep, align 4
414  ret i8 %load
415}
416
417; Fill 13-bit low-bits (1ull << 33) | 8191
; GEP constant is 8589942783 (bit 33 set, low bits 8191).
; Both targets are expected to add 2 to the high pointer word and to split
; the low 8191 between the low-word add and the load's immediate field:
;   gfx900  checks: low-word add of 0x1000, load with offset:4095
;   gfx1010 checks: low-word add of 0x1800, load with offset:2047
418define i8 @global_inst_valu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
419; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split0:
420; GFX9:       ; %bb.0:
421; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
423; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
424; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
425; GFX9-NEXT:    s_waitcnt vmcnt(0)
426; GFX9-NEXT:    s_setpc_b64 s[30:31]
427;
428; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split0:
429; GFX10:       ; %bb.0:
430; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
432; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
433; GFX10-NEXT:    ; implicit-def: $vcc_hi
434; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
435; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
436; GFX10-NEXT:    s_waitcnt vmcnt(0)
437; GFX10-NEXT:    s_setpc_b64 s[30:31]
438  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
439  %load = load i8, i8 addrspace(1)* %gep, align 4
440  ret i8 %load
441}
442
443; Fill 13-bit low-bits (1ull << 33) | 8192
; GEP constant is 8589942784 (bit 33 set, low bits 8192).
; The checks for both gfx900 and gfx1010 expect the whole constant applied as
; a 64-bit pointer add (low word 0x2000, high word 2 with carry) and a load
; with no immediate offset.
444define i8 @global_inst_valu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
445; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1:
446; GFX9:       ; %bb.0:
447; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
448; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
449; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
450; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
451; GFX9-NEXT:    s_waitcnt vmcnt(0)
452; GFX9-NEXT:    s_setpc_b64 s[30:31]
453;
454; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1:
455; GFX10:       ; %bb.0:
456; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
457; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
458; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
459; GFX10-NEXT:    ; implicit-def: $vcc_hi
460; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
461; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
462; GFX10-NEXT:    s_waitcnt vmcnt(0)
463; GFX10-NEXT:    s_setpc_b64 s[30:31]
464  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784
465  %load = load i8, i8 addrspace(1)* %gep, align 4
466  ret i8 %load
467}
468
469; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; GEP constant is -9223372036854773761 (sign bit set, low bits 2047).
; Both targets are expected to overshoot with the low-word add and correct
; with a negative immediate on the load:
;   gfx900  checks: low-word add of 0x1000, high word added from v2 (set by
;                   v_bfrev_b32 v2, 1), load with offset:-2049
;   gfx1010 checks: low-word add of 0x800, high word literal 0x80000000,
;                   load with offset:-1
470define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
471; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
472; GFX9:       ; %bb.0:
473; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
474; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
475; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
476; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
477; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2049
478; GFX9-NEXT:    s_waitcnt vmcnt(0)
479; GFX9-NEXT:    s_setpc_b64 s[30:31]
480;
481; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
482; GFX10:       ; %bb.0:
483; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
484; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
485; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
486; GFX10-NEXT:    ; implicit-def: $vcc_hi
487; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
488; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
489; GFX10-NEXT:    s_waitcnt vmcnt(0)
490; GFX10-NEXT:    s_setpc_b64 s[30:31]
491  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
492  %load = load i8, i8 addrspace(1)* %gep, align 4
493  ret i8 %load
494}
495
496; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; GEP constant is -9223372036854773760 (sign bit set, low bits 2048).
;   gfx900  checks: low-word add of 0x1000, high word added from v2 (set by
;                   v_bfrev_b32 v2, 1), load with offset:-2048
;   gfx1010 checks: low-word add of 0x800, high word literal 0x80000000,
;                   load with no immediate offset
497define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
498; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
499; GFX9:       ; %bb.0:
500; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
501; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
502; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
503; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
504; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
505; GFX9-NEXT:    s_waitcnt vmcnt(0)
506; GFX9-NEXT:    s_setpc_b64 s[30:31]
507;
508; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
509; GFX10:       ; %bb.0:
510; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
512; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
513; GFX10-NEXT:    ; implicit-def: $vcc_hi
514; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
515; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
516; GFX10-NEXT:    s_waitcnt vmcnt(0)
517; GFX10-NEXT:    s_setpc_b64 s[30:31]
518  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
519  %load = load i8, i8 addrspace(1)* %gep, align 4
520  ret i8 %load
521}
522
523; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; GEP constant is -9223372036854771713 (sign bit set, low bits 4095).
; Both targets are expected to add 0x1000 to the low pointer word and load
; with offset:-1. The high-word addend differs in form only:
;   gfx900  checks: added from v2 (set by v_bfrev_b32 v2, 1)
;   gfx1010 checks: literal 0x80000000
524define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
525; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
526; GFX9:       ; %bb.0:
527; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
528; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
529; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
530; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
531; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
532; GFX9-NEXT:    s_waitcnt vmcnt(0)
533; GFX9-NEXT:    s_setpc_b64 s[30:31]
534;
535; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
536; GFX10:       ; %bb.0:
537; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
538; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
539; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
540; GFX10-NEXT:    ; implicit-def: $vcc_hi
541; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
542; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
543; GFX10-NEXT:    s_waitcnt vmcnt(0)
544; GFX10-NEXT:    s_setpc_b64 s[30:31]
545  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
546  %load = load i8, i8 addrspace(1)* %gep, align 4
547  ret i8 %load
548}
549
550; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; GEP constant is -9223372036854771712 (sign bit set, low bits 4096).
; Both targets are expected to add 0x1000 to the low pointer word and load
; with no immediate offset. The high-word addend differs in form only:
;   gfx900  checks: added from v2 (set by v_bfrev_b32 v2, 1)
;   gfx1010 checks: literal 0x80000000
551define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
552; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
553; GFX9:       ; %bb.0:
554; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
556; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
557; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
558; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
559; GFX9-NEXT:    s_waitcnt vmcnt(0)
560; GFX9-NEXT:    s_setpc_b64 s[30:31]
561;
562; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
563; GFX10:       ; %bb.0:
564; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
565; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
566; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
567; GFX10-NEXT:    ; implicit-def: $vcc_hi
568; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
569; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
570; GFX10-NEXT:    s_waitcnt vmcnt(0)
571; GFX10-NEXT:    s_setpc_b64 s[30:31]
572  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
573  %load = load i8, i8 addrspace(1)* %gep, align 4
574  ret i8 %load
575}
576
577; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; GEP constant is -9223372036854767617 (sign bit set, low bits 8191).
; Both targets are expected to add 0x2000 to the low pointer word and load
; with offset:-1. The high-word addend differs in form only:
;   gfx900  checks: added from v2 (set by v_bfrev_b32 v2, 1)
;   gfx1010 checks: literal 0x80000000
578define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
579; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
580; GFX9:       ; %bb.0:
581; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
582; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
583; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
584; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
585; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
586; GFX9-NEXT:    s_waitcnt vmcnt(0)
587; GFX9-NEXT:    s_setpc_b64 s[30:31]
588;
589; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
590; GFX10:       ; %bb.0:
591; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
592; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
593; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
594; GFX10-NEXT:    ; implicit-def: $vcc_hi
595; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
596; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
597; GFX10-NEXT:    s_waitcnt vmcnt(0)
598; GFX10-NEXT:    s_setpc_b64 s[30:31]
599  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
600  %load = load i8, i8 addrspace(1)* %gep, align 4
601  ret i8 %load
602}
603
604; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; GEP constant is -9223372036854767616 (sign bit set, low bits 8192).
; Both targets are expected to add 0x2000 to the low pointer word and load
; with no immediate offset. The high-word addend differs in form only:
;   gfx900  checks: added from v2 (set by v_bfrev_b32 v2, 1)
;   gfx1010 checks: literal 0x80000000
605define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
606; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
607; GFX9:       ; %bb.0:
608; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
609; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
610; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
611; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
612; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
613; GFX9-NEXT:    s_waitcnt vmcnt(0)
614; GFX9-NEXT:    s_setpc_b64 s[30:31]
615;
616; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
617; GFX10:       ; %bb.0:
618; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
620; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
621; GFX10-NEXT:    ; implicit-def: $vcc_hi
622; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
623; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
624; GFX10-NEXT:    s_waitcnt vmcnt(0)
625; GFX10-NEXT:    s_setpc_b64 s[30:31]
626  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
627  %load = load i8, i8 addrspace(1)* %gep, align 4
628  ret i8 %load
629}
630
; Kernel variant: the pointer is a kernel argument, which the checks expect to
; be fetched into s[0:1] with s_load_dwordx2 and used as the load's scalar
; base alongside a VGPR holding 0.
; Constant offset 1: both gfx900 and gfx1010 checks expect it in the load's
; immediate field (offset:1). The load is volatile and its result is stored
; through an undef pointer.
631define amdgpu_kernel void @global_inst_salu_offset_1(i8 addrspace(1)* %p) {
632; GFX9-LABEL: global_inst_salu_offset_1:
633; GFX9:       ; %bb.0:
634; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
635; GFX9-NEXT:    v_mov_b32_e32 v0, 0
636; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
637; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:1
638; GFX9-NEXT:    s_waitcnt vmcnt(0)
639; GFX9-NEXT:    global_store_byte v[0:1], v0, off
640; GFX9-NEXT:    s_endpgm
641;
642; GFX10-LABEL: global_inst_salu_offset_1:
643; GFX10:       ; %bb.0:
644; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
645; GFX10-NEXT:    v_mov_b32_e32 v0, 0
646; GFX10-NEXT:    ; implicit-def: $vcc_hi
647; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
648; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:1
649; GFX10-NEXT:    s_waitcnt vmcnt(0)
650; GFX10-NEXT:    global_store_byte v[0:1], v0, off
651; GFX10-NEXT:    s_endpgm
652  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
653  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
654  store i8 %load, i8 addrspace(1)* undef
655  ret void
656}
657
; Kernel variant: the pointer is a kernel argument, which the checks expect to
; be fetched into s[0:1] with s_load_dwordx2 and used as the load's scalar
; base alongside a VGPR holding 0.
; Constant offset 2047: both gfx900 and gfx1010 checks expect it in the load's
; immediate field (offset:2047). The load is volatile and its result is stored
; through an undef pointer.
658define amdgpu_kernel void @global_inst_salu_offset_11bit_max(i8 addrspace(1)* %p) {
659; GFX9-LABEL: global_inst_salu_offset_11bit_max:
660; GFX9:       ; %bb.0:
661; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
662; GFX9-NEXT:    v_mov_b32_e32 v0, 0
663; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
664; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047
665; GFX9-NEXT:    s_waitcnt vmcnt(0)
666; GFX9-NEXT:    global_store_byte v[0:1], v0, off
667; GFX9-NEXT:    s_endpgm
668;
669; GFX10-LABEL: global_inst_salu_offset_11bit_max:
670; GFX10:       ; %bb.0:
671; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
672; GFX10-NEXT:    v_mov_b32_e32 v0, 0
673; GFX10-NEXT:    ; implicit-def: $vcc_hi
674; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
675; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047
676; GFX10-NEXT:    s_waitcnt vmcnt(0)
677; GFX10-NEXT:    global_store_byte v[0:1], v0, off
678; GFX10-NEXT:    s_endpgm
679  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
680  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
681  store i8 %load, i8 addrspace(1)* undef
682  ret void
683}
684
; Kernel variant: the pointer is a kernel argument, which the checks expect to
; be fetched into s[0:1] with s_load_dwordx2 and used as the load's scalar
; base. Constant offset 4095 is expected to be placed as follows:
;   gfx900  checks: VGPR offset 0, load with offset:4095
;   gfx1010 checks: VGPR offset 0x800, load with offset:2047
; The load is volatile and its result is stored through an undef pointer.
685define amdgpu_kernel void @global_inst_salu_offset_12bit_max(i8 addrspace(1)* %p) {
686; GFX9-LABEL: global_inst_salu_offset_12bit_max:
687; GFX9:       ; %bb.0:
688; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
689; GFX9-NEXT:    v_mov_b32_e32 v0, 0
690; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
691; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095
692; GFX9-NEXT:    s_waitcnt vmcnt(0)
693; GFX9-NEXT:    global_store_byte v[0:1], v0, off
694; GFX9-NEXT:    s_endpgm
695;
696; GFX10-LABEL: global_inst_salu_offset_12bit_max:
697; GFX10:       ; %bb.0:
698; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
699; GFX10-NEXT:    v_mov_b32_e32 v0, 0x800
700; GFX10-NEXT:    ; implicit-def: $vcc_hi
701; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
702; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047
703; GFX10-NEXT:    s_waitcnt vmcnt(0)
704; GFX10-NEXT:    global_store_byte v[0:1], v0, off
705; GFX10-NEXT:    s_endpgm
706  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
707  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
708  store i8 %load, i8 addrspace(1)* undef
709  ret void
710}
711
; Kernel variant: the pointer is a kernel argument, which the checks expect to
; be fetched into s[0:1] with s_load_dwordx2 and used as the load's scalar
; base. Constant offset 8191 is expected to be split between a VGPR offset and
; the load's immediate field:
;   gfx900  checks: VGPR offset 0x1000, load with offset:4095
;   gfx1010 checks: VGPR offset 0x1800, load with offset:2047
; The load is volatile and its result is stored through an undef pointer.
712define amdgpu_kernel void @global_inst_salu_offset_13bit_max(i8 addrspace(1)* %p) {
713; GFX9-LABEL: global_inst_salu_offset_13bit_max:
714; GFX9:       ; %bb.0:
715; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
716; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1000
717; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
718; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095
719; GFX9-NEXT:    s_waitcnt vmcnt(0)
720; GFX9-NEXT:    global_store_byte v[0:1], v0, off
721; GFX9-NEXT:    s_endpgm
722;
723; GFX10-LABEL: global_inst_salu_offset_13bit_max:
724; GFX10:       ; %bb.0:
725; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
726; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1800
727; GFX10-NEXT:    ; implicit-def: $vcc_hi
728; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
729; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047
730; GFX10-NEXT:    s_waitcnt vmcnt(0)
731; GFX10-NEXT:    global_store_byte v[0:1], v0, off
732; GFX10-NEXT:    s_endpgm
733  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
734  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
735  store i8 %load, i8 addrspace(1)* undef
736  ret void
737}
738
; Kernel-argument pointer %p plus a constant byte offset of -2048.
; On both GFX9 and GFX10 the checks expect the whole constant in the
; offset field (v0 = 0, offset:-2048); no split is expected.
; The load is volatile and its result is stored to an undef address.
739define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
740; GFX9-LABEL: global_inst_salu_offset_neg_11bit_max:
741; GFX9:       ; %bb.0:
742; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
743; GFX9-NEXT:    v_mov_b32_e32 v0, 0
744; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
745; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-2048
746; GFX9-NEXT:    s_waitcnt vmcnt(0)
747; GFX9-NEXT:    global_store_byte v[0:1], v0, off
748; GFX9-NEXT:    s_endpgm
749;
750; GFX10-LABEL: global_inst_salu_offset_neg_11bit_max:
751; GFX10:       ; %bb.0:
752; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
753; GFX10-NEXT:    v_mov_b32_e32 v0, 0
754; GFX10-NEXT:    ; implicit-def: $vcc_hi
755; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
756; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-2048
757; GFX10-NEXT:    s_waitcnt vmcnt(0)
758; GFX10-NEXT:    global_store_byte v[0:1], v0, off
759; GFX10-NEXT:    s_endpgm
760  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
761  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
762  store i8 %load, i8 addrspace(1)* undef
763  ret void
764}
765
; Kernel-argument pointer %p plus a constant byte offset of -4096.
; On GFX9 the checks expect the whole constant in the offset field
; (v0 = 0, offset:-4096). On GFX10 the checks expect no offset field at
; all: the constant is added to the pointer with a 64-bit add
; (0xfffff000 into the low dword, -1 plus carry into the high dword).
; The load is volatile and its result is stored to an undef address.
766define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
767; GFX9-LABEL: global_inst_salu_offset_neg_12bit_max:
768; GFX9:       ; %bb.0:
769; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
770; GFX9-NEXT:    v_mov_b32_e32 v0, 0
771; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
772; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-4096
773; GFX9-NEXT:    s_waitcnt vmcnt(0)
774; GFX9-NEXT:    global_store_byte v[0:1], v0, off
775; GFX9-NEXT:    s_endpgm
776;
777; GFX10-LABEL: global_inst_salu_offset_neg_12bit_max:
778; GFX10:       ; %bb.0:
779; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
780; GFX10-NEXT:    ; implicit-def: $vcc_hi
781; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
782; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0xfffff000, s0
783; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
784; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
785; GFX10-NEXT:    s_waitcnt vmcnt(0)
786; GFX10-NEXT:    global_store_byte v[0:1], v0, off
787; GFX10-NEXT:    s_endpgm
788  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
789  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
790  store i8 %load, i8 addrspace(1)* undef
791  ret void
792}
793
; Kernel-argument pointer %p plus a constant byte offset of -8192.
; On both targets the checks expect no offset field: the constant is
; added to the pointer with a 64-bit add (0xffffe000 into the low dword,
; -1 plus carry into the high dword) and the load uses v[0:1] directly.
; The load is volatile and its result is stored to an undef address.
794define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
795; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max:
796; GFX9:       ; %bb.0:
797; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
798; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
799; GFX9-NEXT:    v_mov_b32_e32 v0, s0
800; GFX9-NEXT:    v_mov_b32_e32 v1, s1
801; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
802; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
803; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
804; GFX9-NEXT:    s_waitcnt vmcnt(0)
805; GFX9-NEXT:    global_store_byte v[0:1], v0, off
806; GFX9-NEXT:    s_endpgm
807;
808; GFX10-LABEL: global_inst_salu_offset_neg_13bit_max:
809; GFX10:       ; %bb.0:
810; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
811; GFX10-NEXT:    ; implicit-def: $vcc_hi
812; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
813; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0xffffe000, s0
814; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
815; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
816; GFX10-NEXT:    s_waitcnt vmcnt(0)
817; GFX10-NEXT:    global_store_byte v[0:1], v0, off
818; GFX10-NEXT:    s_endpgm
819  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
820  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
821  store i8 %load, i8 addrspace(1)* undef
822  ret void
823}
824
; Kernel-argument pointer %p plus a constant byte offset of 4095 (the same
; GEP constant as @global_inst_salu_offset_12bit_max).
; On GFX9 the checks expect v0 = 0 with offset:4095; on GFX10 they expect
; a split into v0 = 0x800 plus offset:2047.
; The load is volatile and its result is stored to an undef address.
825define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
826; GFX9-LABEL: global_inst_salu_offset_2x_11bit_max:
827; GFX9:       ; %bb.0:
828; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
829; GFX9-NEXT:    v_mov_b32_e32 v0, 0
830; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
831; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095
832; GFX9-NEXT:    s_waitcnt vmcnt(0)
833; GFX9-NEXT:    global_store_byte v[0:1], v0, off
834; GFX9-NEXT:    s_endpgm
835;
836; GFX10-LABEL: global_inst_salu_offset_2x_11bit_max:
837; GFX10:       ; %bb.0:
838; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
839; GFX10-NEXT:    v_mov_b32_e32 v0, 0x800
840; GFX10-NEXT:    ; implicit-def: $vcc_hi
841; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
842; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047
843; GFX10-NEXT:    s_waitcnt vmcnt(0)
844; GFX10-NEXT:    global_store_byte v[0:1], v0, off
845; GFX10-NEXT:    s_endpgm
846  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
847  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
848  store i8 %load, i8 addrspace(1)* undef
849  ret void
850}
851
; Kernel-argument pointer %p plus a constant byte offset of 8191 (the same
; GEP constant as @global_inst_salu_offset_13bit_max).
; On GFX9 the checks expect v0 = 0x1000 plus offset:4095; on GFX10 they
; expect v0 = 0x1800 plus offset:2047 (both sum to 8191).
; The load is volatile and its result is stored to an undef address.
852define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
853; GFX9-LABEL: global_inst_salu_offset_2x_12bit_max:
854; GFX9:       ; %bb.0:
855; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
856; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1000
857; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
858; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095
859; GFX9-NEXT:    s_waitcnt vmcnt(0)
860; GFX9-NEXT:    global_store_byte v[0:1], v0, off
861; GFX9-NEXT:    s_endpgm
862;
863; GFX10-LABEL: global_inst_salu_offset_2x_12bit_max:
864; GFX10:       ; %bb.0:
865; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
866; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1800
867; GFX10-NEXT:    ; implicit-def: $vcc_hi
868; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
869; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047
870; GFX10-NEXT:    s_waitcnt vmcnt(0)
871; GFX10-NEXT:    global_store_byte v[0:1], v0, off
872; GFX10-NEXT:    s_endpgm
873  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
874  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
875  store i8 %load, i8 addrspace(1)* undef
876  ret void
877}
878
; Kernel-argument pointer %p plus a constant byte offset of 16383.
; On GFX9 the checks expect v0 = 0x3000 plus offset:4095; on GFX10 they
; expect v0 = 0x3800 plus offset:2047 (both sum to 16383).
; The load is volatile and its result is stored to an undef address.
879define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
880; GFX9-LABEL: global_inst_salu_offset_2x_13bit_max:
881; GFX9:       ; %bb.0:
882; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
883; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3000
884; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
885; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:4095
886; GFX9-NEXT:    s_waitcnt vmcnt(0)
887; GFX9-NEXT:    global_store_byte v[0:1], v0, off
888; GFX9-NEXT:    s_endpgm
889;
890; GFX10-LABEL: global_inst_salu_offset_2x_13bit_max:
891; GFX10:       ; %bb.0:
892; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
893; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3800
894; GFX10-NEXT:    ; implicit-def: $vcc_hi
895; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
896; GFX10-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:2047
897; GFX10-NEXT:    s_waitcnt vmcnt(0)
898; GFX10-NEXT:    global_store_byte v[0:1], v0, off
899; GFX10-NEXT:    s_endpgm
900  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
901  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
902  store i8 %load, i8 addrspace(1)* undef
903  ret void
904}
905
; Kernel-argument pointer %p plus a constant byte offset of -4096 (the same
; GEP constant as @global_inst_salu_offset_neg_12bit_max).
; On GFX9 the checks expect v0 = 0 with offset:-4096. On GFX10 the checks
; expect a 64-bit add of the constant to the pointer (0xfffff000 low,
; -1 plus carry high) and a load with no offset field.
; The load is volatile and its result is stored to an undef address.
906define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
907; GFX9-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
908; GFX9:       ; %bb.0:
909; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
910; GFX9-NEXT:    v_mov_b32_e32 v0, 0
911; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
912; GFX9-NEXT:    global_load_ubyte v0, v0, s[0:1] offset:-4096
913; GFX9-NEXT:    s_waitcnt vmcnt(0)
914; GFX9-NEXT:    global_store_byte v[0:1], v0, off
915; GFX9-NEXT:    s_endpgm
916;
917; GFX10-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
918; GFX10:       ; %bb.0:
919; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
920; GFX10-NEXT:    ; implicit-def: $vcc_hi
921; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
922; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0xfffff000, s0
923; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
924; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
925; GFX10-NEXT:    s_waitcnt vmcnt(0)
926; GFX10-NEXT:    global_store_byte v[0:1], v0, off
927; GFX10-NEXT:    s_endpgm
928  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
929  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
930  store i8 %load, i8 addrspace(1)* undef
931  ret void
932}
933
; Kernel-argument pointer %p plus a constant byte offset of -8192 (the same
; GEP constant as @global_inst_salu_offset_neg_13bit_max).
; On both targets the checks expect a 64-bit add of the constant to the
; pointer (0xffffe000 low, -1 plus carry high) and a load with no offset
; field.
; The load is volatile and its result is stored to an undef address.
934define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
935; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
936; GFX9:       ; %bb.0:
937; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
938; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
939; GFX9-NEXT:    v_mov_b32_e32 v0, s0
940; GFX9-NEXT:    v_mov_b32_e32 v1, s1
941; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
942; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
943; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
944; GFX9-NEXT:    s_waitcnt vmcnt(0)
945; GFX9-NEXT:    global_store_byte v[0:1], v0, off
946; GFX9-NEXT:    s_endpgm
947;
948; GFX10-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
949; GFX10:       ; %bb.0:
950; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
951; GFX10-NEXT:    ; implicit-def: $vcc_hi
952; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
953; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0xffffe000, s0
954; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
955; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
956; GFX10-NEXT:    s_waitcnt vmcnt(0)
957; GFX10-NEXT:    global_store_byte v[0:1], v0, off
958; GFX10-NEXT:    s_endpgm
959  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
960  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
961  store i8 %load, i8 addrspace(1)* undef
962  ret void
963}
964
; Kernel-argument pointer %p plus a constant byte offset of -16384.
; On both targets the checks expect a 64-bit add of the constant to the
; pointer (0xffffc000 low, -1 plus carry high) and a load with no offset
; field.
; The load is volatile and its result is stored to an undef address.
965define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
966; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
967; GFX9:       ; %bb.0:
968; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
969; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
970; GFX9-NEXT:    v_mov_b32_e32 v0, s0
971; GFX9-NEXT:    v_mov_b32_e32 v1, s1
972; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
973; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
974; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
975; GFX9-NEXT:    s_waitcnt vmcnt(0)
976; GFX9-NEXT:    global_store_byte v[0:1], v0, off
977; GFX9-NEXT:    s_endpgm
978;
979; GFX10-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
980; GFX10:       ; %bb.0:
981; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
982; GFX10-NEXT:    ; implicit-def: $vcc_hi
983; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
984; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0xffffc000, s0
985; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
986; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
987; GFX10-NEXT:    s_waitcnt vmcnt(0)
988; GFX10-NEXT:    global_store_byte v[0:1], v0, off
989; GFX10-NEXT:    s_endpgm
990  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384
991  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
992  store i8 %load, i8 addrspace(1)* undef
993  ret void
994}
995
996; Fill 11-bit low-bits (1ull << 33) | 2047
; GEP constant 8589936639 = 2^33 + 2047, applied to kernel-argument %p.
; On both targets the checks expect the high part to be added to the
; pointer (0 into the low dword, 2 plus carry into the high dword) and the
; low 2047 to stay in the offset field (offset:2047).
; The load is volatile and its result is stored to an undef address.
997define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
998; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0:
999; GFX9:       ; %bb.0:
1000; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1001; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1002; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1003; GFX9-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
1004; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1005; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
1006; GFX9-NEXT:    s_waitcnt vmcnt(0)
1007; GFX9-NEXT:    global_store_byte v[0:1], v0, off
1008; GFX9-NEXT:    s_endpgm
1009;
1010; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split0:
1011; GFX10:       ; %bb.0:
1012; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1013; GFX10-NEXT:    ; implicit-def: $vcc_hi
1014; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1015; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0, s0
1016; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1017; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
1018; GFX10-NEXT:    s_waitcnt vmcnt(0)
1019; GFX10-NEXT:    global_store_byte v[0:1], v0, off
1020; GFX10-NEXT:    s_endpgm
1021  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639
1022  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1023  store i8 %load, i8 addrspace(1)* undef
1024  ret void
1025}
1026
1027; Fill 11-bit low-bits (1ull << 33) | 2048
; GEP constant 8589936640 = 2^33 + 2048, applied to kernel-argument %p.
; On GFX9 the checks expect 0 / 2 added to the low / high pointer dwords
; with the low 2048 kept in the offset field (offset:2048). On GFX10 the
; checks expect 0x800 / 2 added to the low / high dwords and a load with
; no offset field.
; The load is volatile and its result is stored to an undef address.
1028define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
1029; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1:
1030; GFX9:       ; %bb.0:
1031; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1032; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1033; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1034; GFX9-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
1035; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1036; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2048
1037; GFX9-NEXT:    s_waitcnt vmcnt(0)
1038; GFX9-NEXT:    global_store_byte v[0:1], v0, off
1039; GFX9-NEXT:    s_endpgm
1040;
1041; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split1:
1042; GFX10:       ; %bb.0:
1043; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1044; GFX10-NEXT:    ; implicit-def: $vcc_hi
1045; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1046; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x800, s0
1047; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1048; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
1049; GFX10-NEXT:    s_waitcnt vmcnt(0)
1050; GFX10-NEXT:    global_store_byte v[0:1], v0, off
1051; GFX10-NEXT:    s_endpgm
1052  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640
1053  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1054  store i8 %load, i8 addrspace(1)* undef
1055  ret void
1056}
1057
1058; Fill 12-bit low-bits (1ull << 33) | 4095
; GEP constant 8589938687 = 2^33 + 4095, applied to kernel-argument %p.
; On GFX9 the checks expect 0 / 2 added to the low / high pointer dwords
; with offset:4095. On GFX10 the checks expect 0x800 / 2 added to the
; low / high dwords with offset:2047 (0x800 + 2047 = 4095).
; The load is volatile and its result is stored to an undef address.
1059define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
1060; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0:
1061; GFX9:       ; %bb.0:
1062; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1063; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1064; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1065; GFX9-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
1066; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1067; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
1068; GFX9-NEXT:    s_waitcnt vmcnt(0)
1069; GFX9-NEXT:    global_store_byte v[0:1], v0, off
1070; GFX9-NEXT:    s_endpgm
1071;
1072; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split0:
1073; GFX10:       ; %bb.0:
1074; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1075; GFX10-NEXT:    ; implicit-def: $vcc_hi
1076; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1077; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x800, s0
1078; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1079; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
1080; GFX10-NEXT:    s_waitcnt vmcnt(0)
1081; GFX10-NEXT:    global_store_byte v[0:1], v0, off
1082; GFX10-NEXT:    s_endpgm
1083  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687
1084  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1085  store i8 %load, i8 addrspace(1)* undef
1086  ret void
1087}
1088
1089; Fill 12-bit low-bits (1ull << 33) | 4096
; GEP constant 8589938688 = 2^33 + 4096, applied to kernel-argument %p.
; On both targets the checks expect the whole constant to be added to the
; pointer (0x1000 into the low dword, 2 plus carry into the high dword)
; and a load with no offset field.
; The load is volatile and its result is stored to an undef address.
1090define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
1091; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1:
1092; GFX9:       ; %bb.0:
1093; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1094; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1095; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1096; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1097; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1098; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1099; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
1100; GFX9-NEXT:    s_waitcnt vmcnt(0)
1101; GFX9-NEXT:    global_store_byte v[0:1], v0, off
1102; GFX9-NEXT:    s_endpgm
1103;
1104; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split1:
1105; GFX10:       ; %bb.0:
1106; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1107; GFX10-NEXT:    ; implicit-def: $vcc_hi
1108; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1109; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x1000, s0
1110; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1111; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
1112; GFX10-NEXT:    s_waitcnt vmcnt(0)
1113; GFX10-NEXT:    global_store_byte v[0:1], v0, off
1114; GFX10-NEXT:    s_endpgm
1115  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688
1116  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1117  store i8 %load, i8 addrspace(1)* undef
1118  ret void
1119}
1120
1121; Fill 13-bit low-bits (1ull << 33) | 8191
; GEP constant 8589942783 = 2^33 + 8191, applied to kernel-argument %p.
; On GFX9 the checks expect 0x1000 / 2 added to the low / high pointer
; dwords with offset:4095. On GFX10 the checks expect 0x1800 / 2 added
; with offset:2047 (both low parts sum to 8191).
; The load is volatile and its result is stored to an undef address.
1122define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
1123; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0:
1124; GFX9:       ; %bb.0:
1125; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1126; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1127; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1128; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1129; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1130; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1131; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
1132; GFX9-NEXT:    s_waitcnt vmcnt(0)
1133; GFX9-NEXT:    global_store_byte v[0:1], v0, off
1134; GFX9-NEXT:    s_endpgm
1135;
1136; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split0:
1137; GFX10:       ; %bb.0:
1138; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1139; GFX10-NEXT:    ; implicit-def: $vcc_hi
1140; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1141; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x1800, s0
1142; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1143; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
1144; GFX10-NEXT:    s_waitcnt vmcnt(0)
1145; GFX10-NEXT:    global_store_byte v[0:1], v0, off
1146; GFX10-NEXT:    s_endpgm
1147  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
1148  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1149  store i8 %load, i8 addrspace(1)* undef
1150  ret void
1151}
1152
1153; Fill 13-bit low-bits (1ull << 33) | 8192
; GEP constant 8589942784 = 2^33 + 8192, applied to kernel-argument %p.
; On both targets the checks expect the whole constant to be added to the
; pointer (0x2000 into the low dword, 2 plus carry into the high dword)
; and a load with no offset field.
; The load is volatile and its result is stored to an undef address.
1154define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
1155; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1:
1156; GFX9:       ; %bb.0:
1157; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1158; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1159; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1160; GFX9-NEXT:    v_mov_b32_e32 v1, s1
1161; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
1162; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1163; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
1164; GFX9-NEXT:    s_waitcnt vmcnt(0)
1165; GFX9-NEXT:    global_store_byte v[0:1], v0, off
1166; GFX9-NEXT:    s_endpgm
1167;
1168; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split1:
1169; GFX10:       ; %bb.0:
1170; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1171; GFX10-NEXT:    ; implicit-def: $vcc_hi
1172; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1173; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x2000, s0
1174; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1175; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
1176; GFX10-NEXT:    s_waitcnt vmcnt(0)
1177; GFX10-NEXT:    global_store_byte v[0:1], v0, off
1178; GFX10-NEXT:    s_endpgm
1179  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784
1180  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1181  store i8 %load, i8 addrspace(1)* undef
1182  ret void
1183}
1184
1185; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; GEP constant -9223372036854773761, i.e. bit 63 set with low bits 2047,
; applied to kernel-argument %p.
; On GFX9 the checks expect 0x1000 added to the low dword and offset:-2049
; (0x1000 - 2049 = 2047); the high-dword addend is materialized with
; v_bfrev_b32 of 1. On GFX10 the checks expect 0x800 added to the low
; dword, the literal 0x80000000 added to the high dword, and offset:-1.
; The load is volatile and its result is stored to an undef address.
1186define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
1187; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
1188; GFX9:       ; %bb.0:
1189; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1190; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1191; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1192; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1193; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1194; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1195; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1196; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2049
1197; GFX9-NEXT:    s_waitcnt vmcnt(0)
1198; GFX9-NEXT:    global_store_byte v[0:1], v0, off
1199; GFX9-NEXT:    s_endpgm
1200;
1201; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
1202; GFX10:       ; %bb.0:
1203; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1204; GFX10-NEXT:    ; implicit-def: $vcc_hi
1205; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1206; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1207; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, s0
1208; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1209; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
1210; GFX10-NEXT:    s_waitcnt vmcnt(0)
1211; GFX10-NEXT:    global_store_byte v[0:1], v0, off
1212; GFX10-NEXT:    s_endpgm
1213  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
1214  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1215  store i8 %load, i8 addrspace(1)* undef
1216  ret void
1217}
1218
1219; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; GEP constant -9223372036854773760, i.e. bit 63 set with low bits 2048,
; applied to kernel-argument %p.
; On GFX9 the checks expect 0x1000 added to the low dword and offset:-2048
; (0x1000 - 2048 = 2048); the high-dword addend is materialized with
; v_bfrev_b32 of 1. On GFX10 the checks expect 0x800 added to the low
; dword, the literal 0x80000000 added to the high dword, and no offset
; field.
; The load is volatile and its result is stored to an undef address.
1220define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
1221; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
1222; GFX9:       ; %bb.0:
1223; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1224; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1225; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1226; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1227; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1228; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1229; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1230; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
1231; GFX9-NEXT:    s_waitcnt vmcnt(0)
1232; GFX9-NEXT:    global_store_byte v[0:1], v0, off
1233; GFX9-NEXT:    s_endpgm
1234;
1235; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
1236; GFX10:       ; %bb.0:
1237; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1238; GFX10-NEXT:    ; implicit-def: $vcc_hi
1239; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1240; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1241; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, s0
1242; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1243; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
1244; GFX10-NEXT:    s_waitcnt vmcnt(0)
1245; GFX10-NEXT:    global_store_byte v[0:1], v0, off
1246; GFX10-NEXT:    s_endpgm
1247  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
1248  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1249  store i8 %load, i8 addrspace(1)* undef
1250  ret void
1251}
1252
1253; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; GEP constant -9223372036854771713, i.e. bit 63 set with low bits 4095,
; applied to kernel-argument %p.
; On both targets the checks expect 0x1000 added to the low dword and
; offset:-1 (0x1000 - 1 = 4095). The high-dword addend is materialized
; with v_bfrev_b32 of 1 on GFX9 and is the literal 0x80000000 on GFX10.
; The load is volatile and its result is stored to an undef address.
1254define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
1255; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
1256; GFX9:       ; %bb.0:
1257; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1258; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1259; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1260; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1261; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1262; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1263; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1264; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
1265; GFX9-NEXT:    s_waitcnt vmcnt(0)
1266; GFX9-NEXT:    global_store_byte v[0:1], v0, off
1267; GFX9-NEXT:    s_endpgm
1268;
1269; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
1270; GFX10:       ; %bb.0:
1271; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1272; GFX10-NEXT:    ; implicit-def: $vcc_hi
1273; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1274; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1275; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0
1276; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1277; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
1278; GFX10-NEXT:    s_waitcnt vmcnt(0)
1279; GFX10-NEXT:    global_store_byte v[0:1], v0, off
1280; GFX10-NEXT:    s_endpgm
1281  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
1282  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1283  store i8 %load, i8 addrspace(1)* undef
1284  ret void
1285}
1286
1287; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; GEP constant -9223372036854771712, i.e. bit 63 set with low bits 4096,
; applied to kernel-argument %p.
; On both targets the checks expect 0x1000 added to the low dword and a
; load with no offset field. The high-dword addend is materialized with
; v_bfrev_b32 of 1 on GFX9 and is the literal 0x80000000 on GFX10.
; The load is volatile and its result is stored to an undef address.
1288define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
1289; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
1290; GFX9:       ; %bb.0:
1291; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1292; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1293; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1294; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1295; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1296; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
1297; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1298; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
1299; GFX9-NEXT:    s_waitcnt vmcnt(0)
1300; GFX9-NEXT:    global_store_byte v[0:1], v0, off
1301; GFX9-NEXT:    s_endpgm
1302;
1303; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
1304; GFX10:       ; %bb.0:
1305; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1306; GFX10-NEXT:    ; implicit-def: $vcc_hi
1307; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1308; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1309; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0
1310; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1311; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
1312; GFX10-NEXT:    s_waitcnt vmcnt(0)
1313; GFX10-NEXT:    global_store_byte v[0:1], v0, off
1314; GFX10-NEXT:    s_endpgm
1315  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
1316  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1317  store i8 %load, i8 addrspace(1)* undef
1318  ret void
1319}
1320
1321; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; GEP constant -9223372036854767617, i.e. bit 63 set with low bits 8191,
; applied to kernel-argument %p.
; On both targets the checks expect 0x2000 added to the low dword and
; offset:-1 (0x2000 - 1 = 8191). The high-dword addend is materialized
; with v_bfrev_b32 of 1 on GFX9 and is the literal 0x80000000 on GFX10.
; The load is volatile and its result is stored to an undef address.
1322define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
1323; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
1324; GFX9:       ; %bb.0:
1325; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1326; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1327; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1328; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1329; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1330; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
1331; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1332; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
1333; GFX9-NEXT:    s_waitcnt vmcnt(0)
1334; GFX9-NEXT:    global_store_byte v[0:1], v0, off
1335; GFX9-NEXT:    s_endpgm
1336;
1337; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
1338; GFX10:       ; %bb.0:
1339; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1340; GFX10-NEXT:    ; implicit-def: $vcc_hi
1341; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1342; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1343; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0
1344; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1345; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
1346; GFX10-NEXT:    s_waitcnt vmcnt(0)
1347; GFX10-NEXT:    global_store_byte v[0:1], v0, off
1348; GFX10-NEXT:    s_endpgm
1349  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
1350  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1351  store i8 %load, i8 addrspace(1)* undef
1352  ret void
1353}
1354
1355; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; GEP constant -9223372036854767616, i.e. bit 63 set with low bits 8192,
; applied to kernel-argument %p.
; On both targets the checks expect 0x2000 added to the low dword and a
; load with no offset field. The high-dword addend is materialized with
; v_bfrev_b32 of 1 on GFX9 and is the literal 0x80000000 on GFX10.
; The load is volatile and its result is stored to an undef address.
1356define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
1357; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
1358; GFX9:       ; %bb.0:
1359; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1360; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
1361; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1362; GFX9-NEXT:    v_mov_b32_e32 v0, s0
1363; GFX9-NEXT:    v_mov_b32_e32 v2, s1
1364; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
1365; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1366; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
1367; GFX9-NEXT:    s_waitcnt vmcnt(0)
1368; GFX9-NEXT:    global_store_byte v[0:1], v0, off
1369; GFX9-NEXT:    s_endpgm
1370;
1371; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
1372; GFX10:       ; %bb.0:
1373; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
1374; GFX10-NEXT:    ; implicit-def: $vcc_hi
1375; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
1376; GFX10-NEXT:    v_mov_b32_e32 v1, s1
1377; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0
1378; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1379; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
1380; GFX10-NEXT:    s_waitcnt vmcnt(0)
1381; GFX10-NEXT:    global_store_byte v[0:1], v0, off
1382; GFX10-NEXT:    s_endpgm
1383  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
1384  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1385  store i8 %load, i8 addrspace(1)* undef
1386  ret void
1387}
1388