; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Test splitting flat instruction offsets into the low and high bits
; when the offset doesn't fit in the offset field.
; Byte load from %p + 1. The constant is small enough that both gfx900 and
; gfx1010 are expected to encode it directly in the load (offset:1).
define i8 @global_inst_valu_offset_1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Byte load from %p + 2047. Both gfx900 and gfx1010 are expected to fold the
; whole constant into the load (offset:2047) with no address arithmetic.
define i8 @global_inst_valu_offset_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Byte load from %p + 4095. gfx900 is expected to fold the whole constant
; (offset:4095); gfx1010 is expected to add 0x800 to the base and keep the
; remaining 2047 in the load.
define i8 @global_inst_valu_offset_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Byte load from %p + 8191. Neither target can fold the whole constant:
; gfx900 is expected to split it as base + 0x1000 with offset:4095, and
; gfx1010 as base + 0x1800 with offset:2047.
define i8 @global_inst_valu_offset_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Byte load from %p - 2048. Both gfx900 and gfx1010 are expected to fold the
; negative constant into the load (offset:-2048).
define i8 @global_inst_valu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Byte load from %p - 4096. gfx900 is expected to fold the constant
; (offset:-4096); gfx1010 is expected to add the full 64-bit value
; (0xfffff000 low, -1 high) to the base and use no immediate offset.
define i8 @global_inst_valu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Byte load from %p - 8192. Both targets are expected to add the whole
; constant to the base (0xffffe000 low, -1 high) and emit the load with no
; immediate offset.
define i8 @global_inst_valu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Byte load from %p + 4095. gfx900 is expected to fold the constant
; (offset:4095); gfx1010 is expected to split it as base + 0x800 with
; offset:2047.
define i8 @global_inst_valu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Byte load from %p + 8191. gfx900 is expected to split it as base + 0x1000
; with offset:4095; gfx1010 as base + 0x1800 with offset:2047.
define i8 @global_inst_valu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Byte load from %p + 16383. gfx900 is expected to split it as base + 0x3000
; with offset:4095; gfx1010 as base + 0x3800 with offset:2047.
define i8 @global_inst_valu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x3800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Byte load from %p - 4096. gfx900 is expected to fold the constant
; (offset:-4096); gfx1010 is expected to add 0xfffff000 / -1 to the base
; and use no immediate offset.
define i8 @global_inst_valu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Byte load from %p - 8192. Both targets are expected to add the whole
; constant to the base (0xffffe000 low, -1 high) and use no immediate offset.
define i8 @global_inst_valu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Byte load from %p - 16384. Both targets are expected to add the whole
; constant to the base (0xffffc000 low, -1 high) and use no immediate offset.
define i8 @global_inst_valu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0xffffc000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Fill 11-bit low-bits (1ull << 33) | 2047
; The high half (2) is expected to be added to the upper address register
; through the carry chain, while the low 2047 stays in the load's immediate
; offset on both gfx900 and gfx1010.
define i8 @global_inst_valu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Fill 11-bit low-bits (1ull << 33) | 2048
; gfx900 is expected to keep the low 2048 in the load's immediate offset;
; gfx1010 is expected to add 0x800 to the low address half and use no
; immediate offset. Both add 2 to the high half.
define i8 @global_inst_valu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2048
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Fill 12-bit low-bits (1ull << 33) | 4095
; gfx900 is expected to keep the low 4095 in the load's immediate offset;
; gfx1010 is expected to split it as 0x800 added to the base plus
; offset:2047. Both add 2 to the high half.
define i8 @global_inst_valu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Fill 12-bit low-bits (1ull << 33) | 4096
; Both targets are expected to add 0x1000 to the low address half and 2 to
; the high half, leaving no immediate offset on the load.
define i8 @global_inst_valu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Fill 13-bit low-bits (1ull << 33) | 8191
; gfx900 is expected to split the low part as 0x1000 added to the base plus
; offset:4095; gfx1010 as 0x1800 plus offset:2047. Both add 2 to the high
; half.
define i8 @global_inst_valu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Fill 13-bit low-bits (1ull << 33) | 8192
; Both targets are expected to add 0x2000 to the low address half and 2 to
; the high half, leaving no immediate offset on the load.
define i8 @global_inst_valu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; The low 2047 is expected to be formed by over-adding to the base and using
; a negative immediate: 0x1000 with offset:-2049 on gfx900, 0x800 with
; offset:-1 on gfx1010. The high half 0x80000000 is added through the carry
; chain (gfx900 materializes it in v2 with v_bfrev_b32 of 1).
define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2049
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; gfx900 is expected to add 0x1000 to the base and use offset:-2048;
; gfx1010 is expected to add 0x800 and use no immediate offset. The high
; half 0x80000000 is added through the carry chain on both.
define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; Both targets are expected to add 0x1000 to the low address half and reach
; 4095 with offset:-1. The high half 0x80000000 is added through the carry
; chain.
define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; Both targets are expected to add 0x1000 to the low address half and
; 0x80000000 to the high half, leaving no immediate offset on the load.
define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; Both targets are expected to add 0x2000 to the low address half and reach
; 8191 with offset:-1. The high half 0x80000000 is added through the carry
; chain.
define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; Both targets are expected to add 0x2000 to the low address half and
; 0x80000000 to the high half, leaving no immediate offset on the load.
define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
  %load = load i8, i8 addrspace(1)* %gep, align 4
  ret i8 %load
}
; Kernel variant: %p is a kernel argument, so the expected code loads it into
; SGPRs (s_load_dwordx2) and copies it to VGPRs before the access. The
; volatile byte load at %p + 1 is expected to fold the constant (offset:1)
; on both targets; the loaded value is stored to an undef pointer.
define amdgpu_kernel void @global_inst_salu_offset_1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
; Kernel variant: volatile byte load at %p + 2047 with %p coming from SGPRs.
; Both targets are expected to copy the pointer to VGPRs and fold the
; constant into the load (offset:2047).
define amdgpu_kernel void @global_inst_salu_offset_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
; Kernel variant: volatile byte load at %p + 4095 with %p coming from SGPRs.
; gfx900 is expected to fold the constant (offset:4095). gfx1010 is expected
; to add 0x800 to the SGPR pointer directly in the VALU adds (carry kept in
; s0) and leave 2047 in the load.
define amdgpu_kernel void @global_inst_salu_offset_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
; Kernel variant: volatile byte load at %p + 8191 with %p coming from SGPRs.
; gfx900 is expected to copy the pointer to VGPRs, add 0x1000 and use
; offset:4095. gfx1010 is expected to add 0x1800 to the SGPR pointer in the
; VALU adds and use offset:2047.
define amdgpu_kernel void @global_inst_salu_offset_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x1800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
; Kernel variant: volatile byte load at %p - 2048 with %p coming from SGPRs.
; Both targets are expected to copy the pointer to VGPRs and fold the
; negative constant into the load (offset:-2048).
define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
727
; Volatile byte load at constant offset -4096 from a kernel-argument pointer.
; gfx900 is expected to encode it directly as offset:-4096. gfx1010 is expected
; to fold the whole offset into a 64-bit add (0xfffff000 low, -1 high) and emit
; the load with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0xfffff000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
756
; Volatile byte load at constant offset -8192 from a kernel-argument pointer.
; On both gfx900 and gfx1010 the whole offset is expected to be applied with a
; 64-bit add (0xffffe000 low, -1 high); the load carries no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0xffffe000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
787
; Volatile byte load at constant offset 4095 (2 * 2047 + 1) from a
; kernel-argument pointer. gfx900 is expected to encode it directly as
; offset:4095; gfx1010 is expected to add 0x800 to the base and keep
; offset:2047 on the load.
define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
816
; Volatile byte load at constant offset 8191 (2 * 4095 + 1) from a
; kernel-argument pointer; this is the same constant used by
; @global_inst_salu_offset_13bit_max. Expected split: gfx900 adds 0x1000 and
; keeps offset:4095; gfx1010 adds 0x1800 and keeps offset:2047.
define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x1800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
847
; Volatile byte load at constant offset 16383 (2 * 8191 + 1) from a
; kernel-argument pointer. Expected split: gfx900 adds 0x3000 to the base and
; keeps offset:4095; gfx1010 adds 0x3800 and keeps offset:2047.
define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x3800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
878
; Volatile byte load at constant offset -4096 (2 * -2048) from a
; kernel-argument pointer; this is the same constant used by
; @global_inst_salu_offset_neg_12bit_max. gfx900 is expected to encode it
; directly as offset:-4096; gfx1010 is expected to apply it with a 64-bit add
; (0xfffff000 low, -1 high) and no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0xfffff000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
907
; Volatile byte load at constant offset -8192 (2 * -4096) from a
; kernel-argument pointer; this is the same constant used by
; @global_inst_salu_offset_neg_13bit_max. Both gfx900 and gfx1010 are expected
; to apply the whole offset with a 64-bit add (0xffffe000 low, -1 high) and
; emit the load with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0xffffe000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
938
; Volatile byte load at constant offset -16384 (2 * -8192) from a
; kernel-argument pointer. Both gfx900 and gfx1010 are expected to apply the
; whole offset with a 64-bit add (0xffffc000 low, -1 high) and emit the load
; with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0xffffc000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
969
; Fill 11-bit low-bits: offset = (1ull << 33) | 2047 = 8589936639.
; The offset does not fit in 32 bits, so the high dword of the address gets 2
; added (with carry). Both gfx900 and gfx1010 are expected to add 0 to the low
; dword and keep offset:2047 on the load.
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1000
; Fill 11-bit low-bits: offset = (1ull << 33) | 2048 = 8589936640.
; The high dword of the address gets 2 added on both targets. For the low part,
; gfx900 is expected to add 0 and keep offset:2048 on the load, while gfx1010
; is expected to add 0x800 and emit the load with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:2048
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1031
; Fill 12-bit low-bits: offset = (1ull << 33) | 4095 = 8589938687.
; The high dword of the address gets 2 added on both targets. For the low part,
; gfx900 is expected to add 0 and keep offset:4095 on the load, while gfx1010
; is expected to add 0x800 and keep offset:2047.
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e64 v0, vcc, 0, s0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1062
; Fill 12-bit low-bits: offset = (1ull << 33) | 4096 = 8589938688.
; Both gfx900 and gfx1010 are expected to apply the whole offset with a 64-bit
; add (0x1000 low, 2 high) and emit the load with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x1000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1094
; Fill 13-bit low-bits: offset = (1ull << 33) | 8191 = 8589942783.
; The high dword of the address gets 2 added on both targets. For the low part,
; gfx900 is expected to add 0x1000 and keep offset:4095 on the load, while
; gfx1010 is expected to add 0x1800 and keep offset:2047.
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x1800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1126
; Fill 13-bit low-bits: offset = (1ull << 33) | 8192 = 8589942784.
; Both gfx900 and gfx1010 are expected to apply the whole offset with a 64-bit
; add (0x2000 low, 2 high) and emit the load with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_add_co_u32_e64 v0, s0, 0x2000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1158
; Fill 11-bit low-bits, negative high bits: offset = (1ull << 63) | 2047,
; written below as the signed i64 -9223372036854773761.
; The high dword of the address gets 0x80000000 added on both targets (gfx900
; materializes it with v_bfrev_b32 of 1). For the low part, gfx900 is expected
; to add 0x1000 and use offset:-2049 (0x1000 - 2049 = 2047), while gfx1010 is
; expected to add 0x800 and use offset:-1.
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2049
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1192
; Fill 11-bit low-bits, negative high bits: offset = (1ull << 63) | 2048,
; written below as the signed i64 -9223372036854773760.
; The high dword of the address gets 0x80000000 added on both targets (gfx900
; materializes it with v_bfrev_b32 of 1). For the low part, gfx900 is expected
; to add 0x1000 and use offset:-2048, while gfx1010 is expected to add 0x800
; and emit the load with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2048
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x800, s0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1226
; Fill 12-bit low-bits, negative high bits: offset = (1ull << 63) | 4095,
; written below as the signed i64 -9223372036854771713.
; The high dword of the address gets 0x80000000 added on both targets (gfx900
; materializes it with v_bfrev_b32 of 1). Both targets are expected to add
; 0x1000 to the low dword and use offset:-1 on the load.
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1260
; Fill 12-bit low-bits, negative high bits: offset = (1ull << 63) | 4096,
; written below as the signed i64 -9223372036854771712.
; Both gfx900 and gfx1010 are expected to apply the whole offset with a 64-bit
; add (0x1000 low, 0x80000000 high; gfx900 materializes the high constant with
; v_bfrev_b32 of 1) and emit the load with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1294
; Fill 13-bit low-bits, negative high bits: offset = (1ull << 63) | 8191,
; written below as the signed i64 -9223372036854767617.
; The high dword of the address gets 0x80000000 added on both targets (gfx900
; materializes it with v_bfrev_b32 of 1). Both targets are expected to add
; 0x2000 to the low dword and use offset:-1 on the load.
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1328
; Fill 13-bit low-bits, negative high bits: offset = (1ull << 63) | 8192,
; written below as the signed i64 -9223372036854767616.
; Both gfx900 and gfx1010 are expected to apply the whole offset with a 64-bit
; add (0x2000 low, 0x80000000 high; gfx900 materializes the high constant with
; v_bfrev_b32 of 1) and emit the load with no immediate offset.
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT:    ; implicit-def: $vcc_hi
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_byte v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
  %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
  %load = load volatile i8, i8 addrspace(1)* %gep, align 1
  store i8 %load, i8 addrspace(1)* undef
  ret void
}
1362