; Codegen test for the llvm.amdgcn.s.buffer.load.* intrinsics on AMDGPU.
; llc is invoked once per target CPU (tahiti, hawaii, tonga); the FileCheck
; prefix sets used for them are GCN+SI+SICI, GCN+CI+SICI and GCN+VI.
1; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI,SICI
2; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,CI,SICI
3; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI
4
; One i32 is loaded from the inreg descriptor at constant offset 4, bitcast to
; float and passed to llvm.amdgcn.exp.f32. The checks expect
; s_buffer_load_dword with an immediate operand of 0x1 under the SI and CI
; prefixes and 0x4 under the VI prefix.
; NOTE(review): the s_waitcnt negative check carries a trailing ';' inside its
; pattern - confirm that this is intended.
5;GCN-LABEL: {{^}}s_buffer_load_imm:
6;GCN-NOT: s_waitcnt;
7;SI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x1
8;CI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x1
9;VI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x4
10define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) {
11entry:
12  %word = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
13  %as_float = bitcast i32 %word to float
14  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %as_float, float undef, float undef, float undef, i1 true, i1 true)
15  ret void
16}
17
; Same as the constant-offset case, but the offset is the inreg argument
; %index. Every target is expected to emit s_buffer_load_dword with an SGPR
; as the offset operand.
18;GCN-LABEL: {{^}}s_buffer_load_index:
19;GCN-NOT: s_waitcnt;
20;GCN: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
21define amdgpu_ps void @s_buffer_load_index(<4 x i32> inreg %desc, i32 inreg %index) {
22entry:
23  %word = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
24  %as_float = bitcast i32 %word to float
25  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %as_float, float undef, float undef, float undef, i1 true, i1 true)
26  ret void
27}
28
; Here %index is not marked inreg. The checks expect the load to be emitted as
; a vector buffer_load_dword with a VGPR offset and offen addressing rather
; than as an s_buffer_load.
29;GCN-LABEL: {{^}}s_buffer_load_index_divergent:
30;GCN-NOT: s_waitcnt;
31;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
32define amdgpu_ps void @s_buffer_load_index_divergent(<4 x i32> inreg %desc, i32 %index) {
33entry:
34  %word = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
35  %as_float = bitcast i32 %word to float
36  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %as_float, float undef, float undef, float undef, i1 true, i1 true)
37  ret void
38}
39
; <2 x i32> load at constant offset 64; both lanes are passed to
; llvm.amdgcn.exp.f32. Expected instruction is s_buffer_load_dwordx2 with
; immediate 0x10 under the SI and CI prefixes and 0x40 under the VI prefix.
40;GCN-LABEL: {{^}}s_buffer_loadx2_imm:
41;GCN-NOT: s_waitcnt;
42;SI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
43;CI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
44;VI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x40
45define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) {
46entry:
47  %vec = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 64, i32 0)
48  %fvec = bitcast <2 x i32> %vec to <2 x float>
49  %c0 = extractelement <2 x float> %fvec, i32 0
50  %c1 = extractelement <2 x float> %fvec, i32 1
51  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %c0, float %c1, float undef, float undef, i1 true, i1 true)
52  ret void
53}
54
; <2 x i32> load whose offset is the inreg argument %index; all targets are
; expected to emit s_buffer_load_dwordx2 with an SGPR offset operand.
55;GCN-LABEL: {{^}}s_buffer_loadx2_index:
56;GCN-NOT: s_waitcnt;
57;GCN: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
58define amdgpu_ps void @s_buffer_loadx2_index(<4 x i32> inreg %desc, i32 inreg %index) {
59entry:
60  %vec = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
61  %fvec = bitcast <2 x i32> %vec to <2 x float>
62  %c0 = extractelement <2 x float> %fvec, i32 0
63  %c1 = extractelement <2 x float> %fvec, i32 1
64  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %c0, float %c1, float undef, float undef, i1 true, i1 true)
65  ret void
66}
67
; <2 x i32> load with a non-inreg %index; the checks expect a vector
; buffer_load_dwordx2 with a VGPR offset and offen addressing.
68;GCN-LABEL: {{^}}s_buffer_loadx2_index_divergent:
69;GCN-NOT: s_waitcnt;
70;GCN: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
71define amdgpu_ps void @s_buffer_loadx2_index_divergent(<4 x i32> inreg %desc, i32 %index) {
72entry:
73  %vec = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
74  %fvec = bitcast <2 x i32> %vec to <2 x float>
75  %c0 = extractelement <2 x float> %fvec, i32 0
76  %c1 = extractelement <2 x float> %fvec, i32 1
77  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %c0, float %c1, float undef, float undef, i1 true, i1 true)
78  ret void
79}
80
; <3 x i32> load at constant offset 64. The checks expect the scalar load to
; be emitted as s_buffer_load_dwordx4 (not a three-dword form), with immediate
; 0x10 under the SI and CI prefixes and 0x40 under the VI prefix.
81;GCN-LABEL: {{^}}s_buffer_loadx3_imm:
82;GCN-NOT: s_waitcnt;
83;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
84;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
85;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x40
86define amdgpu_ps void @s_buffer_loadx3_imm(<4 x i32> inreg %desc) {
87entry:
88  %vec = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 64, i32 0)
89  %fvec = bitcast <3 x i32> %vec to <3 x float>
90  %c0 = extractelement <3 x float> %fvec, i32 0
91  %c1 = extractelement <3 x float> %fvec, i32 1
92  %c2 = extractelement <3 x float> %fvec, i32 2
93  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %c0, float %c1, float %c2, float undef, i1 true, i1 true)
94  ret void
95}
96
; <3 x i32> load with the inreg argument %index as offset; every target is
; expected to emit s_buffer_load_dwordx4 with an SGPR offset operand.
97;GCN-LABEL: {{^}}s_buffer_loadx3_index:
98;GCN-NOT: s_waitcnt;
99;GCN: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
100define amdgpu_ps void @s_buffer_loadx3_index(<4 x i32> inreg %desc, i32 inreg %index) {
101entry:
102  %vec = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
103  %fvec = bitcast <3 x i32> %vec to <3 x float>
104  %c0 = extractelement <3 x float> %fvec, i32 0
105  %c1 = extractelement <3 x float> %fvec, i32 1
106  %c2 = extractelement <3 x float> %fvec, i32 2
107  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %c0, float %c1, float %c2, float undef, i1 true, i1 true)
108  ret void
109}
110
; <3 x i32> load with a non-inreg %index. The expected vector instruction
; differs per target: buffer_load_dwordx4 under the SI prefix and
; buffer_load_dwordx3 under the CI and VI prefixes, always with offen.
111;GCN-LABEL: {{^}}s_buffer_loadx3_index_divergent:
112;GCN-NOT: s_waitcnt;
113;SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
114;CI: buffer_load_dwordx3 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
115;VI: buffer_load_dwordx3 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
116define amdgpu_ps void @s_buffer_loadx3_index_divergent(<4 x i32> inreg %desc, i32 %index) {
117entry:
118  %vec = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
119  %fvec = bitcast <3 x i32> %vec to <3 x float>
120  %c0 = extractelement <3 x float> %fvec, i32 0
121  %c1 = extractelement <3 x float> %fvec, i32 1
122  %c2 = extractelement <3 x float> %fvec, i32 2
123  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %c0, float %c1, float %c2, float undef, i1 true, i1 true)
124  ret void
125}
126
; <4 x i32> load at constant offset 200; all four lanes are passed to
; llvm.amdgcn.exp.f32. Expected instruction is s_buffer_load_dwordx4 with
; immediate 0x32 under the SI and CI prefixes and 0xc8 under the VI prefix.
127;GCN-LABEL: {{^}}s_buffer_loadx4_imm:
128;GCN-NOT: s_waitcnt;
129;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x32
130;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x32
131;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0xc8
132define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) {
133entry:
134  %vec = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 200, i32 0)
135  %fvec = bitcast <4 x i32> %vec to <4 x float>
136  %c0 = extractelement <4 x float> %fvec, i32 0
137  %c1 = extractelement <4 x float> %fvec, i32 1
138  %c2 = extractelement <4 x float> %fvec, i32 2
139  %c3 = extractelement <4 x float> %fvec, i32 3
140  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %c0, float %c1, float %c2, float %c3, i1 true, i1 true)
141  ret void
142}
143
; <4 x i32> load with the inreg argument %index as offset. The expected
; instruction is the scalar s_buffer_load_dwordx4 with SGPR destination,
; descriptor and offset operands; the mnemonic is spelled out in full
; (including the s_ prefix) so the check names the scalar opcode explicitly,
; matching the x1/x2/x3 uniform-index tests.
144;GCN-LABEL: {{^}}s_buffer_loadx4_index:
145;GCN-NOT: s_waitcnt;
146;GCN: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
147define amdgpu_ps void @s_buffer_loadx4_index(<4 x i32> inreg %desc, i32 inreg %index) {
148main_body:
149  %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
150  %bitcast = bitcast <4 x i32> %load to <4 x float>
151  %x = extractelement <4 x float> %bitcast, i32 0
152  %y = extractelement <4 x float> %bitcast, i32 1
153  %z = extractelement <4 x float> %bitcast, i32 2
154  %w = extractelement <4 x float> %bitcast, i32 3
155  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
156  ret void
157}
158
; <4 x i32> load with a non-inreg %index; the checks expect a vector
; buffer_load_dwordx4 with a VGPR offset and offen addressing.
159;GCN-LABEL: {{^}}s_buffer_loadx4_index_divergent:
160;GCN-NOT: s_waitcnt;
161;GCN: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
162define amdgpu_ps void @s_buffer_loadx4_index_divergent(<4 x i32> inreg %desc, i32 %index) {
163entry:
164  %vec = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
165  %fvec = bitcast <4 x i32> %vec to <4 x float>
166  %c0 = extractelement <4 x float> %fvec, i32 0
167  %c1 = extractelement <4 x float> %fvec, i32 1
168  %c2 = extractelement <4 x float> %fvec, i32 2
169  %c3 = extractelement <4 x float> %fvec, i32 3
170  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %c0, float %c1, float %c2, float %c3, i1 true, i1 true)
171  ret void
172}
173
; Two separate i32 loads from the same descriptor at offsets 4 and 8. The
; checks expect a single s_buffer_load_dwordx2 whose immediate is 0x1 under
; the SI and CI prefixes and 0x4 under the VI prefix.
174;GCN-LABEL: {{^}}s_buffer_load_imm_mergex2:
175;GCN-NOT: s_waitcnt;
176;SI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x1
177;CI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x1
178;VI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x4
179define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) {
180entry:
181  %word0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
182  %word1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
183  %c0 = bitcast i32 %word0 to float
184  %c1 = bitcast i32 %word1 to float
185  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %c0, float %c1, float undef, float undef, i1 true, i1 true)
186  ret void
187}
188
; Four separate i32 loads from the same descriptor at offsets 8, 12, 16 and
; 20. The checks expect a single s_buffer_load_dwordx4 whose immediate is 0x2
; under the SI and CI prefixes and 0x8 under the VI prefix.
189;GCN-LABEL: {{^}}s_buffer_load_imm_mergex4:
190;GCN-NOT: s_waitcnt;
191;SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x2
192;CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x2
193;VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x8
194define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) {
195entry:
196  %word0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
197  %word1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 12, i32 0)
198  %word2 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 16, i32 0)
199  %word3 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 20, i32 0)
200  %c0 = bitcast i32 %word0 to float
201  %c1 = bitcast i32 %word1 to float
202  %c2 = bitcast i32 %word2 to float
203  %c3 = bitcast i32 %word3 to float
204  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %c0, float %c1, float %c2, float %c3, i1 true, i1 true)
205  ret void
206}
207
; The offset is computed in two steps that live in different basic blocks:
; a shift left by 4 of the non-inreg %index in the first block, then an OR
; with 8 in the successor, where the load happens. The checks expect a
; v_or_b32 followed by a vector buffer_load_dword with offen addressing.
208;GCN-LABEL: {{^}}s_buffer_load_index_across_bb:
209;GCN-NOT: s_waitcnt;
210;GCN: v_or_b32
211;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
212define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
213entry:
214  %scaled = shl i32 %index, 4
215  br label %body
216
217body:                                             ; preds = %entry
218  %offset = or i32 %scaled, 8
219  %word = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %offset, i32 0)
220  %as_float = bitcast i32 %word to float
221  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %as_float, float undef, float undef, float undef, i1 true, i1 true)
222  ret void
223}
224
; Like the single-load cross-block case, but the successor block performs two
; loads: one at (index << 4) | 8 and one at that value OR 4. The checks expect
; two v_or_b32 instructions followed by two separate vector buffer_load_dword
; instructions, each with offen addressing.
225;GCN-LABEL: {{^}}s_buffer_load_index_across_bb_merged:
226;GCN-NOT: s_waitcnt;
227;GCN: v_or_b32
228;GCN: v_or_b32
229;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
230;GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
231define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
232entry:
233  %scaled = shl i32 %index, 4
234  br label %body
235
236body:                                             ; preds = %entry
237  %offset0 = or i32 %scaled, 8
238  %word0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %offset0, i32 0)
239  %offset1 = or i32 %offset0, 4
240  %word1 = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %offset1, i32 0)
241  %c0 = bitcast i32 %word0 to float
242  %c1 = bitcast i32 %word1 to float
243  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %c0, float %c1, float undef, float undef, i1 true, i1 true)
244  ret void
245}
246
; Constant offset -1. All targets are expected to move -1 into an SGPR with
; s_mov_b32 and use that register as the offset operand; the checks also pin
; the result register to s0 and the descriptor to s[0:3].
247; GCN-LABEL: {{^}}s_buffer_load_imm_neg1:
248; GCN: s_mov_b32 [[K:s[0-9]+]], -1{{$}}
249; GCN: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
250define amdgpu_ps i32 @s_buffer_load_imm_neg1(<4 x i32> inreg %desc) {
251  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
252  ret i32 %dword
253}
254
; Constant offset -4. Under the SI and VI prefixes the checks expect -4 to be
; moved into an SGPR that is then used as the offset; under the CI prefix they
; expect the literal operand 0x3fffffff directly on the load.
255; GCN-LABEL: {{^}}s_buffer_load_imm_neg4:
256; SI: s_mov_b32 [[K:s[0-9]+]], -4{{$}}
257; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
258
259; CI: s_buffer_load_dword s0, s[0:3], 0x3fffffff{{$}}
260
261; VI: s_mov_b32 [[K:s[0-9]+]], -4{{$}}
262; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
263define amdgpu_ps i32 @s_buffer_load_imm_neg4(<4 x i32> inreg %desc) {
264  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
265  ret i32 %dword
266}
267
; Constant offset -8. Under the SI prefix the checks expect -8 to be moved
; into an SGPR that is then used as the offset; under the CI prefix they
; expect the literal operand 0x3ffffffe directly on the load. The VI
; expectation mirrors the -4 test so that the tonga run is not left with only
; the label check.
; NOTE(review): the VI lines were derived by analogy with
; s_buffer_load_imm_neg4 - confirm against actual llc output for tonga.
268; GCN-LABEL: {{^}}s_buffer_load_imm_neg8:
269; SI: s_mov_b32 [[K:s[0-9]+]], -8{{$}}
270; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
271
272; CI: s_buffer_load_dword s0, s[0:3], 0x3ffffffe{{$}}

; VI: s_mov_b32 [[K:s[0-9]+]], -8{{$}}
; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
273define amdgpu_ps i32 @s_buffer_load_imm_neg8(<4 x i32> inreg %desc) {
274  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
275  ret i32 %load
276}
277
; Constant offset -2147483648 (only bit 31 set). Under the SI and VI prefixes
; the checks expect the constant to be built with "s_brev_b32 K, 1" and used
; as an SGPR offset; under the CI prefix they expect the literal 0x20000000.
278; GCN-LABEL: {{^}}s_buffer_load_imm_bit31:
279; SI: s_brev_b32 [[K:s[0-9]+]], 1{{$}}
280; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
281
282; CI: s_buffer_load_dword s0, s[0:3], 0x20000000{{$}}
283
284; VI: s_brev_b32 [[K:s[0-9]+]], 1{{$}}
285; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
286define amdgpu_ps i32 @s_buffer_load_imm_bit31(<4 x i32> inreg %desc) {
287  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
288  ret i32 %dword
289}
290
; Constant offset 1073741824 (only bit 30 set). Under the SI and VI prefixes
; the checks expect an s_mov_b32 whose operand is printed as 2.0, followed by
; a load using that SGPR; under the CI prefix they expect literal 0x10000000.
291; GCN-LABEL: {{^}}s_buffer_load_imm_bit30:
292; SI: s_mov_b32 [[K:s[0-9]+]], 2.0{{$}}
293; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
294
295; CI: s_buffer_load_dword s0, s[0:3], 0x10000000{{$}}
296
297; VI: s_mov_b32 [[K:s[0-9]+]], 2.0{{$}}
298; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
299define amdgpu_ps i32 @s_buffer_load_imm_bit30(<4 x i32> inreg %desc) {
300  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 0)
301  ret i32 %dword
302}
303
; Constant offset 536870912 (only bit 29 set). Under the SI and VI prefixes
; the checks expect the constant to be built with "s_brev_b32 K, 4" and used
; as an SGPR offset; under the CI prefix they expect the literal 0x8000000.
304; GCN-LABEL: {{^}}s_buffer_load_imm_bit29:
305; SI: s_brev_b32 [[K:s[0-9]+]], 4{{$}}
306; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
307
308; CI: s_buffer_load_dword s0, s[0:3], 0x8000000{{$}}
309
310; VI: s_brev_b32 [[K:s[0-9]+]], 4{{$}}
311; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
312define amdgpu_ps i32 @s_buffer_load_imm_bit29(<4 x i32> inreg %desc) {
313  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
314  ret i32 %dword
315}
316
; Constant offset 2097152 (0x200000). Under the SI and VI prefixes the checks
; expect s_mov_b32 of 0x200000 into an SGPR used as the offset; under the CI
; prefix they expect the literal operand 0x80000.
317; GCN-LABEL: {{^}}s_buffer_load_imm_bit21:
318; SI: s_mov_b32 [[K:s[0-9]+]], 0x200000{{$}}
319; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
320
321; CI: s_buffer_load_dword s0, s[0:3], 0x80000{{$}}
322
323; VI: s_mov_b32 [[K:s[0-9]+]], 0x200000{{$}}
324; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
325define amdgpu_ps i32 @s_buffer_load_imm_bit21(<4 x i32> inreg %desc) {
326  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
327  ret i32 %dword
328}
329
; Constant offset 1048576 (0x100000). Under the SI and VI prefixes the checks
; expect s_mov_b32 of 0x100000 into an SGPR used as the offset; under the CI
; prefix they expect the literal operand 0x40000.
330; GCN-LABEL: {{^}}s_buffer_load_imm_bit20:
331; SI: s_mov_b32 [[K:s[0-9]+]], 0x100000{{$}}
332; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
333
334; CI: s_buffer_load_dword s0, s[0:3], 0x40000{{$}}
335
336; VI: s_mov_b32 [[K:s[0-9]+]], 0x100000{{$}}
337; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
338define amdgpu_ps i32 @s_buffer_load_imm_bit20(<4 x i32> inreg %desc) {
339  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
340  ret i32 %dword
341}
342
; Constant offset -1048576. Under the SI and VI prefixes the checks expect
; s_mov_b32 of 0xfff00000 into an SGPR used as the offset; under the CI prefix
; they expect the literal operand 0x3ffc0000.
343; GCN-LABEL: {{^}}s_buffer_load_imm_neg_bit20:
344; SI: s_mov_b32 [[K:s[0-9]+]], 0xfff00000{{$}}
345; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
346
347; CI: s_buffer_load_dword s0, s[0:3], 0x3ffc0000{{$}}
348
349; VI: s_mov_b32 [[K:s[0-9]+]], 0xfff00000{{$}}
350; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
351define amdgpu_ps i32 @s_buffer_load_imm_neg_bit20(<4 x i32> inreg %desc) {
352  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0)
353  ret i32 %dword
354}
355
; Constant offset 524288 (0x80000). Under the SI prefix the checks expect
; s_mov_b32 of 0x80000 into an SGPR used as the offset; under the CI prefix
; the literal 0x20000; under the VI prefix the immediate 0x80000 on the load.
356; GCN-LABEL: {{^}}s_buffer_load_imm_bit19:
357; SI: s_mov_b32 [[K:s[0-9]+]], 0x80000{{$}}
358; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
359
360; CI: s_buffer_load_dword s0, s[0:3], 0x20000{{$}}
361
362; VI: s_buffer_load_dword s0, s[0:3], 0x80000{{$}}
363define amdgpu_ps i32 @s_buffer_load_imm_bit19(<4 x i32> inreg %desc) {
364  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
365  ret i32 %dword
366}
367
; Constant offset -524288. Under the SI and VI prefixes the checks expect
; s_mov_b32 of 0xfff80000 into an SGPR used as the offset; under the CI prefix
; they expect the literal operand 0x3ffe0000.
368; GCN-LABEL: {{^}}s_buffer_load_imm_neg_bit19:
369; SI: s_mov_b32 [[K:s[0-9]+]], 0xfff80000{{$}}
370; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
371
372; CI: s_buffer_load_dword s0, s[0:3], 0x3ffe0000{{$}}
373
374; VI: s_mov_b32 [[K:s[0-9]+]], 0xfff80000{{$}}
375; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
376define amdgpu_ps i32 @s_buffer_load_imm_neg_bit19(<4 x i32> inreg %desc) {
377  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
378  ret i32 %dword
379}
380
; Constant offset 255. Under the shared SICI prefix the checks expect
; s_movk_i32 of 0xff into an SGPR used as the offset; under the VI prefix they
; expect the immediate 0xff directly on the load.
381; GCN-LABEL: {{^}}s_buffer_load_imm_255:
382; SICI: s_movk_i32 [[K:s[0-9]+]], 0xff{{$}}
383; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
384
385; VI: s_buffer_load_dword s0, s[0:3], 0xff{{$}}
386define amdgpu_ps i32 @s_buffer_load_imm_255(<4 x i32> inreg %desc) {
387  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 255, i32 0)
388  ret i32 %dword
389}
390
; Constant offset 256. Expected immediate on the load is 0x40 under the SICI
; prefix and 0x100 under the VI prefix.
391; GCN-LABEL: {{^}}s_buffer_load_imm_256:
392; SICI: s_buffer_load_dword s0, s[0:3], 0x40{{$}}
393; VI: s_buffer_load_dword s0, s[0:3], 0x100{{$}}
394define amdgpu_ps i32 @s_buffer_load_imm_256(<4 x i32> inreg %desc) {
395  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 256, i32 0)
396  ret i32 %dword
397}
398
; Constant offset 1016. Expected immediate on the load is 0xfe under the SICI
; prefix and 0x3f8 under the VI prefix.
399; GCN-LABEL: {{^}}s_buffer_load_imm_1016:
400; SICI: s_buffer_load_dword s0, s[0:3], 0xfe{{$}}
401; VI: s_buffer_load_dword s0, s[0:3], 0x3f8{{$}}
402define amdgpu_ps i32 @s_buffer_load_imm_1016(<4 x i32> inreg %desc) {
403  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1016, i32 0)
404  ret i32 %dword
405}
406
; Constant offset 1020. Expected immediate on the load is 0xff under the SICI
; prefix and 0x3fc under the VI prefix.
407; GCN-LABEL: {{^}}s_buffer_load_imm_1020:
408; SICI: s_buffer_load_dword s0, s[0:3], 0xff{{$}}
409; VI: s_buffer_load_dword s0, s[0:3], 0x3fc{{$}}
410define amdgpu_ps i32 @s_buffer_load_imm_1020(<4 x i32> inreg %desc) {
411  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1020, i32 0)
412  ret i32 %dword
413}
414
; Constant offset 1021 (0x3fd, not a multiple of 4). Under the SICI prefix the
; checks expect s_movk_i32 of 0x3fd into an SGPR used as the offset. A VI
; expectation is included so the tonga run verifies the load as well; it
; follows the pattern of the 255 and 1025 tests, where VI takes the raw
; offset as an immediate.
; NOTE(review): the VI operand was derived from those sibling tests - confirm
; against actual llc output for tonga.
415; GCN-LABEL: {{^}}s_buffer_load_imm_1021:
416; SICI: s_movk_i32 [[K:s[0-9]+]], 0x3fd{{$}}
417; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; VI: s_buffer_load_dword s0, s[0:3], 0x3fd{{$}}
418define amdgpu_ps i32 @s_buffer_load_imm_1021(<4 x i32> inreg %desc) {
419  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1021, i32 0)
420  ret i32 %load
421}
422
; Constant offset 1024 (0x400). Under the SI prefix the checks expect
; s_movk_i32 of 0x400 into an SGPR used as the offset; under the CI prefix the
; literal 0x100; under the VI prefix the immediate 0x400 on the load.
423; GCN-LABEL: {{^}}s_buffer_load_imm_1024:
424; SI: s_movk_i32 [[K:s[0-9]+]], 0x400{{$}}
425; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
426
427; CI: s_buffer_load_dword s0, s[0:3], 0x100{{$}}
428
429; VI: s_buffer_load_dword s0, s[0:3], 0x400{{$}}
430define amdgpu_ps i32 @s_buffer_load_imm_1024(<4 x i32> inreg %desc) {
431  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0)
432  ret i32 %dword
433}
434
; Constant offset 1025 (0x401). Under the SICI prefix the checks expect
; s_movk_i32 of 0x401 into an SGPR used as the offset; under the VI prefix
; they expect the immediate 0x401 directly on the load.
435; GCN-LABEL: {{^}}s_buffer_load_imm_1025:
436; SICI: s_movk_i32 [[K:s[0-9]+]], 0x401{{$}}
437; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
438
439; VI: s_buffer_load_dword s0, s[0:3], 0x401{{$}}
440define amdgpu_ps i32 @s_buffer_load_imm_1025(<4 x i32> inreg %desc) {
441  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1025, i32 0)
442  ret i32 %dword
443}
444
; Constant offset 1028 (0x404), as the function name states, so this case is
; distinct from s_buffer_load_imm_1024. Expected code: under the SI prefix an
; s_movk_i32 of 0x404 into an SGPR used as the offset; under the CI prefix
; the literal 0x101; under the VI prefix the immediate 0x404 on the load.
; NOTE(review): expected operands were derived from the 1024 and 1025 cases
; (CI value is offset / 4, SI and VI use the raw offset) - confirm against
; actual llc output.
445; GCN-LABEL: {{^}}s_buffer_load_imm_1028:
446; SI: s_movk_i32 [[K:s[0-9]+]], 0x404{{$}}
447; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
448
449; CI: s_buffer_load_dword s0, s[0:3], 0x101{{$}}
450; VI: s_buffer_load_dword s0, s[0:3], 0x404{{$}}
451define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) {
452  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1028, i32 0)
453  ret i32 %load
454}
455
; Declarations of the intrinsics called by the test functions above: the
; export intrinsic that consumes the loaded values, and the s.buffer.load
; overloads returning i32, <2 x i32>, <3 x i32> and <4 x i32>.
456declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
457declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
458declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
459declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32)
460declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
461