1; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,SI,SICI
2; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,CI,SICI
3; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,VI
4
; A constant byte offset of 4 should be folded into the immediate field of the
; scalar buffer load. The SI and CI expectations use the offset divided by four
; (0x1); the VI expectation uses the byte offset itself (0x4).
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_load_imm:
; GCN-NOT: s_waitcnt
; SI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x1
; CI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x1
; VI: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x4
define amdgpu_ps void @s_buffer_load_imm(<4 x i32> inreg %desc) {
main_body:
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
  %bitcast = bitcast i32 %load to float
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
  ret void
}
17
; An inreg (uniform) index should be used directly as the SGPR offset operand
; of the scalar buffer load.
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_load_index:
; GCN-NOT: s_waitcnt
; GCN: s_buffer_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
define amdgpu_ps void @s_buffer_load_index(<4 x i32> inreg %desc, i32 inreg %index) {
main_body:
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
  %bitcast = bitcast i32 %load to float
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
  ret void
}
28
; The index is not inreg here, and the expected code is the vector
; buffer_load_dword with a VGPR offset (offen) instead of the scalar form.
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_load_index_divergent:
; GCN-NOT: s_waitcnt
; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_ps void @s_buffer_load_index_divergent(<4 x i32> inreg %desc, i32 %index) {
main_body:
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %index, i32 0)
  %bitcast = bitcast i32 %load to float
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
  ret void
}
39
; Two-dword load at constant byte offset 64. The SI and CI expectations use the
; offset divided by four (0x10); the VI expectation uses the byte offset (0x40).
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_loadx2_imm:
; GCN-NOT: s_waitcnt
; SI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
; CI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
; VI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x40
define amdgpu_ps void @s_buffer_loadx2_imm(<4 x i32> inreg %desc) {
main_body:
  %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 64, i32 0)
  %bitcast = bitcast <2 x i32> %load to <2 x float>
  %x = extractelement <2 x float> %bitcast, i32 0
  %y = extractelement <2 x float> %bitcast, i32 1
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
  ret void
}
54
; Two-dword load with an inreg (uniform) index; the scalar form with an SGPR
; offset operand is expected.
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_loadx2_index:
; GCN-NOT: s_waitcnt
; GCN: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
define amdgpu_ps void @s_buffer_loadx2_index(<4 x i32> inreg %desc, i32 inreg %index) {
main_body:
  %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
  %bitcast = bitcast <2 x i32> %load to <2 x float>
  %x = extractelement <2 x float> %bitcast, i32 0
  %y = extractelement <2 x float> %bitcast, i32 1
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
  ret void
}
67
; Two-dword load whose index is not inreg; the vector buffer_load_dwordx2 with
; a VGPR offset (offen) is expected.
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_loadx2_index_divergent:
; GCN-NOT: s_waitcnt
; GCN: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_ps void @s_buffer_loadx2_index_divergent(<4 x i32> inreg %desc, i32 %index) {
main_body:
  %load = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %desc, i32 %index, i32 0)
  %bitcast = bitcast <2 x i32> %load to <2 x float>
  %x = extractelement <2 x float> %bitcast, i32 0
  %y = extractelement <2 x float> %bitcast, i32 1
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
  ret void
}
80
; Three-dword load at constant byte offset 64. All three targets are expected
; to emit the four-dword scalar load; the offset is 0x10 for SI and CI and 0x40
; for VI.
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_loadx3_imm:
; GCN-NOT: s_waitcnt
; SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
; CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x10
; VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x40
define amdgpu_ps void @s_buffer_loadx3_imm(<4 x i32> inreg %desc) {
main_body:
  %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 64, i32 0)
  %bitcast = bitcast <3 x i32> %load to <3 x float>
  %x = extractelement <3 x float> %bitcast, i32 0
  %y = extractelement <3 x float> %bitcast, i32 1
  %z = extractelement <3 x float> %bitcast, i32 2
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
  ret void
}
96
; Three-dword load with an inreg (uniform) index; the four-dword scalar load
; with an SGPR offset operand is expected.
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_loadx3_index:
; GCN-NOT: s_waitcnt
; GCN: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
define amdgpu_ps void @s_buffer_loadx3_index(<4 x i32> inreg %desc, i32 inreg %index) {
main_body:
  %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
  %bitcast = bitcast <3 x i32> %load to <3 x float>
  %x = extractelement <3 x float> %bitcast, i32 0
  %y = extractelement <3 x float> %bitcast, i32 1
  %z = extractelement <3 x float> %bitcast, i32 2
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
  ret void
}
110
; Three-dword load whose index is not inreg. The SI expectation is the
; four-dword vector load, while CI and VI expect buffer_load_dwordx3.
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_loadx3_index_divergent:
; GCN-NOT: s_waitcnt
; SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; CI: buffer_load_dwordx3 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; VI: buffer_load_dwordx3 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_ps void @s_buffer_loadx3_index_divergent(<4 x i32> inreg %desc, i32 %index) {
main_body:
  %load = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %desc, i32 %index, i32 0)
  %bitcast = bitcast <3 x i32> %load to <3 x float>
  %x = extractelement <3 x float> %bitcast, i32 0
  %y = extractelement <3 x float> %bitcast, i32 1
  %z = extractelement <3 x float> %bitcast, i32 2
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float undef, i1 true, i1 true)
  ret void
}
126
; Four-dword load at constant byte offset 200. The SI and CI expectations use
; the offset divided by four (0x32); the VI expectation uses the byte offset
; (0xc8).
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_loadx4_imm:
; GCN-NOT: s_waitcnt
; SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x32
; CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x32
; VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0xc8
define amdgpu_ps void @s_buffer_loadx4_imm(<4 x i32> inreg %desc) {
main_body:
  %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 200, i32 0)
  %bitcast = bitcast <4 x i32> %load to <4 x float>
  %x = extractelement <4 x float> %bitcast, i32 0
  %y = extractelement <4 x float> %bitcast, i32 1
  %z = extractelement <4 x float> %bitcast, i32 2
  %w = extractelement <4 x float> %bitcast, i32 3
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
  ret void
}
143
; Four-dword load with an inreg (uniform) index; the scalar form with an SGPR
; offset operand is expected.
; The positive check spells out the full s_buffer_load_dwordx4 mnemonic, like
; the other uniform-index tests, instead of matching it only as a substring.
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_loadx4_index:
; GCN-NOT: s_waitcnt
; GCN: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
define amdgpu_ps void @s_buffer_loadx4_index(<4 x i32> inreg %desc, i32 inreg %index) {
main_body:
  %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
  %bitcast = bitcast <4 x i32> %load to <4 x float>
  %x = extractelement <4 x float> %bitcast, i32 0
  %y = extractelement <4 x float> %bitcast, i32 1
  %z = extractelement <4 x float> %bitcast, i32 2
  %w = extractelement <4 x float> %bitcast, i32 3
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
  ret void
}
158
; Four-dword load whose index is not inreg; the vector buffer_load_dwordx4 with
; a VGPR offset (offen) is expected.
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_loadx4_index_divergent:
; GCN-NOT: s_waitcnt
; GCN: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_ps void @s_buffer_loadx4_index_divergent(<4 x i32> inreg %desc, i32 %index) {
main_body:
  %load = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %desc, i32 %index, i32 0)
  %bitcast = bitcast <4 x i32> %load to <4 x float>
  %x = extractelement <4 x float> %bitcast, i32 0
  %y = extractelement <4 x float> %bitcast, i32 1
  %z = extractelement <4 x float> %bitcast, i32 2
  %w = extractelement <4 x float> %bitcast, i32 3
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
  ret void
}
173
; Two single-dword loads from the same descriptor at byte offsets 4 and 8 are
; expected to become one two-dword load starting at the lower offset (0x1 for
; SI and CI, 0x4 for VI).
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_load_imm_mergex2:
; GCN-NOT: s_waitcnt
; SI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x1
; CI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x1
; VI: s_buffer_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x4
define amdgpu_ps void @s_buffer_load_imm_mergex2(<4 x i32> inreg %desc) {
main_body:
  %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 4, i32 0)
  %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
  %x = bitcast i32 %load0 to float
  %y = bitcast i32 %load1 to float
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float undef, float undef, i1 true, i1 true)
  ret void
}
188
; Four single-dword loads from the same descriptor at byte offsets 8, 12, 16
; and 20 are expected to become one four-dword load starting at the lowest
; offset (0x2 for SI and CI, 0x8 for VI).
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the load.
; GCN-LABEL: {{^}}s_buffer_load_imm_mergex4:
; GCN-NOT: s_waitcnt
; SI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x2
; CI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x2
; VI: s_buffer_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0x8
define amdgpu_ps void @s_buffer_load_imm_mergex4(<4 x i32> inreg %desc) {
main_body:
  %load0 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 8, i32 0)
  %load1 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 12, i32 0)
  %load2 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 16, i32 0)
  %load3 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 20, i32 0)
  %x = bitcast i32 %load0 to float
  %y = bitcast i32 %load1 to float
  %z = bitcast i32 %load2 to float
  %w = bitcast i32 %load3 to float
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
  ret void
}
207
; The non-inreg index is shifted in the entry block and or'ed with 8 in the
; block that performs the load. The expected code keeps the v_or_b32 and uses
; the vector load with a VGPR offset (offen).
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the first expected instruction.
; GCN-LABEL: {{^}}s_buffer_load_index_across_bb:
; GCN-NOT: s_waitcnt
; GCN: v_or_b32
; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
main_body:
  %tmp = shl i32 %index, 4
  br label %bb1

bb1:                                              ; preds = %main_body
  %tmp1 = or i32 %tmp, 8
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0)
  %bitcast = bitcast i32 %load to float
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float undef, float undef, float undef, i1 true, i1 true)
  ret void
}
224
; Same shape as the single-load cross-block test, but with a second load whose
; offset is the first offset or'ed with 4. The expectations are two v_or_b32
; instructions followed by two separate vector loads with VGPR offsets.
; The negative check carries no trailing ';' so that it can really match an
; s_waitcnt emitted ahead of the first expected instruction.
; GCN-LABEL: {{^}}s_buffer_load_index_across_bb_merged:
; GCN-NOT: s_waitcnt
; GCN: v_or_b32
; GCN: v_or_b32
; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) {
main_body:
  %tmp = shl i32 %index, 4
  br label %bb1

bb1:                                              ; preds = %main_body
  %tmp1 = or i32 %tmp, 8
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp1, i32 0)
  %tmp2 = or i32 %tmp1, 4
  %load2 = tail call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %tmp2, i32 0)
  %bitcast = bitcast i32 %load to float
  %bitcast2 = bitcast i32 %load2 to float
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %bitcast, float %bitcast2, float undef, float undef, i1 true, i1 true)
  ret void
}
246
; Constant offset -1. Every tested target is expected to move the constant
; into an SGPR and pass that register as the load offset.
; GCN-LABEL: {{^}}s_buffer_load_imm_neg1:
; GCN: s_mov_b32 [[K:s[0-9]+]], -1{{$}}
; GCN: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_neg1(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0)
  ret i32 %dword
}
254
; Constant offset -4. SI and VI are expected to go through an SGPR, whereas the
; CI expectation encodes the offset directly as 0x3fffffff.
; GCN-LABEL: {{^}}s_buffer_load_imm_neg4:
; SI: s_mov_b32 [[K:s[0-9]+]], -4{{$}}
; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; CI: s_buffer_load_dword s0, s[0:3], 0x3fffffff{{$}}

; VI: s_mov_b32 [[K:s[0-9]+]], -4{{$}}
; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_neg4(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0)
  ret i32 %dword
}
267
; Constant offset -8. SI is expected to go through an SGPR and the CI
; expectation encodes the offset directly as 0x3ffffffe.
; The VI expectation was missing even though the tonga run executes this
; function; it mirrors the VI pattern of the -4 test (constant moved into an
; SGPR). NOTE(review): added by analogy, confirm against real llc output.
; GCN-LABEL: {{^}}s_buffer_load_imm_neg8:
; SI: s_mov_b32 [[K:s[0-9]+]], -8{{$}}
; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; CI: s_buffer_load_dword s0, s[0:3], 0x3ffffffe{{$}}

; VI: s_mov_b32 [[K:s[0-9]+]], -8{{$}}
; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_neg8(<4 x i32> inreg %desc) {
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0)
  ret i32 %load
}
277
; Constant offset with only bit 31 set. SI and VI are expected to build the
; constant with s_brev_b32 of 1; the CI expectation encodes 0x20000000 directly.
; GCN-LABEL: {{^}}s_buffer_load_imm_bit31:
; SI: s_brev_b32 [[K:s[0-9]+]], 1{{$}}
; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; CI: s_buffer_load_dword s0, s[0:3], 0x20000000{{$}}

; VI: s_brev_b32 [[K:s[0-9]+]], 1{{$}}
; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_bit31(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0)
  ret i32 %dword
}
290
; Constant offset with only bit 30 set (1073741824). SI and VI are expected to
; move the constant, printed as 2.0, into an SGPR; the CI expectation encodes
; 0x10000000 directly.
; GCN-LABEL: {{^}}s_buffer_load_imm_bit30:
; SI: s_mov_b32 [[K:s[0-9]+]], 2.0{{$}}
; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; CI: s_buffer_load_dword s0, s[0:3], 0x10000000{{$}}

; VI: s_mov_b32 [[K:s[0-9]+]], 2.0{{$}}
; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_bit30(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 0)
  ret i32 %dword
}
303
; Constant offset with only bit 29 set (536870912). SI and VI are expected to
; build the constant with s_brev_b32 of 4; the CI expectation encodes 0x8000000
; directly.
; GCN-LABEL: {{^}}s_buffer_load_imm_bit29:
; SI: s_brev_b32 [[K:s[0-9]+]], 4{{$}}
; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; CI: s_buffer_load_dword s0, s[0:3], 0x8000000{{$}}

; VI: s_brev_b32 [[K:s[0-9]+]], 4{{$}}
; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_bit29(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0)
  ret i32 %dword
}
316
; Constant offset with only bit 21 set (2097152 = 0x200000). SI and VI are
; expected to move the constant into an SGPR; the CI expectation encodes 0x80000
; directly.
; GCN-LABEL: {{^}}s_buffer_load_imm_bit21:
; SI: s_mov_b32 [[K:s[0-9]+]], 0x200000{{$}}
; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; CI: s_buffer_load_dword s0, s[0:3], 0x80000{{$}}

; VI: s_mov_b32 [[K:s[0-9]+]], 0x200000{{$}}
; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_bit21(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0)
  ret i32 %dword
}
329
; Constant offset with only bit 20 set (1048576 = 0x100000). SI and VI are
; expected to move the constant into an SGPR; the CI expectation encodes 0x40000
; directly.
; GCN-LABEL: {{^}}s_buffer_load_imm_bit20:
; SI: s_mov_b32 [[K:s[0-9]+]], 0x100000{{$}}
; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; CI: s_buffer_load_dword s0, s[0:3], 0x40000{{$}}

; VI: s_mov_b32 [[K:s[0-9]+]], 0x100000{{$}}
; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_bit20(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0)
  ret i32 %dword
}
342
; Constant offset -1048576 (0xfff00000). SI and VI are expected to move the
; constant into an SGPR; the CI expectation encodes 0x3ffc0000 directly.
; GCN-LABEL: {{^}}s_buffer_load_imm_neg_bit20:
; SI: s_mov_b32 [[K:s[0-9]+]], 0xfff00000{{$}}
; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; CI: s_buffer_load_dword s0, s[0:3], 0x3ffc0000{{$}}

; VI: s_mov_b32 [[K:s[0-9]+]], 0xfff00000{{$}}
; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_neg_bit20(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0)
  ret i32 %dword
}
355
; Constant offset with only bit 19 set (524288 = 0x80000). SI is expected to
; move the constant into an SGPR.
; The CI and VI lines previously lacked the ':' after the prefix, so FileCheck
; never evaluated them. The CI value is the offset divided by four (0x20000);
; the VI value is the byte offset (0x80000), matching how the other VI
; immediates in this file are written.
; NOTE(review): the VI value is derived by analogy, confirm against llc output.
; GCN-LABEL: {{^}}s_buffer_load_imm_bit19:
; SI: s_mov_b32 [[K:s[0-9]+]], 0x80000{{$}}
; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; CI: s_buffer_load_dword s0, s[0:3], 0x20000{{$}}

; VI: s_buffer_load_dword s0, s[0:3], 0x80000{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_bit19(<4 x i32> inreg %desc) {
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0)
  ret i32 %load
}
367
; Constant offset -524288 (0xfff80000). SI is expected to move the constant
; into an SGPR.
; The CI and VI lines previously lacked the ':' after the prefix, so FileCheck
; never evaluated them, and their values were copied from the positive bit-19
; test. They now follow the pattern of the -1048576 test. The CI value is the
; 32-bit constant shifted right by two (0x3ffe0000); VI goes through an SGPR.
; NOTE(review): values are derived by analogy, confirm against llc output.
; GCN-LABEL: {{^}}s_buffer_load_imm_neg_bit19:
; SI: s_mov_b32 [[K:s[0-9]+]], 0xfff80000{{$}}
; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; CI: s_buffer_load_dword s0, s[0:3], 0x3ffe0000{{$}}

; VI: s_mov_b32 [[K:s[0-9]+]], 0xfff80000{{$}}
; VI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_neg_bit19(<4 x i32> inreg %desc) {
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0)
  ret i32 %load
}
379
; Constant offset 255, which is not a multiple of four. SI and CI are expected
; to move it into an SGPR with s_movk_i32; the VI expectation encodes 0xff
; directly.
; GCN-LABEL: {{^}}s_buffer_load_imm_255:
; SICI: s_movk_i32 [[K:s[0-9]+]], 0xff{{$}}
; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; VI: s_buffer_load_dword s0, s[0:3], 0xff{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_255(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 255, i32 0)
  ret i32 %dword
}
389
; Constant offset 256. The SI and CI expectations encode it divided by four
; (0x40); the VI expectation encodes the byte offset (0x100).
; GCN-LABEL: {{^}}s_buffer_load_imm_256:
; SICI: s_buffer_load_dword s0, s[0:3], 0x40{{$}}
; VI: s_buffer_load_dword s0, s[0:3], 0x100{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_256(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 256, i32 0)
  ret i32 %dword
}
397
; Constant offset 1016. The SI and CI expectations encode it divided by four
; (0xfe); the VI expectation encodes the byte offset (0x3f8).
; GCN-LABEL: {{^}}s_buffer_load_imm_1016:
; SICI: s_buffer_load_dword s0, s[0:3], 0xfe{{$}}
; VI: s_buffer_load_dword s0, s[0:3], 0x3f8{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_1016(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1016, i32 0)
  ret i32 %dword
}
405
; Constant offset 1020. The SI and CI expectations encode it divided by four
; (0xff); the VI expectation encodes the byte offset (0x3fc).
; GCN-LABEL: {{^}}s_buffer_load_imm_1020:
; SICI: s_buffer_load_dword s0, s[0:3], 0xff{{$}}
; VI: s_buffer_load_dword s0, s[0:3], 0x3fc{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_1020(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1020, i32 0)
  ret i32 %dword
}
413
; Constant offset 1021, which is not a multiple of four. SI and CI are expected
; to move it into an SGPR with s_movk_i32.
; The VI expectation was missing even though the tonga run executes this
; function; it mirrors the VI pattern of the 1025 test (byte offset encoded
; directly). NOTE(review): added by analogy, confirm against real llc output.
; GCN-LABEL: {{^}}s_buffer_load_imm_1021:
; SICI: s_movk_i32 [[K:s[0-9]+]], 0x3fd{{$}}
; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; VI: s_buffer_load_dword s0, s[0:3], 0x3fd{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_1021(<4 x i32> inreg %desc) {
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1021, i32 0)
  ret i32 %load
}
421
; Constant offset 1024. SI is expected to move the byte offset into an SGPR
; with s_movk_i32, the CI expectation encodes it divided by four (0x100), and
; the VI expectation encodes the byte offset (0x400).
; GCN-LABEL: {{^}}s_buffer_load_imm_1024:
; SI: s_movk_i32 [[K:s[0-9]+]], 0x400{{$}}
; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; CI: s_buffer_load_dword s0, s[0:3], 0x100{{$}}

; VI: s_buffer_load_dword s0, s[0:3], 0x400{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_1024(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1024, i32 0)
  ret i32 %dword
}
433
; Constant offset 1025, which is not a multiple of four. SI and CI are expected
; to move it into an SGPR with s_movk_i32; the VI expectation encodes 0x401
; directly.
; GCN-LABEL: {{^}}s_buffer_load_imm_1025:
; SICI: s_movk_i32 [[K:s[0-9]+]], 0x401{{$}}
; SICI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; VI: s_buffer_load_dword s0, s[0:3], 0x401{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_1025(<4 x i32> inreg %desc) {
entry:
  %dword = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1025, i32 0)
  ret i32 %dword
}
443
; Constant offset 1028. The body previously passed 1024, which made this an
; exact duplicate of the 1024 test; it now loads from 1028 as the name says.
; Expectations follow the 1024 test. SI moves the byte offset (0x404) into an
; SGPR, CI encodes it divided by four (0x101), VI encodes the byte offset.
; NOTE(review): values are derived by analogy, confirm against llc output.
; GCN-LABEL: {{^}}s_buffer_load_imm_1028:
; SI: s_movk_i32 [[K:s[0-9]+]], 0x404{{$}}
; SI: s_buffer_load_dword s0, s[0:3], [[K]]{{$}}

; CI: s_buffer_load_dword s0, s[0:3], 0x101{{$}}
; VI: s_buffer_load_dword s0, s[0:3], 0x404{{$}}
define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) {
  %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1028, i32 0)
  ret i32 %load
}
454
; Scalar buffer load intrinsics used by the tests, one per result width.
; Every call in this file passes the <4 x i32> descriptor first, the offset
; second and a constant 0 as the last operand.
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32)
declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)

; Export intrinsic used by the void-returning tests to keep the loaded values
; live.
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
460