1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=SI
3; RUN: llc < %s -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s  -check-prefix=GFX7
4; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10
5; RUN: llc < %s -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030
6; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1100
7
8; RUN: llc < %s -global-isel -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI
9; RUN: llc < %s -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s  -check-prefix=G_GFX7
10; RUN: llc < %s -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
11; RUN: llc < %s -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
12; RUN: llc < %s -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1100
13
; Intrinsics under test: raw-buffer floating-point atomic min and max on f32.
; Both are declared as taking a float, a <4 x i32>, two i32 values and a
; trailing immediate i32 (immarg), and both return a float.
14declare float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float, <4 x i32>, i32, i32, i32 immarg)
15declare float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float, <4 x i32>, i32, i32, i32 immarg)
16
17
; No-return fmin in an amdgpu_kernel: the float returned by the intrinsic call
; is never used. Every expected sequence below emits the atomic without the glc
; bit (buffer_atomic_fmin, spelled buffer_atomic_min_f32 for gfx1100), with an
; immediate 0 and the offen addressing mode.
; NOTE(review): the parameter is named %vindex, but the expected output uses it
; with offen rather than idxen - presumably it is really the voffset operand;
; confirm against the intrinsic definition.
18define amdgpu_kernel void @raw_buffer_atomic_min_noret_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
19; SI-LABEL: raw_buffer_atomic_min_noret_f32:
20; SI:       ; %bb.0: ; %main_body
21; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
22; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
23; SI-NEXT:    s_waitcnt lgkmcnt(0)
24; SI-NEXT:    v_mov_b32_e32 v0, s4
25; SI-NEXT:    v_mov_b32_e32 v1, s5
26; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
27; SI-NEXT:    s_endpgm
28;
29; GFX7-LABEL: raw_buffer_atomic_min_noret_f32:
30; GFX7:       ; %bb.0: ; %main_body
31; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
32; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
33; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
34; GFX7-NEXT:    v_mov_b32_e32 v0, s4
35; GFX7-NEXT:    v_mov_b32_e32 v1, s5
36; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
37; GFX7-NEXT:    s_endpgm
38;
39; GFX10-LABEL: raw_buffer_atomic_min_noret_f32:
40; GFX10:       ; %bb.0: ; %main_body
41; GFX10-NEXT:    s_clause 0x1
42; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
43; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
44; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
45; GFX10-NEXT:    v_mov_b32_e32 v0, s2
46; GFX10-NEXT:    v_mov_b32_e32 v1, s3
47; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 0 offen
48; GFX10-NEXT:    s_endpgm
49;
50; GFX1030-LABEL: raw_buffer_atomic_min_noret_f32:
51; GFX1030:       ; %bb.0: ; %main_body
52; GFX1030-NEXT:    s_clause 0x1
53; GFX1030-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
54; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
55; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
56; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
57; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
58; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
59; GFX1030-NEXT:    s_endpgm
60;
61; GFX1100-LABEL: raw_buffer_atomic_min_noret_f32:
62; GFX1100:       ; %bb.0: ; %main_body
63; GFX1100-NEXT:    s_clause 0x1
64; GFX1100-NEXT:    s_load_b64 s[4:5], s[0:1], 0x34
65; GFX1100-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
66; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
67; GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
68; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
69; GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
70; GFX1100-NEXT:    s_endpgm
71;
72; G_SI-LABEL: raw_buffer_atomic_min_noret_f32:
73; G_SI:       ; %bb.0: ; %main_body
74; G_SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
75; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
76; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
77; G_SI-NEXT:    v_mov_b32_e32 v0, s4
78; G_SI-NEXT:    v_mov_b32_e32 v1, s5
79; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
80; G_SI-NEXT:    s_endpgm
81;
82; G_GFX7-LABEL: raw_buffer_atomic_min_noret_f32:
83; G_GFX7:       ; %bb.0: ; %main_body
84; G_GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
85; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
86; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
87; G_GFX7-NEXT:    v_mov_b32_e32 v0, s4
88; G_GFX7-NEXT:    v_mov_b32_e32 v1, s5
89; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
90; G_GFX7-NEXT:    s_endpgm
91;
92; G_GFX10-LABEL: raw_buffer_atomic_min_noret_f32:
93; G_GFX10:       ; %bb.0: ; %main_body
94; G_GFX10-NEXT:    s_clause 0x1
95; G_GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
96; G_GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
97; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
98; G_GFX10-NEXT:    v_mov_b32_e32 v0, s2
99; G_GFX10-NEXT:    v_mov_b32_e32 v1, s3
100; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 0 offen
101; G_GFX10-NEXT:    s_endpgm
102;
103; G_GFX1030-LABEL: raw_buffer_atomic_min_noret_f32:
104; G_GFX1030:       ; %bb.0: ; %main_body
105; G_GFX1030-NEXT:    s_clause 0x1
106; G_GFX1030-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
107; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
108; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
109; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s4
110; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s5
111; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
112; G_GFX1030-NEXT:    s_endpgm
113;
114; G_GFX1100-LABEL: raw_buffer_atomic_min_noret_f32:
115; G_GFX1100:       ; %bb.0: ; %main_body
116; G_GFX1100-NEXT:    s_clause 0x1
117; G_GFX1100-NEXT:    s_load_b64 s[4:5], s[0:1], 0x34
118; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
119; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
120; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
121; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
122; G_GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
123; G_GFX1100-NEXT:    s_endpgm
124main_body:
125  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
126  ret void
127}
128
; Returning fmin in an amdgpu_ps shader: the intrinsic result is stored through
; an undef addrspace(1) pointer, so the value is used. Every expected sequence
; below emits the atomic with the glc bit, then s_waitcnt vmcnt(0) before the
; store of v0. The last two intrinsic operands are both 0.
129define amdgpu_ps void @raw_buffer_atomic_min_rtn_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
130; SI-LABEL: raw_buffer_atomic_min_rtn_f32:
131; SI:       ; %bb.0: ; %main_body
132; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
133; SI-NEXT:    s_mov_b32 s3, 0xf000
134; SI-NEXT:    s_mov_b32 s2, -1
135; SI-NEXT:    s_waitcnt vmcnt(0)
136; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
137; SI-NEXT:    s_endpgm
138;
139; GFX7-LABEL: raw_buffer_atomic_min_rtn_f32:
140; GFX7:       ; %bb.0: ; %main_body
141; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
142; GFX7-NEXT:    s_mov_b32 s3, 0xf000
143; GFX7-NEXT:    s_mov_b32 s2, -1
144; GFX7-NEXT:    s_waitcnt vmcnt(0)
145; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
146; GFX7-NEXT:    s_endpgm
147;
148; GFX10-LABEL: raw_buffer_atomic_min_rtn_f32:
149; GFX10:       ; %bb.0: ; %main_body
150; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
151; GFX10-NEXT:    s_waitcnt vmcnt(0)
152; GFX10-NEXT:    global_store_dword v[0:1], v0, off
153; GFX10-NEXT:    s_endpgm
154;
155; GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32:
156; GFX1030:       ; %bb.0: ; %main_body
157; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
158; GFX1030-NEXT:    s_waitcnt vmcnt(0)
159; GFX1030-NEXT:    global_store_dword v[0:1], v0, off
160; GFX1030-NEXT:    s_endpgm
161;
162; GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32:
163; GFX1100:       ; %bb.0: ; %main_body
164; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
165; GFX1100-NEXT:    s_waitcnt vmcnt(0)
166; GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
167; GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
168; GFX1100-NEXT:    s_endpgm
169;
170; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32:
171; G_SI:       ; %bb.0: ; %main_body
172; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
173; G_SI-NEXT:    s_mov_b32 s2, -1
174; G_SI-NEXT:    s_mov_b32 s3, 0xf000
175; G_SI-NEXT:    s_waitcnt vmcnt(0)
176; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
177; G_SI-NEXT:    s_endpgm
178;
179; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f32:
180; G_GFX7:       ; %bb.0: ; %main_body
181; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
182; G_GFX7-NEXT:    s_mov_b32 s2, -1
183; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
184; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
185; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
186; G_GFX7-NEXT:    s_endpgm
187;
188; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f32:
189; G_GFX10:       ; %bb.0: ; %main_body
190; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
191; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
192; G_GFX10-NEXT:    global_store_dword v[0:1], v0, off
193; G_GFX10-NEXT:    s_endpgm
194;
195; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32:
196; G_GFX1030:       ; %bb.0: ; %main_body
197; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
198; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
199; G_GFX1030-NEXT:    global_store_dword v[0:1], v0, off
200; G_GFX1030-NEXT:    s_endpgm
201;
202; G_GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32:
203; G_GFX1100:       ; %bb.0: ; %main_body
204; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
205; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
206; G_GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
207; G_GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
208; G_GFX1100-NEXT:    s_endpgm
209main_body:
210  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
211  store float %ret, float addrspace(1)* undef
212  ret void
213}
214
; Returning fmin in an amdgpu_kernel with non-zero trailing operands: the
; intrinsic is called with 4 and 2 as its last two operands and the result is
; stored through the addrspace(3) pointer %out. Every expected sequence below
; emits the atomic as "4 offen glc slc" and writes the result with
; ds_write_b32 (ds_store_b32 for gfx1100) after s_waitcnt vmcnt(0).
215define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inreg %rsrc, float %data, i32 %vindex, float addrspace(3)* %out) {
216; SI-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
217; SI:       ; %bb.0: ; %main_body
218; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
219; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
220; SI-NEXT:    s_load_dword s0, s[0:1], 0xf
221; SI-NEXT:    s_mov_b32 m0, -1
222; SI-NEXT:    s_waitcnt lgkmcnt(0)
223; SI-NEXT:    v_mov_b32_e32 v0, s2
224; SI-NEXT:    v_mov_b32_e32 v1, s3
225; SI-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
226; SI-NEXT:    v_mov_b32_e32 v1, s0
227; SI-NEXT:    s_waitcnt vmcnt(0)
228; SI-NEXT:    ds_write_b32 v1, v0
229; SI-NEXT:    s_endpgm
230;
231; GFX7-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
232; GFX7:       ; %bb.0: ; %main_body
233; GFX7-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
234; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
235; GFX7-NEXT:    s_load_dword s0, s[0:1], 0xf
236; GFX7-NEXT:    s_mov_b32 m0, -1
237; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
238; GFX7-NEXT:    v_mov_b32_e32 v0, s2
239; GFX7-NEXT:    v_mov_b32_e32 v1, s3
240; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
241; GFX7-NEXT:    v_mov_b32_e32 v1, s0
242; GFX7-NEXT:    s_waitcnt vmcnt(0)
243; GFX7-NEXT:    ds_write_b32 v1, v0
244; GFX7-NEXT:    s_endpgm
245;
246; GFX10-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
247; GFX10:       ; %bb.0: ; %main_body
248; GFX10-NEXT:    s_clause 0x1
249; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
250; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
251; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
252; GFX10-NEXT:    v_mov_b32_e32 v0, s2
253; GFX10-NEXT:    v_mov_b32_e32 v1, s3
254; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x3c
255; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
256; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
257; GFX10-NEXT:    v_mov_b32_e32 v1, s0
258; GFX10-NEXT:    s_waitcnt vmcnt(0)
259; GFX10-NEXT:    ds_write_b32 v1, v0
260; GFX10-NEXT:    s_endpgm
261;
262; GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
263; GFX1030:       ; %bb.0: ; %main_body
264; GFX1030-NEXT:    s_clause 0x2
265; GFX1030-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
266; GFX1030-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
267; GFX1030-NEXT:    s_load_dword s0, s[0:1], 0x3c
268; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
269; GFX1030-NEXT:    v_mov_b32_e32 v0, s2
270; GFX1030-NEXT:    v_mov_b32_e32 v1, s3
271; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
272; GFX1030-NEXT:    v_mov_b32_e32 v1, s0
273; GFX1030-NEXT:    s_waitcnt vmcnt(0)
274; GFX1030-NEXT:    ds_write_b32 v1, v0
275; GFX1030-NEXT:    s_endpgm
276;
277; GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
278; GFX1100:       ; %bb.0: ; %main_body
279; GFX1100-NEXT:    s_clause 0x2
280; GFX1100-NEXT:    s_load_b64 s[2:3], s[0:1], 0x34
281; GFX1100-NEXT:    s_load_b128 s[4:7], s[0:1], 0x24
282; GFX1100-NEXT:    s_load_b32 s0, s[0:1], 0x3c
283; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
284; GFX1100-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
285; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[4:7], 4 offen glc slc
286; GFX1100-NEXT:    v_mov_b32_e32 v1, s0
287; GFX1100-NEXT:    s_waitcnt vmcnt(0)
288; GFX1100-NEXT:    ds_store_b32 v1, v0
289; GFX1100-NEXT:    s_endpgm
290;
291; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
292; G_SI:       ; %bb.0: ; %main_body
293; G_SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
294; G_SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
295; G_SI-NEXT:    s_load_dword s0, s[0:1], 0xf
296; G_SI-NEXT:    s_mov_b32 m0, -1
297; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
298; G_SI-NEXT:    v_mov_b32_e32 v0, s2
299; G_SI-NEXT:    v_mov_b32_e32 v1, s3
300; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
301; G_SI-NEXT:    v_mov_b32_e32 v1, s0
302; G_SI-NEXT:    s_waitcnt vmcnt(0)
303; G_SI-NEXT:    ds_write_b32 v1, v0
304; G_SI-NEXT:    s_endpgm
305;
306; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
307; G_GFX7:       ; %bb.0: ; %main_body
308; G_GFX7-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
309; G_GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
310; G_GFX7-NEXT:    s_load_dword s0, s[0:1], 0xf
311; G_GFX7-NEXT:    s_mov_b32 m0, -1
312; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
313; G_GFX7-NEXT:    v_mov_b32_e32 v0, s2
314; G_GFX7-NEXT:    v_mov_b32_e32 v1, s3
315; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
316; G_GFX7-NEXT:    v_mov_b32_e32 v1, s0
317; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
318; G_GFX7-NEXT:    ds_write_b32 v1, v0
319; G_GFX7-NEXT:    s_endpgm
320;
321; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
322; G_GFX10:       ; %bb.0: ; %main_body
323; G_GFX10-NEXT:    s_clause 0x1
324; G_GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
325; G_GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
326; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
327; G_GFX10-NEXT:    v_mov_b32_e32 v0, s2
328; G_GFX10-NEXT:    v_mov_b32_e32 v1, s3
329; G_GFX10-NEXT:    s_load_dword s0, s[0:1], 0x3c
330; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
331; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
332; G_GFX10-NEXT:    v_mov_b32_e32 v1, s0
333; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
334; G_GFX10-NEXT:    ds_write_b32 v1, v0
335; G_GFX10-NEXT:    s_endpgm
336;
337; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
338; G_GFX1030:       ; %bb.0: ; %main_body
339; G_GFX1030-NEXT:    s_clause 0x2
340; G_GFX1030-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
341; G_GFX1030-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
342; G_GFX1030-NEXT:    s_load_dword s0, s[0:1], 0x3c
343; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
344; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s2
345; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s3
346; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc
347; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s0
348; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
349; G_GFX1030-NEXT:    ds_write_b32 v1, v0
350; G_GFX1030-NEXT:    s_endpgm
351;
352; G_GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
353; G_GFX1100:       ; %bb.0: ; %main_body
354; G_GFX1100-NEXT:    s_clause 0x2
355; G_GFX1100-NEXT:    s_load_b64 s[2:3], s[0:1], 0x34
356; G_GFX1100-NEXT:    s_load_b128 s[4:7], s[0:1], 0x24
357; G_GFX1100-NEXT:    s_load_b32 s0, s[0:1], 0x3c
358; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
359; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
360; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[4:7], 4 offen glc slc
361; G_GFX1100-NEXT:    v_mov_b32_e32 v1, s0
362; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
363; G_GFX1100-NEXT:    ds_store_b32 v1, v0
364; G_GFX1100-NEXT:    s_endpgm
366main_body:
367  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
368  store float %ret, float addrspace(3)* %out, align 8
369  ret void
370}
371
; No-return fmax in an amdgpu_kernel: the float returned by the intrinsic call
; is never used. Every expected sequence below emits the atomic without the glc
; bit (buffer_atomic_fmax, spelled buffer_atomic_max_f32 for gfx1100), with an
; immediate 0 and the offen addressing mode.
372define amdgpu_kernel void @raw_buffer_atomic_max_noret_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
373; SI-LABEL: raw_buffer_atomic_max_noret_f32:
374; SI:       ; %bb.0: ; %main_body
375; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
376; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
377; SI-NEXT:    s_waitcnt lgkmcnt(0)
378; SI-NEXT:    v_mov_b32_e32 v0, s4
379; SI-NEXT:    v_mov_b32_e32 v1, s5
380; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
381; SI-NEXT:    s_endpgm
382;
383; GFX7-LABEL: raw_buffer_atomic_max_noret_f32:
384; GFX7:       ; %bb.0: ; %main_body
385; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
386; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
387; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
388; GFX7-NEXT:    v_mov_b32_e32 v0, s4
389; GFX7-NEXT:    v_mov_b32_e32 v1, s5
390; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
391; GFX7-NEXT:    s_endpgm
392;
393; GFX10-LABEL: raw_buffer_atomic_max_noret_f32:
394; GFX10:       ; %bb.0: ; %main_body
395; GFX10-NEXT:    s_clause 0x1
396; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
397; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
398; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
399; GFX10-NEXT:    v_mov_b32_e32 v0, s2
400; GFX10-NEXT:    v_mov_b32_e32 v1, s3
401; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 0 offen
402; GFX10-NEXT:    s_endpgm
403;
404; GFX1030-LABEL: raw_buffer_atomic_max_noret_f32:
405; GFX1030:       ; %bb.0: ; %main_body
406; GFX1030-NEXT:    s_clause 0x1
407; GFX1030-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
408; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
409; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
410; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
411; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
412; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
413; GFX1030-NEXT:    s_endpgm
414;
415; GFX1100-LABEL: raw_buffer_atomic_max_noret_f32:
416; GFX1100:       ; %bb.0: ; %main_body
417; GFX1100-NEXT:    s_clause 0x1
418; GFX1100-NEXT:    s_load_b64 s[4:5], s[0:1], 0x34
419; GFX1100-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
420; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
421; GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
422; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
423; GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
424; GFX1100-NEXT:    s_endpgm
425;
426; G_SI-LABEL: raw_buffer_atomic_max_noret_f32:
427; G_SI:       ; %bb.0: ; %main_body
428; G_SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
429; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
430; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
431; G_SI-NEXT:    v_mov_b32_e32 v0, s4
432; G_SI-NEXT:    v_mov_b32_e32 v1, s5
433; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
434; G_SI-NEXT:    s_endpgm
435;
436; G_GFX7-LABEL: raw_buffer_atomic_max_noret_f32:
437; G_GFX7:       ; %bb.0: ; %main_body
438; G_GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
439; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
440; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
441; G_GFX7-NEXT:    v_mov_b32_e32 v0, s4
442; G_GFX7-NEXT:    v_mov_b32_e32 v1, s5
443; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
444; G_GFX7-NEXT:    s_endpgm
445;
446; G_GFX10-LABEL: raw_buffer_atomic_max_noret_f32:
447; G_GFX10:       ; %bb.0: ; %main_body
448; G_GFX10-NEXT:    s_clause 0x1
449; G_GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
450; G_GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
451; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
452; G_GFX10-NEXT:    v_mov_b32_e32 v0, s2
453; G_GFX10-NEXT:    v_mov_b32_e32 v1, s3
454; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 0 offen
455; G_GFX10-NEXT:    s_endpgm
456;
457; G_GFX1030-LABEL: raw_buffer_atomic_max_noret_f32:
458; G_GFX1030:       ; %bb.0: ; %main_body
459; G_GFX1030-NEXT:    s_clause 0x1
460; G_GFX1030-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
461; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
462; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
463; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s4
464; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s5
465; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
466; G_GFX1030-NEXT:    s_endpgm
467;
468; G_GFX1100-LABEL: raw_buffer_atomic_max_noret_f32:
469; G_GFX1100:       ; %bb.0: ; %main_body
470; G_GFX1100-NEXT:    s_clause 0x1
471; G_GFX1100-NEXT:    s_load_b64 s[4:5], s[0:1], 0x34
472; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[0:1], 0x24
473; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
474; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
475; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
476; G_GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
477; G_GFX1100-NEXT:    s_endpgm
478main_body:
479  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
480  ret void
481}
482
; Returning fmax in an amdgpu_ps shader: the intrinsic result is stored through
; an undef addrspace(1) pointer, so the value is used. Every expected sequence
; below emits the atomic with the glc bit, then s_waitcnt vmcnt(0) before the
; store of v0. The last two intrinsic operands are both 0.
483define amdgpu_ps void @raw_buffer_atomic_max_rtn_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
484; SI-LABEL: raw_buffer_atomic_max_rtn_f32:
485; SI:       ; %bb.0: ; %main_body
486; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
487; SI-NEXT:    s_mov_b32 s3, 0xf000
488; SI-NEXT:    s_mov_b32 s2, -1
489; SI-NEXT:    s_waitcnt vmcnt(0)
490; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
491; SI-NEXT:    s_endpgm
492;
493; GFX7-LABEL: raw_buffer_atomic_max_rtn_f32:
494; GFX7:       ; %bb.0: ; %main_body
495; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
496; GFX7-NEXT:    s_mov_b32 s3, 0xf000
497; GFX7-NEXT:    s_mov_b32 s2, -1
498; GFX7-NEXT:    s_waitcnt vmcnt(0)
499; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
500; GFX7-NEXT:    s_endpgm
501;
502; GFX10-LABEL: raw_buffer_atomic_max_rtn_f32:
503; GFX10:       ; %bb.0: ; %main_body
504; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
505; GFX10-NEXT:    s_waitcnt vmcnt(0)
506; GFX10-NEXT:    global_store_dword v[0:1], v0, off
507; GFX10-NEXT:    s_endpgm
508;
509; GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32:
510; GFX1030:       ; %bb.0: ; %main_body
511; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
512; GFX1030-NEXT:    s_waitcnt vmcnt(0)
513; GFX1030-NEXT:    global_store_dword v[0:1], v0, off
514; GFX1030-NEXT:    s_endpgm
515;
516; GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32:
517; GFX1100:       ; %bb.0: ; %main_body
518; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
519; GFX1100-NEXT:    s_waitcnt vmcnt(0)
520; GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
521; GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
522; GFX1100-NEXT:    s_endpgm
523;
524; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32:
525; G_SI:       ; %bb.0: ; %main_body
526; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
527; G_SI-NEXT:    s_mov_b32 s2, -1
528; G_SI-NEXT:    s_mov_b32 s3, 0xf000
529; G_SI-NEXT:    s_waitcnt vmcnt(0)
530; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
531; G_SI-NEXT:    s_endpgm
532;
533; G_GFX7-LABEL: raw_buffer_atomic_max_rtn_f32:
534; G_GFX7:       ; %bb.0: ; %main_body
535; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
536; G_GFX7-NEXT:    s_mov_b32 s2, -1
537; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
538; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
539; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
540; G_GFX7-NEXT:    s_endpgm
541;
542; G_GFX10-LABEL: raw_buffer_atomic_max_rtn_f32:
543; G_GFX10:       ; %bb.0: ; %main_body
544; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
545; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
546; G_GFX10-NEXT:    global_store_dword v[0:1], v0, off
547; G_GFX10-NEXT:    s_endpgm
548;
549; G_GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32:
550; G_GFX1030:       ; %bb.0: ; %main_body
551; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
552; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
553; G_GFX1030-NEXT:    global_store_dword v[0:1], v0, off
554; G_GFX1030-NEXT:    s_endpgm
555;
556; G_GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32:
557; G_GFX1100:       ; %bb.0: ; %main_body
558; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
559; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
560; G_GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
561; G_GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
562; G_GFX1100-NEXT:    s_endpgm
563main_body:
564  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
565  store float %ret, float addrspace(1)* undef
566  ret void
567}
568
; Returning fmax in an amdgpu_kernel with non-zero trailing operands: the
; intrinsic is called with 4 and 2 as its last two operands and the result is
; stored through the addrspace(1) pointer %out. Every expected sequence below
; emits the atomic as "4 offen glc slc" and stores v0 after s_waitcnt vmcnt(0),
; using buffer_store_dword on verde/hawaii and a global store on gfx10 and later.
569define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f32_off4_slc(<4 x i32> inreg %rsrc, float %data, i32 %vindex, float addrspace(1)* %out) {
570; SI-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
571; SI:       ; %bb.0: ; %main_body
572; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
573; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
574; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xf
575; SI-NEXT:    s_waitcnt lgkmcnt(0)
576; SI-NEXT:    v_mov_b32_e32 v0, s2
577; SI-NEXT:    v_mov_b32_e32 v1, s3
578; SI-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
579; SI-NEXT:    s_mov_b32 s3, 0xf000
580; SI-NEXT:    s_mov_b32 s2, -1
581; SI-NEXT:    s_waitcnt vmcnt(0)
582; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
583; SI-NEXT:    s_endpgm
584;
585; GFX7-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
586; GFX7:       ; %bb.0: ; %main_body
587; GFX7-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
588; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
589; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xf
590; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
591; GFX7-NEXT:    v_mov_b32_e32 v0, s2
592; GFX7-NEXT:    v_mov_b32_e32 v1, s3
593; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
594; GFX7-NEXT:    s_mov_b32 s3, 0xf000
595; GFX7-NEXT:    s_mov_b32 s2, -1
596; GFX7-NEXT:    s_waitcnt vmcnt(0)
597; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
598; GFX7-NEXT:    s_endpgm
599;
600; GFX10-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
601; GFX10:       ; %bb.0: ; %main_body
602; GFX10-NEXT:    s_clause 0x1
603; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
604; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
605; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
606; GFX10-NEXT:    v_mov_b32_e32 v0, s2
607; GFX10-NEXT:    v_mov_b32_e32 v1, s3
608; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x3c
609; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
610; GFX10-NEXT:    v_mov_b32_e32 v1, 0
611; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
612; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
613; GFX10-NEXT:    s_endpgm
614;
615; GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
616; GFX1030:       ; %bb.0: ; %main_body
617; GFX1030-NEXT:    s_clause 0x2
618; GFX1030-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
619; GFX1030-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
620; GFX1030-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x3c
621; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
622; GFX1030-NEXT:    v_mov_b32_e32 v0, s2
623; GFX1030-NEXT:    v_mov_b32_e32 v1, s3
624; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
625; GFX1030-NEXT:    v_mov_b32_e32 v1, 0
626; GFX1030-NEXT:    s_waitcnt vmcnt(0)
627; GFX1030-NEXT:    global_store_dword v1, v0, s[0:1]
628; GFX1030-NEXT:    s_endpgm
629;
630; GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
631; GFX1100:       ; %bb.0: ; %main_body
632; GFX1100-NEXT:    s_clause 0x2
633; GFX1100-NEXT:    s_load_b64 s[2:3], s[0:1], 0x34
634; GFX1100-NEXT:    s_load_b128 s[4:7], s[0:1], 0x24
635; GFX1100-NEXT:    s_load_b64 s[0:1], s[0:1], 0x3c
636; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
637; GFX1100-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
638; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[4:7], 4 offen glc slc
639; GFX1100-NEXT:    v_mov_b32_e32 v1, 0
640; GFX1100-NEXT:    s_waitcnt vmcnt(0)
641; GFX1100-NEXT:    global_store_b32 v1, v0, s[0:1]
642; GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
643; GFX1100-NEXT:    s_endpgm
644;
645; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
646; G_SI:       ; %bb.0: ; %main_body
647; G_SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
648; G_SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
649; G_SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xf
650; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
651; G_SI-NEXT:    v_mov_b32_e32 v0, s2
652; G_SI-NEXT:    v_mov_b32_e32 v1, s3
653; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
654; G_SI-NEXT:    s_mov_b32 s2, -1
655; G_SI-NEXT:    s_mov_b32 s3, 0xf000
656; G_SI-NEXT:    s_waitcnt vmcnt(0)
657; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
658; G_SI-NEXT:    s_endpgm
659;
660; G_GFX7-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
661; G_GFX7:       ; %bb.0: ; %main_body
662; G_GFX7-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
663; G_GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
664; G_GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xf
665; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
666; G_GFX7-NEXT:    v_mov_b32_e32 v0, s2
667; G_GFX7-NEXT:    v_mov_b32_e32 v1, s3
668; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
669; G_GFX7-NEXT:    s_mov_b32 s2, -1
670; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
671; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
672; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
673; G_GFX7-NEXT:    s_endpgm
674;
675; G_GFX10-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
676; G_GFX10:       ; %bb.0: ; %main_body
677; G_GFX10-NEXT:    s_clause 0x1
678; G_GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
679; G_GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
680; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
681; G_GFX10-NEXT:    v_mov_b32_e32 v0, s2
682; G_GFX10-NEXT:    v_mov_b32_e32 v1, s3
683; G_GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x3c
684; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
685; G_GFX10-NEXT:    v_mov_b32_e32 v1, 0
686; G_GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
687; G_GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
688; G_GFX10-NEXT:    s_endpgm
689;
690; G_GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
691; G_GFX1030:       ; %bb.0: ; %main_body
692; G_GFX1030-NEXT:    s_clause 0x2
693; G_GFX1030-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
694; G_GFX1030-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
695; G_GFX1030-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x3c
696; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
697; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s2
698; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s3
699; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[4:7], 4 offen glc slc
700; G_GFX1030-NEXT:    v_mov_b32_e32 v1, 0
701; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
702; G_GFX1030-NEXT:    global_store_dword v1, v0, s[0:1]
703; G_GFX1030-NEXT:    s_endpgm
704;
705; G_GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
706; G_GFX1100:       ; %bb.0: ; %main_body
707; G_GFX1100-NEXT:    s_clause 0x2
708; G_GFX1100-NEXT:    s_load_b64 s[2:3], s[0:1], 0x34
709; G_GFX1100-NEXT:    s_load_b128 s[4:7], s[0:1], 0x24
710; G_GFX1100-NEXT:    s_load_b64 s[0:1], s[0:1], 0x3c
711; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
712; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
713; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[4:7], 4 offen glc slc
714; G_GFX1100-NEXT:    v_mov_b32_e32 v1, 0
715; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
716; G_GFX1100-NEXT:    global_store_b32 v1, v0, s[0:1]
717; G_GFX1100-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
718; G_GFX1100-NEXT:    s_endpgm
719main_body:
720  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
721  store float %ret, float addrspace(1)* %out, align 8
722  ret void
723}
724