1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -atomic-expand < %s | FileCheck -check-prefix=IR %s
3; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefix=GCN %s
4
; Sequentially consistent atomic i32 load through an addrspace(5) pointer.
; The opt checks expect the atomic ordering to be dropped, leaving a plain
; `load i32` with align 4; the llc checks expect a single buffer_load_dword
; followed by a wait on vmcnt before returning.
5define i32 @load_atomic_private_seq_cst_i32(i32 addrspace(5)* %ptr) {
6; IR-LABEL: @load_atomic_private_seq_cst_i32(
7; IR-NEXT:    [[LOAD:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
8; IR-NEXT:    ret i32 [[LOAD]]
9;
10; GCN-LABEL: load_atomic_private_seq_cst_i32:
11; GCN:       ; %bb.0:
12; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13; GCN-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
14; GCN-NEXT:    s_waitcnt vmcnt(0)
15; GCN-NEXT:    s_setpc_b64 s[30:31]
16  %load = load atomic i32, i32 addrspace(5)* %ptr seq_cst, align 4
17  ret i32 %load
18}
19
; Sequentially consistent atomic i64 load through an addrspace(5) pointer.
; The opt checks expect a plain `load i64` with align 8. The llc checks
; expect the 64-bit value to be read as two dword loads, one at the pointer
; and one at pointer + 4.
20define i64 @load_atomic_private_seq_cst_i64(i64 addrspace(5)* %ptr) {
21; IR-LABEL: @load_atomic_private_seq_cst_i64(
22; IR-NEXT:    [[LOAD:%.*]] = load i64, i64 addrspace(5)* [[PTR:%.*]], align 8
23; IR-NEXT:    ret i64 [[LOAD]]
24;
25; GCN-LABEL: load_atomic_private_seq_cst_i64:
26; GCN:       ; %bb.0:
27; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; GCN-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
29; GCN-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
30; GCN-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
31; GCN-NEXT:    s_waitcnt vmcnt(0)
32; GCN-NEXT:    s_setpc_b64 s[30:31]
33  %load = load atomic i64, i64 addrspace(5)* %ptr seq_cst, align 8
34  ret i64 %load
35}
36
; Sequentially consistent atomic i32 store through an addrspace(5) pointer.
; The opt checks expect a plain `store i32` with align 4; the llc checks
; expect a single buffer_store_dword of %val (v1) at %ptr (v0).
37define void @atomic_store_seq_cst_i32(i32 addrspace(5)* %ptr, i32 %val) {
38; IR-LABEL: @atomic_store_seq_cst_i32(
39; IR-NEXT:    store i32 [[VAL:%.*]], i32 addrspace(5)* [[PTR:%.*]], align 4
40; IR-NEXT:    ret void
41;
42; GCN-LABEL: atomic_store_seq_cst_i32:
43; GCN:       ; %bb.0:
44; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45; GCN-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
46; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
47; GCN-NEXT:    s_setpc_b64 s[30:31]
48  store atomic i32 %val, i32 addrspace(5)* %ptr seq_cst, align 4
49  ret void
50}
51
; Sequentially consistent atomic i64 store through an addrspace(5) pointer.
; The opt checks expect a plain `store i64` with align 8. The llc checks
; expect the value to be written as two dword stores: the high half (v2) at
; pointer + 4 and the low half (v1) at the pointer.
52define void @atomic_store_seq_cst_i64(i64 addrspace(5)* %ptr, i64 %val) {
53; IR-LABEL: @atomic_store_seq_cst_i64(
54; IR-NEXT:    store i64 [[VAL:%.*]], i64 addrspace(5)* [[PTR:%.*]], align 8
55; IR-NEXT:    ret void
56;
57; GCN-LABEL: atomic_store_seq_cst_i64:
58; GCN:       ; %bb.0:
59; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60; GCN-NEXT:    v_add_i32_e32 v3, vcc, 4, v0
61; GCN-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
62; GCN-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
63; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
64; GCN-NEXT:    s_setpc_b64 s[30:31]
65  store atomic i64 %val, i64 addrspace(5)* %ptr seq_cst, align 8
66  ret void
67}
68
; Same as the plain seq_cst i32 atomic load case, but the input carries an
; explicit syncscope("agent"). The opt checks expect both the ordering and
; the syncscope to disappear, leaving a plain `load i32`; the expected
; machine code is a single buffer_load_dword.
69define i32 @load_atomic_private_seq_cst_syncscope_i32(i32 addrspace(5)* %ptr) {
70; IR-LABEL: @load_atomic_private_seq_cst_syncscope_i32(
71; IR-NEXT:    [[LOAD:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
72; IR-NEXT:    ret i32 [[LOAD]]
73;
74; GCN-LABEL: load_atomic_private_seq_cst_syncscope_i32:
75; GCN:       ; %bb.0:
76; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77; GCN-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
78; GCN-NEXT:    s_waitcnt vmcnt(0)
79; GCN-NEXT:    s_setpc_b64 s[30:31]
80  %load = load atomic i32, i32 addrspace(5)* %ptr syncscope("agent") seq_cst, align 4
81  ret i32 %load
82}
83
; Same as the plain seq_cst i32 atomic store case, but the input carries an
; explicit syncscope("agent"). The opt checks expect both the ordering and
; the syncscope to disappear, leaving a plain `store i32`; the expected
; machine code is a single buffer_store_dword.
84define void @atomic_store_seq_cst_syncscope_i32(i32 addrspace(5)* %ptr, i32 %val) {
85; IR-LABEL: @atomic_store_seq_cst_syncscope_i32(
86; IR-NEXT:    store i32 [[VAL:%.*]], i32 addrspace(5)* [[PTR:%.*]], align 4
87; IR-NEXT:    ret void
88;
89; GCN-LABEL: atomic_store_seq_cst_syncscope_i32:
90; GCN:       ; %bb.0:
91; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
92; GCN-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
93; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
94; GCN-NEXT:    s_setpc_b64 s[30:31]
95  store atomic i32 %val, i32 addrspace(5)* %ptr syncscope("agent") seq_cst, align 4
96  ret void
97}
98
; i32 cmpxchg (expected 0, replacement 1, acq_rel/monotonic) through an
; addrspace(5) pointer. The opt checks expect the operation to be expanded
; into a non-atomic sequence: load the old value, compare it with 0, select
; the value to write back (1 on match, otherwise the old value), store it,
; and rebuild the { i32, i1 } result pair with insertvalue.
; The success bit is stored to a poison addrspace(1) pointer so that it stays
; used; the old value is returned.
99define i32 @cmpxchg_private_i32(i32 addrspace(5)* %ptr) {
100; IR-LABEL: @cmpxchg_private_i32(
101; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
102; IR-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
103; IR-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 1, i32 [[TMP1]]
104; IR-NEXT:    store i32 [[TMP3]], i32 addrspace(5)* [[PTR]], align 4
105; IR-NEXT:    [[TMP4:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP1]], 0
106; IR-NEXT:    [[TMP5:%.*]] = insertvalue { i32, i1 } [[TMP4]], i1 [[TMP2]], 1
107; IR-NEXT:    [[RESULT_0:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
108; IR-NEXT:    [[RESULT_1:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
109; IR-NEXT:    store i1 [[RESULT_1]], i1 addrspace(1)* poison, align 1
110; IR-NEXT:    ret i32 [[RESULT_0]]
111;
112; GCN-LABEL: cmpxchg_private_i32:
113; GCN:       ; %bb.0:
114; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
116; GCN-NEXT:    s_mov_b32 s7, 0xf000
117; GCN-NEXT:    s_mov_b32 s6, -1
118; GCN-NEXT:    s_waitcnt vmcnt(0)
119; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
120; GCN-NEXT:    v_cndmask_b32_e64 v2, v1, 1, vcc
121; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
122; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
123; GCN-NEXT:    buffer_store_byte v0, off, s[4:7], 0
124; GCN-NEXT:    s_waitcnt expcnt(0)
125; GCN-NEXT:    v_mov_b32_e32 v0, v1
126; GCN-NEXT:    s_waitcnt vmcnt(0)
127; GCN-NEXT:    s_setpc_b64 s[30:31]
128  %result = cmpxchg i32 addrspace(5)* %ptr, i32 0, i32 1 acq_rel monotonic
129  %result.0 = extractvalue { i32, i1 } %result, 0
130  %result.1 = extractvalue { i32, i1 } %result, 1
131  store i1 %result.1, i1 addrspace(1)* poison
132  ret i32 %result.0
133}
134
; i64 variant of the addrspace(5) cmpxchg case (expected 0, replacement 1,
; acq_rel/monotonic). The opt checks expect the same load / icmp eq / select /
; store expansion as the i32 case, here with align 4 on the i64 load and store.
; The llc checks expect the 64-bit access to be split into two dword loads and
; two dword stores (at the pointer and at pointer + 4), with one 64-bit compare
; against 0 driving the selects for both halves and for the success byte.
135define i64 @cmpxchg_private_i64(i64 addrspace(5)* %ptr) {
136; IR-LABEL: @cmpxchg_private_i64(
137; IR-NEXT:    [[TMP1:%.*]] = load i64, i64 addrspace(5)* [[PTR:%.*]], align 4
138; IR-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
139; IR-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i64 1, i64 [[TMP1]]
140; IR-NEXT:    store i64 [[TMP3]], i64 addrspace(5)* [[PTR]], align 4
141; IR-NEXT:    [[TMP4:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP1]], 0
142; IR-NEXT:    [[TMP5:%.*]] = insertvalue { i64, i1 } [[TMP4]], i1 [[TMP2]], 1
143; IR-NEXT:    [[RESULT_0:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
144; IR-NEXT:    [[RESULT_1:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
145; IR-NEXT:    store i1 [[RESULT_1]], i1 addrspace(1)* poison, align 1
146; IR-NEXT:    ret i64 [[RESULT_0]]
147;
148; GCN-LABEL: cmpxchg_private_i64:
149; GCN:       ; %bb.0:
150; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151; GCN-NEXT:    v_mov_b32_e32 v2, v0
152; GCN-NEXT:    v_add_i32_e32 v3, vcc, 4, v2
153; GCN-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
154; GCN-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
155; GCN-NEXT:    s_mov_b32 s7, 0xf000
156; GCN-NEXT:    s_mov_b32 s6, -1
157; GCN-NEXT:    s_waitcnt vmcnt(0)
158; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
159; GCN-NEXT:    v_cndmask_b32_e64 v4, v1, 0, vcc
160; GCN-NEXT:    buffer_store_dword v4, v3, s[0:3], 0 offen
161; GCN-NEXT:    v_cndmask_b32_e64 v3, v0, 1, vcc
162; GCN-NEXT:    s_waitcnt expcnt(0)
163; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
164; GCN-NEXT:    buffer_store_dword v3, v2, s[0:3], 0 offen
165; GCN-NEXT:    buffer_store_byte v4, off, s[4:7], 0
166; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
167; GCN-NEXT:    s_setpc_b64 s[30:31]
168  %result = cmpxchg i64 addrspace(5)* %ptr, i64 0, i64 1 acq_rel monotonic
169  %result.0 = extractvalue { i64, i1 } %result, 0
170  %result.1 = extractvalue { i64, i1 } %result, 1
171  store i1 %result.1, i1 addrspace(1)* poison
172  ret i64 %result.0
173}
174
175
; seq_cst `atomicrmw xchg` of the constant 4 through an addrspace(5) pointer.
; The opt checks expect a plain load of the old value followed by a plain
; store of 4, returning the loaded value. Because the stored value does not
; depend on the load, the expected machine code issues the store before
; waiting on the load (s_waitcnt vmcnt(1)).
176define i32 @atomicrmw_xchg_private_i32(i32 addrspace(5)* %ptr) {
177; IR-LABEL: @atomicrmw_xchg_private_i32(
178; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
179; IR-NEXT:    store i32 4, i32 addrspace(5)* [[PTR]], align 4
180; IR-NEXT:    ret i32 [[TMP1]]
181;
182; GCN-LABEL: atomicrmw_xchg_private_i32:
183; GCN:       ; %bb.0:
184; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
186; GCN-NEXT:    v_mov_b32_e32 v2, 4
187; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
188; GCN-NEXT:    s_waitcnt vmcnt(1)
189; GCN-NEXT:    v_mov_b32_e32 v0, v1
190; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
191; GCN-NEXT:    s_setpc_b64 s[30:31]
192  %result = atomicrmw xchg i32 addrspace(5)* %ptr, i32 4 seq_cst
193  ret i32 %result
194}
195
; seq_cst `atomicrmw add` of 4 through an addrspace(5) pointer.
; The opt checks expect a plain load, an `add` of 4, and a plain store of the
; sum, returning the value loaded before the update.
196define i32 @atomicrmw_add_private_i32(i32 addrspace(5)* %ptr) {
197; IR-LABEL: @atomicrmw_add_private_i32(
198; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
199; IR-NEXT:    [[NEW:%.*]] = add i32 [[TMP1]], 4
200; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
201; IR-NEXT:    ret i32 [[TMP1]]
202;
203; GCN-LABEL: atomicrmw_add_private_i32:
204; GCN:       ; %bb.0:
205; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
207; GCN-NEXT:    s_waitcnt vmcnt(0)
208; GCN-NEXT:    v_add_i32_e32 v2, vcc, 4, v1
209; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
210; GCN-NEXT:    v_mov_b32_e32 v0, v1
211; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
212; GCN-NEXT:    s_setpc_b64 s[30:31]
213  %result = atomicrmw add i32 addrspace(5)* %ptr, i32 4 seq_cst
214  ret i32 %result
215}
216
; seq_cst `atomicrmw sub` of 4 through an addrspace(5) pointer.
; The opt checks expect a plain load, a `sub` of 4, and a plain store,
; returning the old value. In the expected machine code the subtraction is
; emitted as an add of -4.
217define i32 @atomicrmw_sub_private_i32(i32 addrspace(5)* %ptr) {
218; IR-LABEL: @atomicrmw_sub_private_i32(
219; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
220; IR-NEXT:    [[NEW:%.*]] = sub i32 [[TMP1]], 4
221; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
222; IR-NEXT:    ret i32 [[TMP1]]
223;
224; GCN-LABEL: atomicrmw_sub_private_i32:
225; GCN:       ; %bb.0:
226; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
227; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
228; GCN-NEXT:    s_waitcnt vmcnt(0)
229; GCN-NEXT:    v_add_i32_e32 v2, vcc, -4, v1
230; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
231; GCN-NEXT:    v_mov_b32_e32 v0, v1
232; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
233; GCN-NEXT:    s_setpc_b64 s[30:31]
234  %result = atomicrmw sub i32 addrspace(5)* %ptr, i32 4 seq_cst
235  ret i32 %result
236}
237
; seq_cst `atomicrmw and` with 4 through an addrspace(5) pointer.
; The opt checks expect a plain load, an `and` with 4, and a plain store,
; returning the value loaded before the update.
238define i32 @atomicrmw_and_private_i32(i32 addrspace(5)* %ptr) {
239; IR-LABEL: @atomicrmw_and_private_i32(
240; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
241; IR-NEXT:    [[NEW:%.*]] = and i32 [[TMP1]], 4
242; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
243; IR-NEXT:    ret i32 [[TMP1]]
244;
245; GCN-LABEL: atomicrmw_and_private_i32:
246; GCN:       ; %bb.0:
247; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
249; GCN-NEXT:    s_waitcnt vmcnt(0)
250; GCN-NEXT:    v_and_b32_e32 v2, 4, v1
251; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
252; GCN-NEXT:    v_mov_b32_e32 v0, v1
253; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
254; GCN-NEXT:    s_setpc_b64 s[30:31]
255  %result = atomicrmw and i32 addrspace(5)* %ptr, i32 4 seq_cst
256  ret i32 %result
257}
258
; seq_cst `atomicrmw nand` with 4 through an addrspace(5) pointer.
; The opt checks expect a plain load, `and` with 4, `xor` with -1 (bitwise
; not), and a plain store, returning the old value.
; The expected machine code computes the same value as (~old | -5), i.e.
; ~(old & 4) rewritten as ~old | ~4.
259define i32 @atomicrmw_nand_private_i32(i32 addrspace(5)* %ptr) {
260; IR-LABEL: @atomicrmw_nand_private_i32(
261; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
262; IR-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 4
263; IR-NEXT:    [[NEW:%.*]] = xor i32 [[TMP2]], -1
264; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
265; IR-NEXT:    ret i32 [[TMP1]]
266;
267; GCN-LABEL: atomicrmw_nand_private_i32:
268; GCN:       ; %bb.0:
269; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
270; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
271; GCN-NEXT:    s_waitcnt vmcnt(0)
272; GCN-NEXT:    v_not_b32_e32 v2, v1
273; GCN-NEXT:    v_or_b32_e32 v2, -5, v2
274; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
275; GCN-NEXT:    v_mov_b32_e32 v0, v1
276; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
277; GCN-NEXT:    s_setpc_b64 s[30:31]
278  %result = atomicrmw nand i32 addrspace(5)* %ptr, i32 4 seq_cst
279  ret i32 %result
280}
281
; seq_cst `atomicrmw or` with 4 through an addrspace(5) pointer.
; The opt checks expect a plain load, an `or` with 4, and a plain store,
; returning the value loaded before the update.
282define i32 @atomicrmw_or_private_i32(i32 addrspace(5)* %ptr) {
283; IR-LABEL: @atomicrmw_or_private_i32(
284; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
285; IR-NEXT:    [[NEW:%.*]] = or i32 [[TMP1]], 4
286; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
287; IR-NEXT:    ret i32 [[TMP1]]
288;
289; GCN-LABEL: atomicrmw_or_private_i32:
290; GCN:       ; %bb.0:
291; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
293; GCN-NEXT:    s_waitcnt vmcnt(0)
294; GCN-NEXT:    v_or_b32_e32 v2, 4, v1
295; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
296; GCN-NEXT:    v_mov_b32_e32 v0, v1
297; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
298; GCN-NEXT:    s_setpc_b64 s[30:31]
299  %result = atomicrmw or i32 addrspace(5)* %ptr, i32 4 seq_cst
300  ret i32 %result
301}
302
; seq_cst `atomicrmw xor` with 4 through an addrspace(5) pointer.
; The opt checks expect a plain load, an `xor` with 4, and a plain store,
; returning the value loaded before the update.
303define i32 @atomicrmw_xor_private_i32(i32 addrspace(5)* %ptr) {
304; IR-LABEL: @atomicrmw_xor_private_i32(
305; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
306; IR-NEXT:    [[NEW:%.*]] = xor i32 [[TMP1]], 4
307; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
308; IR-NEXT:    ret i32 [[TMP1]]
309;
310; GCN-LABEL: atomicrmw_xor_private_i32:
311; GCN:       ; %bb.0:
312; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
313; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
314; GCN-NEXT:    s_waitcnt vmcnt(0)
315; GCN-NEXT:    v_xor_b32_e32 v2, 4, v1
316; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
317; GCN-NEXT:    v_mov_b32_e32 v0, v1
318; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
319; GCN-NEXT:    s_setpc_b64 s[30:31]
320  %result = atomicrmw xor i32 addrspace(5)* %ptr, i32 4 seq_cst
321  ret i32 %result
322}
323
; seq_cst `atomicrmw max` (signed) with 4 through an addrspace(5) pointer.
; The opt checks expect a plain load, a signed greater-than compare against 4
; with a select of the larger value, and a plain store, returning the old
; value. The expected machine code folds the compare/select into v_max_i32.
324define i32 @atomicrmw_max_private_i32(i32 addrspace(5)* %ptr) {
325; IR-LABEL: @atomicrmw_max_private_i32(
326; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
327; IR-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], 4
328; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
329; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
330; IR-NEXT:    ret i32 [[TMP1]]
331;
332; GCN-LABEL: atomicrmw_max_private_i32:
333; GCN:       ; %bb.0:
334; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
336; GCN-NEXT:    s_waitcnt vmcnt(0)
337; GCN-NEXT:    v_max_i32_e32 v2, 4, v1
338; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
339; GCN-NEXT:    v_mov_b32_e32 v0, v1
340; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
341; GCN-NEXT:    s_setpc_b64 s[30:31]
342  %result = atomicrmw max i32 addrspace(5)* %ptr, i32 4 seq_cst
343  ret i32 %result
344}
345
; seq_cst `atomicrmw min` (signed) with 4 through an addrspace(5) pointer.
; The opt checks expect a plain load, a signed less-or-equal compare against 4
; with a select of the smaller value, and a plain store, returning the old
; value. The expected machine code folds the compare/select into v_min_i32.
346define i32 @atomicrmw_min_private_i32(i32 addrspace(5)* %ptr) {
347; IR-LABEL: @atomicrmw_min_private_i32(
348; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
349; IR-NEXT:    [[TMP2:%.*]] = icmp sle i32 [[TMP1]], 4
350; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
351; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
352; IR-NEXT:    ret i32 [[TMP1]]
353;
354; GCN-LABEL: atomicrmw_min_private_i32:
355; GCN:       ; %bb.0:
356; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
358; GCN-NEXT:    s_waitcnt vmcnt(0)
359; GCN-NEXT:    v_min_i32_e32 v2, 4, v1
360; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
361; GCN-NEXT:    v_mov_b32_e32 v0, v1
362; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
363; GCN-NEXT:    s_setpc_b64 s[30:31]
364  %result = atomicrmw min i32 addrspace(5)* %ptr, i32 4 seq_cst
365  ret i32 %result
366}
367
; seq_cst `atomicrmw umax` (unsigned) with 4 through an addrspace(5) pointer.
; The opt checks expect a plain load, an unsigned greater-than compare against
; 4 with a select of the larger value, and a plain store, returning the old
; value. The expected machine code folds the compare/select into v_max_u32.
368define i32 @atomicrmw_umax_private_i32(i32 addrspace(5)* %ptr) {
369; IR-LABEL: @atomicrmw_umax_private_i32(
370; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
371; IR-NEXT:    [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 4
372; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
373; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
374; IR-NEXT:    ret i32 [[TMP1]]
375;
376; GCN-LABEL: atomicrmw_umax_private_i32:
377; GCN:       ; %bb.0:
378; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
380; GCN-NEXT:    s_waitcnt vmcnt(0)
381; GCN-NEXT:    v_max_u32_e32 v2, 4, v1
382; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
383; GCN-NEXT:    v_mov_b32_e32 v0, v1
384; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
385; GCN-NEXT:    s_setpc_b64 s[30:31]
386  %result = atomicrmw umax i32 addrspace(5)* %ptr, i32 4 seq_cst
387  ret i32 %result
388}
389
; seq_cst `atomicrmw umin` (unsigned) with 4 through an addrspace(5) pointer.
; The opt checks expect a plain load, an unsigned less-or-equal compare
; against 4 with a select of the smaller value, and a plain store, returning
; the old value. The expected machine code folds the compare/select into
; v_min_u32.
390define i32 @atomicrmw_umin_private_i32(i32 addrspace(5)* %ptr) {
391; IR-LABEL: @atomicrmw_umin_private_i32(
392; IR-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(5)* [[PTR:%.*]], align 4
393; IR-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[TMP1]], 4
394; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
395; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[PTR]], align 4
396; IR-NEXT:    ret i32 [[TMP1]]
397;
398; GCN-LABEL: atomicrmw_umin_private_i32:
399; GCN:       ; %bb.0:
400; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
401; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
402; GCN-NEXT:    s_waitcnt vmcnt(0)
403; GCN-NEXT:    v_min_u32_e32 v2, 4, v1
404; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
405; GCN-NEXT:    v_mov_b32_e32 v0, v1
406; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
407; GCN-NEXT:    s_setpc_b64 s[30:31]
408  %result = atomicrmw umin i32 addrspace(5)* %ptr, i32 4 seq_cst
409  ret i32 %result
410}
411
; seq_cst `atomicrmw fadd` of 2.0 through a float addrspace(5) pointer.
; The opt checks expect a plain float load, an `fadd` of 2.0, and a plain
; store, returning the value loaded before the update.
; NOTE(review): the function name ends in _i32 although the operand type is
; float; the name is kept because the check labels depend on it.
412define float @atomicrmw_fadd_private_i32(float addrspace(5)* %ptr) {
413; IR-LABEL: @atomicrmw_fadd_private_i32(
414; IR-NEXT:    [[TMP1:%.*]] = load float, float addrspace(5)* [[PTR:%.*]], align 4
415; IR-NEXT:    [[NEW:%.*]] = fadd float [[TMP1]], 2.000000e+00
416; IR-NEXT:    store float [[NEW]], float addrspace(5)* [[PTR]], align 4
417; IR-NEXT:    ret float [[TMP1]]
418;
419; GCN-LABEL: atomicrmw_fadd_private_i32:
420; GCN:       ; %bb.0:
421; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
423; GCN-NEXT:    s_waitcnt vmcnt(0)
424; GCN-NEXT:    v_add_f32_e32 v2, 2.0, v1
425; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
426; GCN-NEXT:    v_mov_b32_e32 v0, v1
427; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
428; GCN-NEXT:    s_setpc_b64 s[30:31]
429  %result = atomicrmw fadd float addrspace(5)* %ptr, float 2.0 seq_cst
430  ret float %result
431}
432
; seq_cst `atomicrmw fsub` of the non-constant %val through a float
; addrspace(5) pointer. The opt checks expect a plain float load, an `fsub`
; of %val from the loaded value, and a plain store, returning the old value.
; NOTE(review): the function name ends in _i32 although the operand type is
; float; the name is kept because the check labels depend on it.
433define float @atomicrmw_fsub_private_i32(float addrspace(5)* %ptr, float %val) {
434; IR-LABEL: @atomicrmw_fsub_private_i32(
435; IR-NEXT:    [[TMP1:%.*]] = load float, float addrspace(5)* [[PTR:%.*]], align 4
436; IR-NEXT:    [[NEW:%.*]] = fsub float [[TMP1]], [[VAL:%.*]]
437; IR-NEXT:    store float [[NEW]], float addrspace(5)* [[PTR]], align 4
438; IR-NEXT:    ret float [[TMP1]]
439;
440; GCN-LABEL: atomicrmw_fsub_private_i32:
441; GCN:       ; %bb.0:
442; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443; GCN-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen
444; GCN-NEXT:    s_waitcnt vmcnt(0)
445; GCN-NEXT:    v_sub_f32_e32 v1, v2, v1
446; GCN-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
447; GCN-NEXT:    v_mov_b32_e32 v0, v2
448; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
449; GCN-NEXT:    s_setpc_b64 s[30:31]
450  %result = atomicrmw fsub float addrspace(5)* %ptr, float %val seq_cst
451  ret float %result
452}
453
; Kernel that initializes a two-element addrspace(5) alloca to {0, 1}, then
; performs an acq_rel `atomicrmw add` of 7 on the element selected by %in and
; stores the returned old value to %out.
; The opt checks expect the atomicrmw to become load / add 7 / store on the
; alloca element. The llc checks expect no scratch memory access at all: the
; stored result is computed directly as 1 when %in equals 1 and 0 otherwise.
454define amdgpu_kernel void @alloca_promote_atomicrmw_private_lds_promote(i32 addrspace(1)* %out, i32 %in) nounwind {
455; IR-LABEL: @alloca_promote_atomicrmw_private_lds_promote(
456; IR-NEXT:  entry:
457; IR-NEXT:    [[TMP:%.*]] = alloca [2 x i32], align 4, addrspace(5)
458; IR-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 0
459; IR-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 1
460; IR-NEXT:    store i32 0, i32 addrspace(5)* [[GEP1]], align 4
461; IR-NEXT:    store i32 1, i32 addrspace(5)* [[GEP2]], align 4
462; IR-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 [[IN:%.*]]
463; IR-NEXT:    [[TMP0:%.*]] = load i32, i32 addrspace(5)* [[GEP3]], align 4
464; IR-NEXT:    [[NEW:%.*]] = add i32 [[TMP0]], 7
465; IR-NEXT:    store i32 [[NEW]], i32 addrspace(5)* [[GEP3]], align 4
466; IR-NEXT:    store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
467; IR-NEXT:    ret void
468;
469; GCN-LABEL: alloca_promote_atomicrmw_private_lds_promote:
470; GCN:       ; %bb.0: ; %entry
471; GCN-NEXT:    s_load_dword s4, s[0:1], 0xb
472; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
473; GCN-NEXT:    s_mov_b32 s3, 0xf000
474; GCN-NEXT:    s_mov_b32 s2, -1
475; GCN-NEXT:    s_waitcnt lgkmcnt(0)
476; GCN-NEXT:    s_cmp_eq_u32 s4, 1
477; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
478; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
479; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
480; GCN-NEXT:    s_endpgm
481entry:
482  %tmp = alloca [2 x i32], addrspace(5)
483  %gep1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
484  %gep2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
485  store i32 0, i32 addrspace(5)* %gep1
486  store i32 1, i32 addrspace(5)* %gep2
487  %gep3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
488  %rmw = atomicrmw add i32 addrspace(5)* %gep3, i32 7 acq_rel
489  store i32 %rmw, i32 addrspace(1)* %out
490  ret void
491}
492
; Kernel that initializes a two-element addrspace(5) alloca to {0, 1}, then
; performs an acq_rel/monotonic cmpxchg (expected 0, replacement 1) on the
; element selected by %in and stores the returned old value to %out.
; The opt checks expect the cmpxchg to become load / icmp eq 0 / select /
; store, with the result pair rebuilt via insertvalue. The llc checks expect
; no scratch memory access at all: the stored result is computed directly as
; 1 when %in equals 1 and 0 otherwise.
493define amdgpu_kernel void @alloca_promote_cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind {
494; IR-LABEL: @alloca_promote_cmpxchg_private(
495; IR-NEXT:  entry:
496; IR-NEXT:    [[TMP:%.*]] = alloca [2 x i32], align 4, addrspace(5)
497; IR-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 0
498; IR-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 1
499; IR-NEXT:    store i32 0, i32 addrspace(5)* [[GEP1]], align 4
500; IR-NEXT:    store i32 1, i32 addrspace(5)* [[GEP2]], align 4
501; IR-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* [[TMP]], i32 0, i32 [[IN:%.*]]
502; IR-NEXT:    [[TMP0:%.*]] = load i32, i32 addrspace(5)* [[GEP3]], align 4
503; IR-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
504; IR-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 1, i32 [[TMP0]]
505; IR-NEXT:    store i32 [[TMP2]], i32 addrspace(5)* [[GEP3]], align 4
506; IR-NEXT:    [[TMP3:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP0]], 0
507; IR-NEXT:    [[TMP4:%.*]] = insertvalue { i32, i1 } [[TMP3]], i1 [[TMP1]], 1
508; IR-NEXT:    [[VAL:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
509; IR-NEXT:    store i32 [[VAL]], i32 addrspace(1)* [[OUT:%.*]], align 4
510; IR-NEXT:    ret void
511;
512; GCN-LABEL: alloca_promote_cmpxchg_private:
513; GCN:       ; %bb.0: ; %entry
514; GCN-NEXT:    s_load_dword s4, s[0:1], 0xb
515; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
516; GCN-NEXT:    s_mov_b32 s3, 0xf000
517; GCN-NEXT:    s_mov_b32 s2, -1
518; GCN-NEXT:    s_waitcnt lgkmcnt(0)
519; GCN-NEXT:    s_cmp_eq_u32 s4, 1
520; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
521; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
522; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
523; GCN-NEXT:    s_endpgm
524entry:
525  %tmp = alloca [2 x i32], addrspace(5)
526  %gep1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0
527  %gep2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1
528  store i32 0, i32 addrspace(5)* %gep1
529  store i32 1, i32 addrspace(5)* %gep2
530  %gep3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 %in
531  %xchg = cmpxchg i32 addrspace(5)* %gep3, i32 0, i32 1 acq_rel monotonic
532  %val = extractvalue { i32, i1 } %xchg, 0
533  store i32 %val, i32 addrspace(1)* %out
534  ret void
535}
536