1; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix CHECK-ARMV7
2; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2
3; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-T1
4; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-T1-M0
5; RUN: llc < %s -mtriple=thumbv7--none-eabi -thread-model single -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-BAREMETAL
6
7target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
8
; Exercises the i32 atomicrmw operations add, sub, and, or, xor, min, max,
; umin and umax, all with monotonic ordering, on stack slots. The value
; produced by each operation is stored to %old.
;
; The directives placed before each operation expect, per prefix:
;   - default prefix (armv7 and thumbv7 apple-ios invocations): an ldrex,
;     the arithmetic or compare instruction, then a strex;
;   - T1 and T1-M0 prefixes (thumbv6 apple-ios invocations): a call to the
;     matching ___sync_fetch_and_<op>_4 routine;
;   - BAREMETAL prefix (thumbv7 none-eabi with the single thread model): the
;     plain instruction and no mention of any __sync symbol.
9define void @func(i32 %argc, i8** %argv) nounwind {
10entry:
11	%argc.addr = alloca i32		; <i32*> [#uses=1]
12	%argv.addr = alloca i8**		; <i8***> [#uses=1]
13	%val1 = alloca i32		; <i32*> [#uses=2]
14	%val2 = alloca i32		; <i32*> [#uses=15]
15	%andt = alloca i32		; <i32*> [#uses=2]
16	%ort = alloca i32		; <i32*> [#uses=2]
17	%xort = alloca i32		; <i32*> [#uses=2]
18	%old = alloca i32		; <i32*> [#uses=18]
19	%temp = alloca i32		; <i32*> [#uses=2]
20	store i32 %argc, i32* %argc.addr
21	store i8** %argv, i8*** %argv.addr
22	store i32 0, i32* %val1
23	store i32 31, i32* %val2
24	store i32 3855, i32* %andt
25	store i32 3855, i32* %ort
26	store i32 3855, i32* %xort
27	store i32 4, i32* %temp
28	%tmp = load i32, i32* %temp
; add with an operand loaded from memory (%tmp) rather than a constant.
29  ; CHECK: ldrex
30  ; CHECK: add
31  ; CHECK: strex
32  ; CHECK-T1: bl ___sync_fetch_and_add_4
33  ; CHECK-T1-M0: bl ___sync_fetch_and_add_4
34  ; CHECK-BAREMETAL: add
35  ; CHECK-BAREMETAL-NOT: __sync
36  %0 = atomicrmw add i32* %val1, i32 %tmp monotonic
37	store i32 %0, i32* %old
; sub with a constant operand.
38  ; CHECK: ldrex
39  ; CHECK: sub
40  ; CHECK: strex
41  ; CHECK-T1: bl ___sync_fetch_and_sub_4
42  ; CHECK-T1-M0: bl ___sync_fetch_and_sub_4
43  ; CHECK-BAREMETAL: sub
44  ; CHECK-BAREMETAL-NOT: __sync
45  %1 = atomicrmw sub i32* %val2, i32 30 monotonic
46	store i32 %1, i32* %old
; add of the constant 1.
47  ; CHECK: ldrex
48  ; CHECK: add
49  ; CHECK: strex
50  ; CHECK-T1: bl ___sync_fetch_and_add_4
51  ; CHECK-T1-M0: bl ___sync_fetch_and_add_4
52  ; CHECK-BAREMETAL: add
53  ; CHECK-BAREMETAL-NOT: __sync
54  %2 = atomicrmw add i32* %val2, i32 1 monotonic
55	store i32 %2, i32* %old
; sub of the constant 1.
56  ; CHECK: ldrex
57  ; CHECK: sub
58  ; CHECK: strex
59  ; CHECK-T1: bl ___sync_fetch_and_sub_4
60  ; CHECK-T1-M0: bl ___sync_fetch_and_sub_4
61  ; CHECK-BAREMETAL: sub
62  ; CHECK-BAREMETAL-NOT: __sync
63  %3 = atomicrmw sub i32* %val2, i32 1 monotonic
64	store i32 %3, i32* %old
; Bitwise and / or / xor, each with the constant 4080.
65  ; CHECK: ldrex
66  ; CHECK: and
67  ; CHECK: strex
68  ; CHECK-T1: bl ___sync_fetch_and_and_4
69  ; CHECK-T1-M0: bl ___sync_fetch_and_and_4
70  ; CHECK-BAREMETAL: and
71  ; CHECK-BAREMETAL-NOT: __sync
72  %4 = atomicrmw and i32* %andt, i32 4080 monotonic
73	store i32 %4, i32* %old
74  ; CHECK: ldrex
75  ; CHECK: or
76  ; CHECK: strex
77  ; CHECK-T1: bl ___sync_fetch_and_or_4
78  ; CHECK-T1-M0: bl ___sync_fetch_and_or_4
79  ; CHECK-BAREMETAL: or
80  ; CHECK-BAREMETAL-NOT: __sync
81  %5 = atomicrmw or i32* %ort, i32 4080 monotonic
82	store i32 %5, i32* %old
83  ; CHECK: ldrex
84  ; CHECK: eor
85  ; CHECK: strex
86  ; CHECK-T1: bl ___sync_fetch_and_xor_4
87  ; CHECK-T1-M0: bl ___sync_fetch_and_xor_4
88  ; CHECK-BAREMETAL: eor
89  ; CHECK-BAREMETAL-NOT: __sync
90  %6 = atomicrmw xor i32* %xort, i32 4080 monotonic
91	store i32 %6, i32* %old
; Signed min, first against 16 and then against %neg (0 - 1).
92  ; CHECK: ldrex
93  ; CHECK: cmp
94  ; CHECK: strex
95  ; CHECK-T1: bl ___sync_fetch_and_min_4
96  ; CHECK-T1-M0: bl ___sync_fetch_and_min_4
97  ; CHECK-BAREMETAL: cmp
98  ; CHECK-BAREMETAL-NOT: __sync
99  %7 = atomicrmw min i32* %val2, i32 16 monotonic
100	store i32 %7, i32* %old
101	%neg = sub i32 0, 1
102  ; CHECK: ldrex
103  ; CHECK: cmp
104  ; CHECK: strex
105  ; CHECK-T1: bl ___sync_fetch_and_min_4
106  ; CHECK-T1-M0: bl ___sync_fetch_and_min_4
107  ; CHECK-BAREMETAL: cmp
108  ; CHECK-BAREMETAL-NOT: __sync
109  %8 = atomicrmw min i32* %val2, i32 %neg monotonic
110	store i32 %8, i32* %old
; Signed max against 1 and then 0.
111  ; CHECK: ldrex
112  ; CHECK: cmp
113  ; CHECK: strex
114  ; CHECK-T1: bl ___sync_fetch_and_max_4
115  ; CHECK-T1-M0: bl ___sync_fetch_and_max_4
116  ; CHECK-BAREMETAL: cmp
117  ; CHECK-BAREMETAL-NOT: __sync
118  %9 = atomicrmw max i32* %val2, i32 1 monotonic
119	store i32 %9, i32* %old
120  ; CHECK: ldrex
121  ; CHECK: cmp
122  ; CHECK: strex
123  ; CHECK-T1: bl ___sync_fetch_and_max_4
124  ; CHECK-T1-M0: bl ___sync_fetch_and_max_4
125  ; CHECK-BAREMETAL: cmp
126  ; CHECK-BAREMETAL-NOT: __sync
127  %10 = atomicrmw max i32* %val2, i32 0 monotonic
128	store i32 %10, i32* %old
; Unsigned min, first against 16 and then against %uneg (0 - 1).
129  ; CHECK: ldrex
130  ; CHECK: cmp
131  ; CHECK: strex
132  ; CHECK-T1: bl ___sync_fetch_and_umin_4
133  ; CHECK-T1-M0: bl ___sync_fetch_and_umin_4
134  ; CHECK-BAREMETAL: cmp
135  ; CHECK-BAREMETAL-NOT: __sync
136  %11 = atomicrmw umin i32* %val2, i32 16 monotonic
137	store i32 %11, i32* %old
138	%uneg = sub i32 0, 1
139  ; CHECK: ldrex
140  ; CHECK: cmp
141  ; CHECK: strex
142  ; CHECK-T1: bl ___sync_fetch_and_umin_4
143  ; CHECK-T1-M0: bl ___sync_fetch_and_umin_4
144  ; CHECK-BAREMETAL: cmp
145  ; CHECK-BAREMETAL-NOT: __sync
146  %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
147	store i32 %12, i32* %old
; Unsigned max against 1 and then 0.
148  ; CHECK: ldrex
149  ; CHECK: cmp
150  ; CHECK: strex
151  ; CHECK-T1: bl ___sync_fetch_and_umax_4
152  ; CHECK-T1-M0: bl ___sync_fetch_and_umax_4
153  ; CHECK-BAREMETAL: cmp
154  ; CHECK-BAREMETAL-NOT: __sync
155  %13 = atomicrmw umax i32* %val2, i32 1 monotonic
156	store i32 %13, i32* %old
157  ; CHECK: ldrex
158  ; CHECK: cmp
159  ; CHECK: strex
160  ; CHECK-T1: bl ___sync_fetch_and_umax_4
161  ; CHECK-T1-M0: bl ___sync_fetch_and_umax_4
162  ; CHECK-BAREMETAL: cmp
163  ; CHECK-BAREMETAL-NOT: __sync
164  %14 = atomicrmw umax i32* %val2, i32 0 monotonic
165	store i32 %14, i32* %old
166
167  ret void
168}
169
; i16 variant of the unsigned min/max tests: four monotonic atomicrmw
; operations (umin with 16, umin with 0 - 1, umax with 1, umax with 0) on a
; stack slot initialised to 31, each result stored to %old.
; The thumbv6 prefixes expect the 2-byte ___sync_fetch_and_umin_2 /
; ___sync_fetch_and_umax_2 routines; the default prefix expects
; ldrex / cmp / strex; the BAREMETAL prefix expects a cmp and no __sync symbol.
170define void @func2() nounwind {
171entry:
172  %val = alloca i16
173  %old = alloca i16
174  store i16 31, i16* %val
175  ; CHECK: ldrex
176  ; CHECK: cmp
177  ; CHECK: strex
178  ; CHECK-T1: bl ___sync_fetch_and_umin_2
179  ; CHECK-T1-M0: bl ___sync_fetch_and_umin_2
180  ; CHECK-BAREMETAL: cmp
181  ; CHECK-BAREMETAL-NOT: __sync
182  %0 = atomicrmw umin i16* %val, i16 16 monotonic
183  store i16 %0, i16* %old
  ; %uneg is 0 - 1 in i16, used as the second umin operand.
184  %uneg = sub i16 0, 1
185  ; CHECK: ldrex
186  ; CHECK: cmp
187  ; CHECK: strex
188  ; CHECK-T1: bl ___sync_fetch_and_umin_2
189  ; CHECK-T1-M0: bl ___sync_fetch_and_umin_2
190  ; CHECK-BAREMETAL: cmp
191  ; CHECK-BAREMETAL-NOT: __sync
192  %1 = atomicrmw umin i16* %val, i16 %uneg monotonic
193  store i16 %1, i16* %old
194  ; CHECK: ldrex
195  ; CHECK: cmp
196  ; CHECK: strex
197  ; CHECK-T1: bl ___sync_fetch_and_umax_2
198  ; CHECK-T1-M0: bl ___sync_fetch_and_umax_2
199  ; CHECK-BAREMETAL: cmp
200  ; CHECK-BAREMETAL-NOT: __sync
201  %2 = atomicrmw umax i16* %val, i16 1 monotonic
202  store i16 %2, i16* %old
203  ; CHECK: ldrex
204  ; CHECK: cmp
205  ; CHECK: strex
206  ; CHECK-T1: bl ___sync_fetch_and_umax_2
207  ; CHECK-T1-M0: bl ___sync_fetch_and_umax_2
208  ; CHECK-BAREMETAL: cmp
209  ; CHECK-BAREMETAL-NOT: __sync
210  %3 = atomicrmw umax i16* %val, i16 0 monotonic
211  store i16 %3, i16* %old
212  ret void
213}
214
; i8 variant of the unsigned min/max tests: four monotonic atomicrmw
; operations (umin with 16, umin with 0 - 1, umax with 1, umax with 0) on a
; stack slot initialised to 31, each result stored to %old.
; The thumbv6 prefixes expect the 1-byte ___sync_fetch_and_umin_1 /
; ___sync_fetch_and_umax_1 routines; the default prefix expects
; ldrex / cmp / strex; the BAREMETAL prefix expects a cmp and no __sync symbol.
215define void @func3() nounwind {
216entry:
217  %val = alloca i8
218  %old = alloca i8
219  store i8 31, i8* %val
220  ; CHECK: ldrex
221  ; CHECK: cmp
222  ; CHECK: strex
223  ; CHECK-T1: bl ___sync_fetch_and_umin_1
224  ; CHECK-T1-M0: bl ___sync_fetch_and_umin_1
225  ; CHECK-BAREMETAL: cmp
226  ; CHECK-BAREMETAL-NOT: __sync
227  %0 = atomicrmw umin i8* %val, i8 16 monotonic
228  store i8 %0, i8* %old
  ; The directives below apply to the second umin; here %uneg (0 - 1 in i8)
  ; is defined after them rather than before, unlike in @func2.
229  ; CHECK: ldrex
230  ; CHECK: cmp
231  ; CHECK: strex
232  ; CHECK-T1: bl ___sync_fetch_and_umin_1
233  ; CHECK-T1-M0: bl ___sync_fetch_and_umin_1
234  ; CHECK-BAREMETAL: cmp
235  ; CHECK-BAREMETAL-NOT: __sync
236  %uneg = sub i8 0, 1
237  %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
238  store i8 %1, i8* %old
239  ; CHECK: ldrex
240  ; CHECK: cmp
241  ; CHECK: strex
242  ; CHECK-T1: bl ___sync_fetch_and_umax_1
243  ; CHECK-T1-M0: bl ___sync_fetch_and_umax_1
244  ; CHECK-BAREMETAL: cmp
245  ; CHECK-BAREMETAL-NOT: __sync
246  %2 = atomicrmw umax i8* %val, i8 1 monotonic
247  store i8 %2, i8* %old
248  ; CHECK: ldrex
249  ; CHECK: cmp
250  ; CHECK: strex
251  ; CHECK-T1: bl ___sync_fetch_and_umax_1
252  ; CHECK-T1-M0: bl ___sync_fetch_and_umax_1
253  ; CHECK-BAREMETAL: cmp
254  ; CHECK-BAREMETAL-NOT: __sync
255  %3 = atomicrmw umax i8* %val, i8 0 monotonic
256  store i8 %3, i8* %old
257  ret void
258}
259
260; CHECK: func4
261; This function should not need to use callee-saved registers.
262; rdar://problem/12203728
263; CHECK-NOT: r4
; The negative directive above is how the "no callee-saved registers"
; expectation is expressed: r4 must not appear after the func4 match.
; The body is a single monotonic atomicrmw add of 1 through %p whose result
; is returned; the function carries the optsize and ssp attributes.
264define i32 @func4(i32* %p) nounwind optsize ssp {
265entry:
266  %0 = atomicrmw add i32* %p, i32 1 monotonic
267  ret i32 %0
268}
269
; cmpxchg with seq_cst success ordering and monotonic failure ordering.
; Returns element 0 of the cmpxchg result pair (the i32 value).
; Only the armv7 (ARMV7 prefix) and thumbv7 (T2 prefix) apple-ios
; invocations have detailed expectations here.
270define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) {
271; CHECK-LABEL: test_cmpxchg_fail_order:
272
273  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
274  %oldval = extractvalue { i32, i1 } %pair, 0
; ARM mode: an initial ldrex/cmp that branches to the failure block on
; mismatch, a dmb ish ahead of the strex loop, a second dmb ish in the
; success block, and a clrex followed by a return in the failure block.
275; CHECK-ARMV7:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
276; CHECK-ARMV7:     cmp     [[OLDVAL]], r1
277; CHECK-ARMV7:     bne     [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
278; CHECK-ARMV7:     dmb ish
279; CHECK-ARMV7: [[LOOP_BB:\.?LBB.*]]:
280; CHECK-ARMV7:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
281; CHECK-ARMV7:     cmp     [[SUCCESS]], #0
282; CHECK-ARMV7:     beq     [[SUCCESS_BB:\.?LBB.*]]
283; CHECK-ARMV7:     ldrex   [[OLDVAL]], [r[[ADDR]]]
284; CHECK-ARMV7:     cmp     [[OLDVAL]], r1
285; CHECK-ARMV7:     beq     [[LOOP_BB]]
286; CHECK-ARMV7: [[FAIL_BB]]:
287; CHECK-ARMV7:     clrex
288; CHECK-ARMV7:     bx      lr
289; CHECK-ARMV7: [[SUCCESS_BB]]:
290; CHECK-ARMV7:     dmb     ish
291; CHECK-ARMV7:     bx      lr
292
; Thumb2 mode: same shape, but the success path is expected as predicated
; dmbeq / bxeq instructions instead of a separate success block.
293; CHECK-T2:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
294; CHECK-T2:     cmp     [[OLDVAL]], r1
295; CHECK-T2:     bne     [[FAIL_BB:\.?LBB.*]]
296; CHECK-T2:     dmb ish
297; CHECK-T2: [[LOOP_BB:\.?LBB.*]]:
298; CHECK-T2:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
299; CHECK-T2:     cmp     [[SUCCESS]], #0
300; CHECK-T2:     dmbeq   ish
301; CHECK-T2:     bxeq    lr
302; CHECK-T2:     ldrex   [[OLDVAL]], [r[[ADDR]]]
303; CHECK-T2:     cmp     [[OLDVAL]], r1
304; CHECK-T2:     beq     [[LOOP_BB]]
305; CHECK-T2:     clrex
306
307  ret i32 %oldval
308}
309
; cmpxchg with acquire ordering for both success and failure.
; Returns element 0 of the cmpxchg result pair (the i32 value).
; The default-prefix directives require that no `dmb ish` appears before the
; ldrex/strex loop label, that the failure block's clrex falls straight
; through into the end block, and that a `dmb ish` and the return follow
; the end block label.
310define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) {
311; CHECK-LABEL: test_cmpxchg_fail_order1:
312
313  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new acquire acquire
314  %oldval = extractvalue { i32, i1 } %pair, 0
315; CHECK-NOT:     dmb ish
316; CHECK: [[LOOP_BB:\.?LBB[0-9]+_1]]:
317; CHECK:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
318; CHECK:     cmp     [[OLDVAL]], r1
319; CHECK:     bne     [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
320; CHECK:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
321; CHECK:     cmp     [[SUCCESS]], #0
322; CHECK:     bne     [[LOOP_BB]]
323; CHECK:     b       [[END_BB:\.?LBB[0-9]+_[0-9]+]]
324; CHECK: [[FAIL_BB]]:
325; CHECK-NEXT: clrex
326; CHECK-NEXT: [[END_BB]]:
327; CHECK:     dmb     ish
328; CHECK:     bx      lr
329
330  ret i32 %oldval
331}
332
; Two 4-byte-aligned atomic acquire loads (from %mem1 and %mem2) whose sum
; is returned.
333define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind {
334; CHECK-LABEL: load_load_add_acquire
335  %val1 = load atomic i32, i32* %mem1 acquire, align 4
336  %val2 = load atomic i32, i32* %mem2 acquire, align 4
337  %tmp = add i32 %val1, %val2
338
; Default prefix: each plain ldr is followed by a dmb, then the add.
339; CHECK: ldr {{r[0-9]}}, [r0]
340; CHECK: dmb
341; CHECK: ldr {{r[0-9]}}, [r1]
342; CHECK: dmb
343; CHECK: add r0,
344
; thumbv6 with -mcpu=cortex-m0: same ldr + dmb pairs.
345; CHECK-T1-M0: ldr {{r[0-9]}}, [r0]
346; CHECK-T1-M0: dmb
347; CHECK-T1-M0: ldr {{r[0-9]}}, [r1]
348; CHECK-T1-M0: dmb
349
; thumbv6 without -mcpu: two references to the compare-and-swap routine
; are expected, one per load.
350; CHECK-T1: ___sync_val_compare_and_swap_4
351; CHECK-T1: ___sync_val_compare_and_swap_4
352
; Single thread model: plain loads with no dmb between or after them.
353; CHECK-BAREMETAL: ldr {{r[0-9]}}, [r0]
354; CHECK-BAREMETAL-NOT: dmb
355; CHECK-BAREMETAL: ldr {{r[0-9]}}, [r1]
356; CHECK-BAREMETAL-NOT: dmb
357; CHECK-BAREMETAL: add r0,
358
359  ret i32 %tmp
360}
361
; Two 4-byte-aligned atomic release stores: %val1 to %mem1, then %val2 to
; %mem2.
362define void @store_store_release(i32* %mem1, i32 %val1, i32* %mem2, i32 %val2) {
363; CHECK-LABEL: store_store_release
364  store atomic i32 %val1, i32* %mem1 release, align 4
365  store atomic i32 %val2, i32* %mem2 release, align 4
366
; Default prefix: a dmb ahead of each plain str.
367; CHECK: dmb
368; CHECK: str r1, [r0]
369; CHECK: dmb
370; CHECK: str r3, [r2]
371
; thumbv6 without -mcpu: two references to the test-and-set routine are
; expected, one per store.
372; CHECK-T1: ___sync_lock_test_and_set
373; CHECK-T1: ___sync_lock_test_and_set
374
; thumbv6 with -mcpu=cortex-m0: same dmb + str pairs as the default prefix.
375; CHECK-T1-M0: dmb
376; CHECK-T1-M0: str r1, [r0]
377; CHECK-T1-M0: dmb
378; CHECK-T1-M0: str r3, [r2]
379
; Single thread model: the two stores with no dmb before or between them.
; The prefix on the two str lines was previously misspelled, so FileCheck
; never evaluated them; it now matches the prefix the bare-metal invocation
; passes.
380; CHECK-BAREMETAL-NOT: dmb
381; CHECK-BAREMETAL: str r1, [r0]
382; CHECK-BAREMETAL-NOT: dmb
383; CHECK-BAREMETAL: str r3, [r2]
384
385  ret void
386}
387
; A monotonic atomic load from %mem1, a seq_cst fence, then a monotonic
; atomic store of the loaded value to %mem2.
388define void @load_fence_store_monotonic(i32* %mem1, i32* %mem2) {
389; CHECK-LABEL: load_fence_store_monotonic
390  %val = load atomic i32, i32* %mem1 monotonic, align 4
391  fence seq_cst
392  store atomic i32 %val, i32* %mem2 monotonic, align 4
393
; Default prefix: ldr, a dmb for the fence, then a str of the same register.
394; CHECK: ldr [[R0:r[0-9]]], [r0]
395; CHECK: dmb
396; CHECK: str [[R0]], [r1]
397
398; CHECK-T1-M0: ldr [[R0:r[0-9]]], [r0]
399; CHECK-T1-M0: dmb
400; CHECK-T1-M0: str [[R0]], [r1]
401
; thumbv6 without -mcpu: the fence may appear either as a dmb or as a call
; to ___sync_synchronize, and the address registers are not pinned.
402; CHECK-T1: ldr [[R0:r[0-9]]], [{{r[0-9]+}}]
403; CHECK-T1: {{dmb|bl ___sync_synchronize}}
404; CHECK-T1: str [[R0]], [{{r[0-9]+}}]
405
; Single thread model: no dmb between the load and the store.
406; CHECK-BAREMETAL: ldr [[R0:r[0-9]]], [r0]
407; CHECK-BAREMETAL-NOT: dmb
408; CHECK-BAREMETAL: str [[R0]], [r1]
409
410  ret void
411}
412