1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -basic-aa -dse -S | FileCheck %s
3; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s
4target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
5
6declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
7declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
8declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
9declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
10declare void @llvm.init.trampoline(i8*, i8*, i8*)
11declare void @llvm.matrix.column.major.store(<6 x float>, float*, i64, i1, i32, i32)
12
; Trivially dead store: the store of %DEAD to %P is overwritten by the
; store of 0 to the same pointer before any read, so only the store of 0
; survives.
13define void @test1(i32* %Q, i32* %P) {
14; CHECK-LABEL: @test1(
15; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
16; CHECK-NEXT:    ret void
17;
18  %DEAD = load i32, i32* %Q
19  store i32 %DEAD, i32* %P
20  store i32 0, i32* %P
21  ret void
22}
23
24; PR8677
25@g = global i32 1
26
; %g_addr may alias @g, so the store of -1 to @g is not made dead by the
; later store of %g_value; nothing can be removed here.
27define i32 @test3(i32* %g_addr) nounwind {
28; CHECK-LABEL: @test3(
29; CHECK-NEXT:    [[G_VALUE:%.*]] = load i32, i32* [[G_ADDR:%.*]], align 4
30; CHECK-NEXT:    store i32 -1, i32* @g, align 4
31; CHECK-NEXT:    store i32 [[G_VALUE]], i32* [[G_ADDR]], align 4
32; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* @g, align 4
33; CHECK-NEXT:    ret i32 [[TMP3]]
34;
35  %g_value = load i32, i32* %g_addr, align 4
36  store i32 -1, i32* @g, align 4
37  store i32 %g_value, i32* %g_addr, align 4
38  %tmp3 = load i32, i32* @g, align 4
39  ret i32 %tmp3
40}
41
42
; A volatile store may never be deleted, and the load feeding it must stay.
43define void @test4(i32* %Q) {
44; CHECK-LABEL: @test4(
45; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[Q:%.*]], align 4
46; CHECK-NEXT:    store volatile i32 [[A]], i32* [[Q]], align 4
47; CHECK-NEXT:    ret void
48;
49  %a = load i32, i32* %Q
50  store volatile i32 %a, i32* %Q
51  ret void
52}
53
54; PR8576 - Should delete store of 10 even though p/q are may aliases.
; The stores of 20 and 30 remain: the store of 20 is not overwritten and
; the store of 30 is the last write to %p.
55define void @test2(i32 *%p, i32 *%q) {
56; CHECK-LABEL: @test2(
57; CHECK-NEXT:    store i32 20, i32* [[Q:%.*]], align 4
58; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
59; CHECK-NEXT:    ret void
60;
61  store i32 10, i32* %p, align 4
62  store i32 20, i32* %q, align 4
63  store i32 30, i32* %p, align 4
64  ret void
65}
66
67; Should delete store of 10 even though memset is a may-store to P (P and Q may
68; alias).
69define void @test6(i32 *%p, i8 *%q) {
70; CHECK-LABEL: @test6(
71; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[Q:%.*]], i8 42, i64 900, i1 false)
72; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
73; CHECK-NEXT:    ret void
74;
75  store i32 10, i32* %p, align 4       ;; dead.
76  call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i1 false)
77  store i32 30, i32* %p, align 4
78  ret void
79}
80
81; Should delete store of 10 even though memset is a may-store to P (P and Q may
82; alias).
; Same as @test6, but with unordered-atomic stores and element-wise atomic memset.
83define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) {
84; CHECK-LABEL: @test6_atomic(
85; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4)
86; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
87; CHECK-NEXT:    ret void
88;
89  store atomic i32 10, i32* %p unordered, align 4       ;; dead.
90  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %q, i8 42, i64 900, i32 4)
91  store atomic i32 30, i32* %p unordered, align 4
92  ret void
93}
94
95; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
96; alias).
97define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
98; CHECK-LABEL: @test7(
99; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[Q:%.*]], i8* [[R:%.*]], i64 900, i1 false)
100; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
101; CHECK-NEXT:    ret void
102;
103  store i32 10, i32* %p, align 4       ;; dead.
104  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i1 false)
105  store i32 30, i32* %p, align 4
106  ret void
107}
108
109; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
110; alias).
; Same as @test7, but with unordered-atomic stores and element-wise atomic memcpy.
111define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) {
112; CHECK-LABEL: @test7_atomic(
113; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4)
114; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
115; CHECK-NEXT:    ret void
116;
117  store atomic i32 10, i32* %p unordered, align 4       ;; dead.
118  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %q, i8* align 4 %r, i64 900, i32 4)
119  store atomic i32 30, i32* %p unordered, align 4
120  ret void
121}
122
123; Do not delete stores that are only partially killed.
; NOTE(review): the checks show the i8 store is removed while the
; partially-killed i32 store is kept. On this big-endian datalayout the i8
; store writes the most-significant byte of 1234567 (0x0012D687), which is
; already zero — presumably why it is deletable; confirm against the pass.
124define i32 @test8() {
125; CHECK-LABEL: @test8(
126; CHECK-NEXT:    [[V:%.*]] = alloca i32, align 4
127; CHECK-NEXT:    store i32 1234567, i32* [[V]], align 4
128; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[V]], align 4
129; CHECK-NEXT:    ret i32 [[X]]
130;
131  %V = alloca i32
132  store i32 1234567, i32* %V
133  %V2 = bitcast i32* %V to i8*
134  store i8 0, i8* %V2
135  %X = load i32, i32* %V
136  ret i32 %X
137
138}
139
140; Test for byval handling.
; Stores to byval/inalloca/preallocated argument memory that is never read
; before return are dead: the callee owns its copy, so all three functions
; reduce to a bare ret.
141%struct.x = type { i32, i32, i32, i32 }
142define void @test9(%struct.x* byval(%struct.x)  %a) nounwind  {
143; CHECK-LABEL: @test9(
144; CHECK-NEXT:    ret void
145;
146  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
147  store i32 1, i32* %tmp2, align 4
148  ret void
149}
150
151; Test for inalloca handling.
152define void @test9_2(%struct.x* inalloca(%struct.x) %a) nounwind {
153; CHECK-LABEL: @test9_2(
154; CHECK-NEXT:    ret void
155;
156  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
157  store i32 1, i32* %tmp2, align 4
158  ret void
159}
160
161; Test for preallocated handling.
162define void @test9_3(%struct.x* preallocated(%struct.x)  %a) nounwind  {
163; CHECK-LABEL: @test9_3(
164; CHECK-NEXT:    ret void
165;
166  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
167  store i32 1, i32* %tmp2, align 4
168  ret void
169}
170
171; va_arg has fuzzy dependence, the store shouldn't be zapped.
; (va_arg reads the va_list stored in %X_addr, so the store is live.)
172define double @test10(i8* %X) {
173; CHECK-LABEL: @test10(
174; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i8*, align 8
175; CHECK-NEXT:    store i8* [[X:%.*]], i8** [[X_ADDR]], align 8
176; CHECK-NEXT:    [[TMP_0:%.*]] = va_arg i8** [[X_ADDR]], double
177; CHECK-NEXT:    ret double [[TMP_0]]
178;
179  %X_addr = alloca i8*
180  store i8* %X, i8** %X_addr
181  %tmp.0 = va_arg i8** %X_addr, double
182  ret double %tmp.0
183}
184
185; DSE should delete the dead trampoline.
; The trampoline is written into a local alloca that is never read or
; escaped, so the init.trampoline (and the whole body) is removed.
186declare void @test11f()
187define void @test11() {
188; CHECK-LABEL: @test11(
189; CHECK-NEXT:    ret void
190;
191  %storage = alloca [10 x i8], align 16		; <[10 x i8]*> [#uses=1]
192  %cast = getelementptr [10 x i8], [10 x i8]* %storage, i32 0, i32 0		; <i8*> [#uses=1]
193  call void @llvm.init.trampoline( i8* %cast, i8* bitcast (void ()* @test11f to i8*), i8* null )		; <i8*> [#uses=1]
194  ret void
195}
196
197; Specialized store intrinsics should be removed if dead.
; The matrix store targets a local alloca that is never read, so the whole
; body folds to ret.
198define void @test_matrix_store(i64 %stride) {
199; CHECK-LABEL: @test_matrix_store(
200; CHECK-NEXT:    ret void
201;
202  %a = alloca [6 x float]
203  %cast = bitcast [6 x float]* %a to float*
204  call void @llvm.matrix.column.major.store(<6 x float> zeroinitializer, float* %cast, i64 %stride, i1 false, i32 3, i32 2)
205  ret void
206}
207
208; %P doesn't escape, the DEAD instructions should be removed.
209declare void @may_unwind()
; The load/add/store before @may_unwind are removed: the freshly malloc'ed
; memory has not escaped yet, so even an unwind cannot observe the store.
210define i32* @test_malloc_no_escape_before_return() {
211; CHECK-LABEL: @test_malloc_no_escape_before_return(
212; CHECK-NEXT:    [[PTR:%.*]] = tail call i8* @malloc(i64 4)
213; CHECK-NEXT:    [[P:%.*]] = bitcast i8* [[PTR]] to i32*
214; CHECK-NEXT:    call void @may_unwind()
215; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
216; CHECK-NEXT:    ret i32* [[P]]
217;
218  %ptr = tail call i8* @malloc(i64 4)
219  %P = bitcast i8* %ptr to i32*
220  %DEAD = load i32, i32* %P
221  %DEAD2 = add i32 %DEAD, 1
222  store i32 %DEAD2, i32* %P
223  call void @may_unwind()
224  store i32 0, i32* %P
225  ret i32* %P
226}
227
; Same as above, but with a non-standard allocator that is only declared
; noalias/willreturn (no allockind metadata).
228define i32* @test_custom_malloc_no_escape_before_return() {
229; CHECK-LABEL: @test_custom_malloc_no_escape_before_return(
230; CHECK-NEXT:    [[PTR:%.*]] = tail call i8* @custom_malloc(i32 4)
231; CHECK-NEXT:    [[P:%.*]] = bitcast i8* [[PTR]] to i32*
232; CHECK-NEXT:    call void @may_unwind()
233; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
234; CHECK-NEXT:    ret i32* [[P]]
235;
236  %ptr = tail call i8* @custom_malloc(i32 4)
237  %P = bitcast i8* %ptr to i32*
238  %DEAD = load i32, i32* %P
239  %DEAD2 = add i32 %DEAD, 1
240  store i32 %DEAD2, i32* %P
241  call void @may_unwind()
242  store i32 0, i32* %P
243  ret i32* %P
244}
245
; Same pattern through an addrspacecast of the malloc'ed pointer.
; NOTE(review): the autogenerated checks rebind [[P]] (first to the malloc
; result, then to the addrspacecast); FileCheck permits redefinition, so
; this matches as written.
246define i32 addrspace(1)* @test13_addrspacecast() {
247; CHECK-LABEL: @test13_addrspacecast(
248; CHECK-NEXT:    [[P:%.*]] = tail call i8* @malloc(i64 4)
249; CHECK-NEXT:    [[P_BC:%.*]] = bitcast i8* [[P]] to i32*
250; CHECK-NEXT:    [[P:%.*]] = addrspacecast i32* [[P_BC]] to i32 addrspace(1)*
251; CHECK-NEXT:    call void @may_unwind()
252; CHECK-NEXT:    store i32 0, i32 addrspace(1)* [[P]], align 4
253; CHECK-NEXT:    ret i32 addrspace(1)* [[P]]
254;
255  %p = tail call i8* @malloc(i64 4)
256  %p.bc = bitcast i8* %p to i32*
257  %P = addrspacecast i32* %p.bc to i32 addrspace(1)*
258  %DEAD = load i32, i32 addrspace(1)* %P
259  %DEAD2 = add i32 %DEAD, 1
260  store i32 %DEAD2, i32 addrspace(1)* %P
261  call void @may_unwind()
262  store i32 0, i32 addrspace(1)* %P
263  ret i32 addrspace(1)* %P
264}
265
266
267declare noalias i8* @malloc(i64) willreturn allockind("alloc,uninitialized")
268declare noalias i8* @custom_malloc(i32) willreturn
269declare noalias i8* @calloc(i32, i32) willreturn allockind("alloc,zeroed")
270
; A store into a local alloca that is never read before return is dead;
; the load of %Q dies with it and the whole body folds to ret.
271define void @test14(i32* %Q) {
272; CHECK-LABEL: @test14(
273; CHECK-NEXT:    ret void
274;
275  %P = alloca i32
276  %DEAD = load i32, i32* %Q
277  store i32 %DEAD, i32* %P
278  ret void
279
280}
281
282; The store here is not dead because the byval call reads it.
; (The call site makes a byval copy of %arg5, which reads the stored value.)
283declare void @test19f({i32}* byval({i32}) align 4 %P)
284
285define void @test19({i32}* nocapture byval({i32}) align 4 %arg5) nounwind ssp {
286; CHECK-LABEL: @test19(
287; CHECK-NEXT:  bb:
288; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds { i32 }, { i32 }* [[ARG5:%.*]], i32 0, i32 0
289; CHECK-NEXT:    store i32 912, i32* [[TMP7]], align 4
290; CHECK-NEXT:    call void @test19f({ i32 }* byval({ i32 }) align 4 [[ARG5]])
291; CHECK-NEXT:    ret void
292;
293bb:
294  %tmp7 = getelementptr inbounds {i32}, {i32}* %arg5, i32 0, i32 0
295  store i32 912, i32* %tmp7
296  call void @test19f({i32}* byval({i32}) align 4 %arg5)
297  ret void
298
299}
300
; @malloc is allockind("alloc,uninitialized") and the memory never escapes,
; so both the dead store and the allocation itself are removed.
301define void @malloc_no_escape() {
302; CHECK-LABEL: @malloc_no_escape(
303; CHECK-NEXT:    ret void
304;
305  %m = call i8* @malloc(i64 24)
306  store i8 0, i8* %m
307  ret void
308}
309
; Only the store is removed here: @custom_malloc carries no allockind
; information, so the call itself must stay.
310define void @custom_malloc_no_escape() {
311; CHECK-LABEL: @custom_malloc_no_escape(
312; CHECK-NEXT:    [[M:%.*]] = call i8* @custom_malloc(i32 24)
313; CHECK-NEXT:    ret void
314;
315  %m = call i8* @custom_malloc(i32 24)
316  store i8 0, i8* %m
317  ret void
318}
319
; @calloc is allockind("alloc,zeroed") and nothing escapes; the store of 0
; into already-zeroed memory and the allocation are both removed.
320define void @test21() {
321; CHECK-LABEL: @test21(
322; CHECK-NEXT:    ret void
323;
324  %m = call i8* @calloc(i32 9, i32 7)
325  store i8 0, i8* %m
326  ret void
327}
328
329; Currently elimination of stores at the end of a function is limited to a
330; single underlying object, for compile-time. This case appears to not be
331; very important in practice.
; (The select gives the store two possible underlying allocas, so it is kept
; even though both are dead at return.)
332define void @test22(i1 %i, i32 %k, i32 %m) nounwind {
333; CHECK-LABEL: @test22(
334; CHECK-NEXT:    [[K_ADDR:%.*]] = alloca i32, align 4
335; CHECK-NEXT:    [[M_ADDR:%.*]] = alloca i32, align 4
336; CHECK-NEXT:    [[K_ADDR_M_ADDR:%.*]] = select i1 [[I:%.*]], i32* [[K_ADDR]], i32* [[M_ADDR]]
337; CHECK-NEXT:    store i32 0, i32* [[K_ADDR_M_ADDR]], align 4
338; CHECK-NEXT:    ret void
339;
340  %k.addr = alloca i32
341  %m.addr = alloca i32
342  %k.addr.m.addr = select i1 %i, i32* %k.addr, i32* %m.addr
343  store i32 0, i32* %k.addr.m.addr, align 4
344  ret void
345}
346
347; PR13547
; Neither store is dead: @strdup reads the buffer through %arrayidx.
348declare noalias i8* @strdup(i8* nocapture) nounwind
349define noalias i8* @test23() nounwind uwtable ssp {
350; CHECK-LABEL: @test23(
351; CHECK-NEXT:    [[X:%.*]] = alloca [2 x i8], align 1
352; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 0
353; CHECK-NEXT:    store i8 97, i8* [[ARRAYIDX]], align 1
354; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 1
355; CHECK-NEXT:    store i8 0, i8* [[ARRAYIDX1]], align 1
356; CHECK-NEXT:    [[CALL:%.*]] = call i8* @strdup(i8* [[ARRAYIDX]]) #[[ATTR5:[0-9]+]]
357; CHECK-NEXT:    ret i8* [[CALL]]
358;
359  %x = alloca [2 x i8], align 1
360  %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %x, i64 0, i64 0
361  store i8 97, i8* %arrayidx, align 1
362  %arrayidx1 = getelementptr inbounds [2 x i8], [2 x i8]* %x, i64 0, i64 1
363  store i8 0, i8* %arrayidx1, align 1
364  %call = call i8* @strdup(i8* %arrayidx) nounwind
365  ret i8* %call
366}
367
368; Make sure same sized store to later element is deleted
; Both zero-initializing stores are fully overwritten by the same-sized
; stores of %b and %c, so only the latter pair survives.
369define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind {
370; CHECK-LABEL: @test24(
371; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A:%.*]], i64 0, i64 0
372; CHECK-NEXT:    store i32 [[B:%.*]], i32* [[TMP1]], align 4
373; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1
374; CHECK-NEXT:    store i32 [[C:%.*]], i32* [[TMP2]], align 4
375; CHECK-NEXT:    ret void
376;
377  %1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
378  store i32 0, i32* %1, align 4
379  %2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
380  store i32 0, i32* %2, align 4
381  %3 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
382  store i32 %b, i32* %3, align 4
383  %4 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
384  store i32 %c, i32* %4, align 4
385  ret void
386}
387
388; Check another case like PR13547 where strdup is not like malloc.
; The store of 0 is read by @strdup (its argument %p reaches %p.4), and the
; final store restores the saved byte afterwards; nothing may be removed.
389define i8* @test25(i8* %p) nounwind {
390; CHECK-LABEL: @test25(
391; CHECK-NEXT:    [[P_4:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 4
392; CHECK-NEXT:    [[TMP:%.*]] = load i8, i8* [[P_4]], align 1
393; CHECK-NEXT:    store i8 0, i8* [[P_4]], align 1
394; CHECK-NEXT:    [[Q:%.*]] = call i8* @strdup(i8* [[P]]) #[[ATTR10:[0-9]+]]
395; CHECK-NEXT:    store i8 [[TMP]], i8* [[P_4]], align 1
396; CHECK-NEXT:    ret i8* [[Q]]
397;
398  %p.4 = getelementptr i8, i8* %p, i64 4
399  %tmp = load i8, i8* %p.4, align 1
400  store i8 0, i8* %p.4, align 1
401  %q = call i8* @strdup(i8* %p) nounwind optsize
402  store i8 %tmp, i8* %p.4, align 1
403  ret i8* %q
404}
405
406; Don't remove redundant store because of may-aliased store.
; Here the may-alias store to %p2 happens before the branch; the store of
; the reloaded %v in bb3 is therefore not redundant.
407define i32 @test28(i1 %c, i32* %p, i32* %p2, i32 %i) {
408; CHECK-LABEL: @test28(
409; CHECK-NEXT:  entry:
410; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
411; CHECK-NEXT:    store i32 [[I:%.*]], i32* [[P2:%.*]], align 4
412; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
413; CHECK:       bb1:
414; CHECK-NEXT:    br label [[BB3:%.*]]
415; CHECK:       bb2:
416; CHECK-NEXT:    br label [[BB3]]
417; CHECK:       bb3:
418; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
419; CHECK-NEXT:    ret i32 0
420;
421entry:
422  %v = load i32, i32* %p, align 4
423
424  ; Might overwrite value at %p
425  store i32 %i, i32* %p2, align 4
426  br i1 %c, label %bb1, label %bb2
427bb1:
428  br label %bb3
429bb2:
430  br label %bb3
431bb3:
432  store i32 %v, i32* %p, align 4
433  ret i32 0
434}
435
436; Don't remove redundant store because of may-aliased store.
; Same as @test28, but the may-alias store occurs on only one path (bb2);
; the merge at bb3 still cannot treat the store of %v as redundant.
437define i32 @test29(i1 %c, i32* %p, i32* %p2, i32 %i) {
438; CHECK-LABEL: @test29(
439; CHECK-NEXT:  entry:
440; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
441; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
442; CHECK:       bb1:
443; CHECK-NEXT:    br label [[BB3:%.*]]
444; CHECK:       bb2:
445; CHECK-NEXT:    store i32 [[I:%.*]], i32* [[P2:%.*]], align 4
446; CHECK-NEXT:    br label [[BB3]]
447; CHECK:       bb3:
448; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
449; CHECK-NEXT:    ret i32 0
450;
451entry:
452  %v = load i32, i32* %p, align 4
453  br i1 %c, label %bb1, label %bb2
454bb1:
455  br label %bb3
456bb2:
457  ; Might overwrite value at %p
458  store i32 %i, i32* %p2, align 4
459  br label %bb3
460bb3:
461  store i32 %v, i32* %p, align 4
462  ret i32 0
463}
464
465declare void @unknown_func()
466
467; Don't remove redundant store because of unknown call.
; @unknown_func may write %p on the bb2 path, so storing the reloaded %v
; back in bb3 is not redundant.
468define i32 @test30(i1 %c, i32* %p, i32 %i) {
469; CHECK-LABEL: @test30(
470; CHECK-NEXT:  entry:
471; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
472; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
473; CHECK:       bb1:
474; CHECK-NEXT:    br label [[BB3:%.*]]
475; CHECK:       bb2:
476; CHECK-NEXT:    call void @unknown_func()
477; CHECK-NEXT:    br label [[BB3]]
478; CHECK:       bb3:
479; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
480; CHECK-NEXT:    ret i32 0
481;
482entry:
483  %v = load i32, i32* %p, align 4
484  br i1 %c, label %bb1, label %bb2
485bb1:
486  br label %bb3
487bb2:
488  ; Might overwrite value at %p
489  call void @unknown_func()
490  br label %bb3
491bb3:
492  store i32 %v, i32* %p, align 4
493  ret i32 0
494}
495
496; Don't remove redundant store in a loop with a may-alias store.
; Each iteration must re-store %v because @unknown_func may both read and
; overwrite %p between iterations.
497define i32 @test32(i1 %c, i32* %p, i32 %i) {
498; CHECK-LABEL: @test32(
499; CHECK-NEXT:  entry:
500; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
501; CHECK-NEXT:    br label [[BB1:%.*]]
502; CHECK:       bb1:
503; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
504; CHECK-NEXT:    call void @unknown_func()
505; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB2:%.*]]
506; CHECK:       bb2:
507; CHECK-NEXT:    ret i32 0
508;
509entry:
510  %v = load i32, i32* %p, align 4
511  br label %bb1
512bb1:
513  store i32 %v, i32* %p, align 4
514  ; Might read and overwrite value at %p
515  call void @unknown_func()
516  br i1 undef, label %bb1, label %bb2
517bb2:
518  ret i32 0
519}
520
521; We cannot remove any stores, because @unknown_func may unwind and the caller
522; may read %p while unwinding.
523define void @test34(i32* noalias %p) {
524; CHECK-LABEL: @test34(
525; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
526; CHECK-NEXT:    call void @unknown_func()
527; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
528; CHECK-NEXT:    ret void
529;
530  store i32 1, i32* %p
531  call void @unknown_func()
532  store i32 0, i32* %p
533  ret void
534}
535; Same as previous case, but with an sret argument.
536; TODO: The first store could be eliminated if sret is not visible on unwind.
537define void @test34_sret(i32* noalias sret(i32) %p) {
538; CHECK-LABEL: @test34_sret(
539; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
540; CHECK-NEXT:    call void @unknown_func()
541; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
542; CHECK-NEXT:    ret void
543;
544  store i32 1, i32* %p
545  call void @unknown_func()
546  store i32 0, i32* %p
547  ret void
548}
549
550; Remove redundant store even with an unwinding function in the same block
; Here the overwritten store comes after the call, so no unwind path can
; observe it and it is safely deleted.
551define void @test35(i32* noalias %p) {
552; CHECK-LABEL: @test35(
553; CHECK-NEXT:    call void @unknown_func()
554; CHECK-NEXT:    store i32 0, i32* [[P:%.*]], align 4
555; CHECK-NEXT:    ret void
556;
557  call void @unknown_func()
558  store i32 1, i32* %p
559  store i32 0, i32* %p
560  ret void
561}
562
563; We cannot optimize away the first memmove since %P could overlap with %Q.
564define void @test36(i8* %P, i8* %Q) {
565; CHECK-LABEL: @test36(
566; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
567; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P]], i8* [[Q]], i64 12, i1 false)
568; CHECK-NEXT:    ret void
569;
570
571  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
572  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
573  ret void
574}
575
; Element-wise atomic variant of @test36; same reasoning, nothing removed.
576define void @test36_atomic(i8* %P, i8* %Q) {
577; CHECK-LABEL: @test36_atomic(
578; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
579; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[Q]], i64 12, i32 1)
580; CHECK-NEXT:    ret void
581;
582
583  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
584  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
585  ret void
586}
587
; %R may alias %P, so the memmove may read what the memcpy wrote; the
; memcpy is not dead and both calls are kept.
588define void @test37(i8* %P, i8* %Q, i8* %R) {
589; CHECK-LABEL: @test37(
590; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
591; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
592; CHECK-NEXT:    ret void
593;
594
595  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
596  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
597  ret void
598}
599
; Element-wise atomic variant of @test37; same reasoning, nothing removed.
600define void @test37_atomic(i8* %P, i8* %Q, i8* %R) {
601; CHECK-LABEL: @test37_atomic(
602; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
603; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
604; CHECK-NEXT:    ret void
605;
606
607  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
608  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
609  ret void
610}
611
612; See PR11763 - LLVM allows memcpy's source and destination to be equal (but not
613; inequal and overlapping).
; %R may alias %P (equal pointers are legal for memcpy), so the second call
; may read the memmove's output; nothing is removed.
614define void @test38(i8* %P, i8* %Q, i8* %R) {
615; CHECK-LABEL: @test38(
616; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
617; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
618; CHECK-NEXT:    ret void
619;
620
621  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
622  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false)
623  ret void
624}
625
626; See PR11763 - LLVM allows memcpy's source and destination to be equal (but not
627; inequal and overlapping).
; Element-wise atomic variant of @test38; same reasoning, nothing removed.
628define void @test38_atomic(i8* %P, i8* %Q, i8* %R) {
629; CHECK-LABEL: @test38_atomic(
630; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
631; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
632; CHECK-NEXT:    ret void
633;
634
635  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
636  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 12, i32 1)
637  ret void
638}
639
; The second memcpy covers only 8 of the 12 bytes written by the first, so
; the first is only partially overwritten and must stay.
640define void @test39(i8* %P, i8* %Q, i8* %R) {
641; CHECK-LABEL: @test39(
642; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
643; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 8, i1 false)
644; CHECK-NEXT:    ret void
645;
646
647  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
648  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 8, i1 false)
649  ret void
650}
651
; Element-wise atomic variant of @test39; same reasoning, nothing removed.
652define void @test39_atomic(i8* %P, i8* %Q, i8* %R) {
653; CHECK-LABEL: @test39_atomic(
654; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
655; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 8, i32 1)
656; CHECK-NEXT:    ret void
657;
658
659  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
660  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %R, i64 8, i32 1)
661  ret void
662}
663
664declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
665declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32)
666
667declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind
668declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind
; The first memcpy fills the lifetime-scoped alloca %A, which the second
; (volatile) memcpy reads before lifetime.end; nothing is removed.
669define void @test40(i32** noalias %Pp, i32* noalias %Q)  {
670; CHECK-LABEL: @test40(
671; CHECK-NEXT:  entry:
672; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
673; CHECK-NEXT:    [[AC:%.*]] = bitcast i32* [[A]] to i8*
674; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[AC]])
675; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32** [[PP:%.*]] to i8**
676; CHECK-NEXT:    [[PC:%.*]] = load i8*, i8** [[TMP0]], align 8
677; CHECK-NEXT:    [[QC:%.*]] = bitcast i32* [[Q:%.*]] to i8*
678; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 [[AC]], i8* align 4 [[QC]], i64 4, i1 false)
679; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[PC]], i8* nonnull align 4 [[AC]], i64 4, i1 true)
680; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[AC]])
681; CHECK-NEXT:    ret void
682;
683entry:
684  %A = alloca i32, align 4
685  %Ac = bitcast i32* %A to i8*
686  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %Ac)
687  %0 = bitcast i32** %Pp to i8**
688  %Pc = load i8*, i8** %0, align 8
689  %Qc = bitcast i32* %Q to i8*
690  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %Ac, i8* align 4 %Qc, i64 4, i1 false)
691  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %Pc, i8* nonnull align 4 %Ac, i64 4, i1 true)
692  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %Ac)
693  ret void
694}
695
696declare void @free(i8* nocapture) allockind("free")
697
698; We cannot remove `store i32 1, i32* %p`, because @unknown_func may unwind
699; and the caller may read %p while unwinding.
; The store of 2 IS removed: the memory dies at the @free call with no
; intervening read.
700define void @test41(i32* noalias %P) {
701; CHECK-LABEL: @test41(
702; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
703; CHECK-NEXT:    store i32 1, i32* [[P]], align 4
704; CHECK-NEXT:    call void @unknown_func()
705; CHECK-NEXT:    call void @free(i8* [[P2]])
706; CHECK-NEXT:    ret void
707;
708  %P2 = bitcast i32* %P to i8*
709  store i32 1, i32* %P
710  call void @unknown_func()
711  store i32 2, i32* %P
712  call void @free(i8* %P2)
713  ret void
714}
715
; %Q may alias %P, so the intervening store of 2 blocks any merging of the
; i32 store with the later one-byte store; everything is kept.
716define void @test42(i32* %P, i32* %Q) {
717; CHECK-LABEL: @test42(
718; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
719; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to i8*
720; CHECK-NEXT:    store i32 2, i32* [[Q:%.*]], align 4
721; CHECK-NEXT:    store i8 3, i8* [[P2]], align 1
722; CHECK-NEXT:    ret void
723;
724  store i32 1, i32* %P
725  %P2 = bitcast i32* %P to i8*
726  store i32 2, i32* %Q
727  store i8 3, i8* %P2
728  ret void
729}
730
; Same as @test42 with unordered-atomic stores; still nothing removed.
731define void @test42a(i32* %P, i32* %Q) {
732; CHECK-LABEL: @test42a(
733; CHECK-NEXT:    store atomic i32 1, i32* [[P:%.*]] unordered, align 4
734; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P]] to i8*
735; CHECK-NEXT:    store atomic i32 2, i32* [[Q:%.*]] unordered, align 4
736; CHECK-NEXT:    store atomic i8 3, i8* [[P2]] unordered, align 4
737; CHECK-NEXT:    ret void
738;
739  store atomic i32 1, i32* %P unordered, align 4
740  %P2 = bitcast i32* %P to i8*
741  store atomic i32 2, i32* %Q unordered, align 4
742  store atomic i8 3, i8* %P2 unordered, align 4
743  ret void
744}
745
; With noalias %Q the intervening store cannot touch %P, so the one-byte
; store of 3 is folded into the earlier i32 store: on this big-endian
; layout the merged constant is 0x03000001 = 50331649.
746define void @test43a(i32* %P, i32* noalias %Q) {
747; CHECK-LABEL: @test43a(
748; CHECK-NEXT:  entry:
749; CHECK-NEXT:    store atomic i32 50331649, i32* [[P:%.*]] unordered, align 4
750; CHECK-NEXT:    store atomic i32 2, i32* [[Q:%.*]] unordered, align 4
751; CHECK-NEXT:    ret void
752;
753entry:
754  store atomic i32 1, i32* %P unordered, align 4
755  %P2 = bitcast i32* %P to i8*
756  store atomic i32 2, i32* %Q unordered, align 4
757  store atomic i8 3, i8* %P2 unordered, align 4
758  ret void
759}
760
761; Some tests where volatile may block removing a store.
762
763; Here we can remove the first non-volatile store. We cannot remove the
764; volatile store.
765define void @test44_volatile(i32* %P) {
766; CHECK-LABEL: @test44_volatile(
767; CHECK-NEXT:    store volatile i32 2, i32* [[P:%.*]], align 4
768; CHECK-NEXT:    store i32 3, i32* [[P]], align 4
769; CHECK-NEXT:    ret void
770;
771  store i32 1, i32* %P, align 4
772  store volatile i32 2, i32* %P, align 4
773  store i32 3, i32* %P, align 4
774  ret void
775}
776
; The non-volatile store of 1 is overwritten before any read and is
; removed; both volatile stores must stay.
777define void @test45_volatile(i32* %P) {
778; CHECK-LABEL: @test45_volatile(
779; CHECK-NEXT:    store volatile i32 2, i32* [[P:%.*]], align 4
780; CHECK-NEXT:    store volatile i32 3, i32* [[P]], align 4
781; CHECK-NEXT:    ret void
782;
783  store i32 1, i32* %P, align 4
784  store volatile i32 2, i32* %P, align 4
785  store volatile i32 3, i32* %P, align 4
786  ret void
787}
788
; The non-volatile store sandwiched between the volatile stores is dead
; (fully overwritten by the volatile store of 3) and is removed.
789define void @test46_volatile(i32* %P) {
790; CHECK-LABEL: @test46_volatile(
791; CHECK-NEXT:    store volatile i32 2, i32* [[P:%.*]], align 4
792; CHECK-NEXT:    store volatile i32 3, i32* [[P]], align 4
793; CHECK-NEXT:    ret void
794;
795  store volatile i32 2, i32* %P, align 4
796  store i32 1, i32* %P, align 4
797  store volatile i32 3, i32* %P, align 4
798  ret void
799}
800
; Both stores are volatile; neither may be removed.
801define void @test47_volatile(i32* %P) {
802; CHECK-LABEL: @test47_volatile(
803; CHECK-NEXT:    store volatile i32 2, i32* [[P:%.*]], align 4
804; CHECK-NEXT:    store volatile i32 3, i32* [[P]], align 4
805; CHECK-NEXT:    ret void
806;
807  store volatile i32 2, i32* %P, align 4
808  store volatile i32 3, i32* %P, align 4
809  ret void
810}
811
; The store of 1 to noalias %Q is dead: the intervening store to %P cannot
; alias %Q, and the store of 3 overwrites it before the load of %R.
812define i32 @test48(i32* %P, i32* noalias %Q, i32* %R) {
813; CHECK-LABEL: @test48(
814; CHECK-NEXT:    store i32 2, i32* [[P:%.*]], align 4
815; CHECK-NEXT:    store i32 3, i32* [[Q:%.*]], align 4
816; CHECK-NEXT:    [[L:%.*]] = load i32, i32* [[R:%.*]], align 4
817; CHECK-NEXT:    ret i32 [[L]]
818;
819  store i32 1, i32* %Q
820  store i32 2, i32* %P
821  store i32 3, i32* %Q
822  %l = load i32, i32* %R
823  ret i32 %l
824}
825