; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
; RUN: opt < %s -basic-aa -globals-aa -memcpyopt -S -verify-memoryssa | FileCheck %s

target datalayout = "e"

; External callee used by the tests below. Its i8* parameter is not marked
; nocapture, so a call to it may capture the pointer it is given.
declare void @foo(i8*)
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)

; Check that the transformation isn't applied if the called function can
; capture the pointer argument (i.e. the nocapture attribute isn't present).
; The expected output keeps the first call on %ptr2 and keeps the memcpy into
; %ptr1, since the second call to @foo is a potential use of the capture.
define void @test() {
; CHECK-LABEL: define {{[^@]+}}@test() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @foo(i8* [[PTR2]])
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
; CHECK-NEXT:    call void @foo(i8* [[PTR1]])
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @foo(i8* %ptr2)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
  call void @foo(i8* %ptr1)
  ret void
}

; Same as previous test, but with a bitcasted argument: both allocas are
; [2 x i8] and are passed to @foo and the memcpy through i8* bitcasts. The
; expected output keeps the call on the %ptr2 cast and keeps the memcpy.
define void @test_bitcast() {
; CHECK-LABEL: define {{[^@]+}}@test_bitcast() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca [2 x i8], align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca [2 x i8], align 1
; CHECK-NEXT:    [[PTR1_CAST:%.*]] = bitcast [2 x i8]* [[PTR1]] to i8*
; CHECK-NEXT:    [[PTR2_CAST:%.*]] = bitcast [2 x i8]* [[PTR2]] to i8*
; CHECK-NEXT:    call void @foo(i8* [[PTR2_CAST]])
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1_CAST]], i8* [[PTR2_CAST]], i32 2, i1 false)
; CHECK-NEXT:    call void @foo(i8* [[PTR1_CAST]])
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca [2 x i8]
  %ptr2 = alloca [2 x i8]
  %ptr1.cast = bitcast [2 x i8]* %ptr1 to i8*
  %ptr2.cast = bitcast [2 x i8]* %ptr2 to i8*
  call void @foo(i8* %ptr2.cast)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1.cast, i8* %ptr2.cast, i32 2, i1 false)
  call void @foo(i8* %ptr1.cast)
  ret void
}

; Lifetime of %ptr2 ends before the potential use of the capture in the second
; call, so the transformation is expected to apply: in the expected output the
; first call to @foo takes %ptr1 directly and the memcpy is gone, while the
; lifetime markers on %ptr2 remain.
define void @test_lifetime_end() {
; CHECK-LABEL: define {{[^@]+}}@test_lifetime_end() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 1, i8* [[PTR2]])
; CHECK-NEXT:    call void @foo(i8* [[PTR1]])
; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 1, i8* [[PTR2]])
; CHECK-NEXT:    call void @foo(i8* [[PTR1]])
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @llvm.lifetime.start.p0i8(i64 1, i8* %ptr2)
  call void @foo(i8* %ptr2)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
  call void @llvm.lifetime.end.p0i8(i64 1, i8* %ptr2)
  call void @foo(i8* %ptr1)
  ret void
}

; Lifetime of %ptr2 does not end, because of size mismatch: lifetime.end is
; called with size 0 whereas lifetime.start used size 1 for the i8 alloca.
; The expected output keeps the first call on %ptr2 and keeps the memcpy.
define void @test_lifetime_not_end() {
; CHECK-LABEL: define {{[^@]+}}@test_lifetime_not_end() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 1, i8* [[PTR2]])
; CHECK-NEXT:    call void @foo(i8* [[PTR2]])
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 0, i8* [[PTR2]])
; CHECK-NEXT:    call void @foo(i8* [[PTR1]])
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @llvm.lifetime.start.p0i8(i64 1, i8* %ptr2)
  call void @foo(i8* %ptr2)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
  call void @llvm.lifetime.end.p0i8(i64 0, i8* %ptr2)
  call void @foo(i8* %ptr1)
  ret void
}

; Lifetime of %ptr2 ends before any potential use of the capture because we
; return from the function. The transformation is expected to apply: the
; expected output calls @foo on %ptr1 and contains no memcpy.
define void @test_function_end() {
; CHECK-LABEL: define {{[^@]+}}@test_function_end() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @foo(i8* [[PTR1]])
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @foo(i8* %ptr2)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
  ret void
}

; A potential use of the capture occurs in a later block, can't be optimized.
; The second call to @foo sits in %next, after the unconditional branch; the
; expected output keeps the first call on %ptr2 and keeps the memcpy.
define void @test_terminator() {
; CHECK-LABEL: define {{[^@]+}}@test_terminator() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @foo(i8* [[PTR2]])
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
; CHECK-NEXT:    br label [[NEXT:%.*]]
; CHECK:       next:
; CHECK-NEXT:    call void @foo(i8* [[PTR1]])
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @foo(i8* %ptr2)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
  br label %next

next:
  call void @foo(i8* %ptr1)
  ret void
}

; This case can be optimized, but would require a scan across multiple blocks
; and is currently not performed. The successor block %next only returns, yet
; the expected output still keeps the call on %ptr2 and the memcpy.
define void @test_terminator2() {
; CHECK-LABEL: define {{[^@]+}}@test_terminator2() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @foo(i8* [[PTR2]])
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
; CHECK-NEXT:    br label [[NEXT:%.*]]
; CHECK:       next:
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @foo(i8* %ptr2)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
  br label %next

next:
  ret void
}

; External callee whose i8* parameter is not marked nocapture; the test below
; passes the memcpy destination to it.
declare void @capture(i8*)

; This case should not be optimized, because dest is captured before the call.
; %ptr1 (the memcpy destination) is handed to @capture first; the expected
; output keeps the argmemonly call to @foo on %ptr2 and keeps the memcpy.
define void @test_dest_captured_before_alloca() {
; CHECK-LABEL: define {{[^@]+}}@test_dest_captured_before_alloca() {
; CHECK-NEXT:    [[PTR1:%.*]] = alloca i8, align 1
; CHECK-NEXT:    [[PTR2:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @capture(i8* [[PTR1]])
; CHECK-NEXT:    call void @foo(i8* [[PTR2]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i1 false)
; CHECK-NEXT:    ret void
;
  %ptr1 = alloca i8
  %ptr2 = alloca i8
  call void @capture(i8* %ptr1)
  call void @foo(i8* %ptr2) argmemonly
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr1, i8* %ptr2, i32 1, i1 false)
  ret void
}


; Internal global used as the memcpy destination in the test below and as the
; comparison operand in @icmp_g.
@g = internal global i8 0

; This case should not be optimized, because @g is captured before the call
; (being a global) and @icmp_g might depend on its identity. The expected
; output keeps the call to @icmp_g on %ptr and keeps the memcpy into @g.
define void @test_dest_captured_before_global() {
; CHECK-LABEL: define {{[^@]+}}@test_dest_captured_before_global() {
; CHECK-NEXT:    [[PTR:%.*]] = alloca i8, align 1
; CHECK-NEXT:    call void @icmp_g(i8* [[PTR]])
; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* @g, i8* [[PTR]], i32 1, i1 false)
; CHECK-NEXT:    ret void
;
  %ptr = alloca i8
  call void @icmp_g(i8* %ptr)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* @g, i8* %ptr, i32 1, i1 false)
  ret void
}

; Helper called by @test_dest_captured_before_global. It compares its pointer
; argument against the address of @g and stores 1 through %p when they are
; equal, 2 otherwise, so the value written depends on the pointer's identity.
define void @icmp_g(i8* %p) {
; CHECK-LABEL: define {{[^@]+}}@icmp_g
; CHECK-SAME: (i8* [[P:%.*]]) {
; CHECK-NEXT:    [[C:%.*]] = icmp eq i8* [[P]], @g
; CHECK-NEXT:    br i1 [[C]], label [[IF:%.*]], label [[ELSE:%.*]]
; CHECK:       if:
; CHECK-NEXT:    store i8 1, i8* [[P]], align 1
; CHECK-NEXT:    ret void
; CHECK:       else:
; CHECK-NEXT:    store i8 2, i8* [[P]], align 1
; CHECK-NEXT:    ret void
;
  %c = icmp eq i8* %p, @g
  br i1 %c, label %if, label %else

if:
  store i8 1, i8* %p
  ret void

else:
  store i8 2, i8* %p
  ret void
}
