1; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -verify-loop-info -simplifycfg < %s | FileCheck %s --check-prefix=UNROLL
2; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -verify-loop-info < %s | FileCheck %s --check-prefix=UNROLL-NOSIMPLIFY
3; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -verify-loop-info -simplifycfg < %s | FileCheck %s --check-prefix=VEC
4
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6
7; Test predication of stores.
8define i32 @test(i32* nocapture %f) #0 {
; Input loop (see for.body below): walks elements 0..127 of %f and, for every
; element greater than 100, adds 20 to it and stores it back in place. The
; store sits in a conditionally executed block, so the transformed loop has to
; guard each copy of the store with its own branch.
9entry:
10  br label %for.body
11
12; VEC-LABEL: test
13; VEC:   %[[v0:.+]] = add i64 %index, 0
14; VEC:   %[[v8:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100>
15; VEC:   %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true>
16; VEC:   %[[o1:.+]] = or <2 x i1> zeroinitializer, %[[v10]]
17; VEC:   %[[v11:.+]] = extractelement <2 x i1> %[[o1]], i32 0
18; VEC:   %[[v12:.+]] = icmp eq i1 %[[v11]], true
19; VEC:   br i1 %[[v12]], label %[[cond:.+]], label %[[else:.+]]
20;
21; VEC: [[cond]]:
22; VEC:   %[[v13:.+]] = extractelement <2 x i32> %wide.load, i32 0
23; VEC:   %[[v9a:.+]] = add nsw i32 %[[v13]], 20
24; VEC:   %[[v2:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v0]]
25; VEC:   store i32 %[[v9a]], i32* %[[v2]], align 4
; The branch below reuses the block name captured by the conditional branch
; above (instead of re-capturing it), so the predicated block is required to
; rejoin at exactly the fall-through block. Same for the second lane.
26; VEC:   br label %[[else]]
27;
28; VEC: [[else]]:
29; VEC:   %[[v15:.+]] = extractelement <2 x i1> %[[o1]], i32 1
30; VEC:   %[[v16:.+]] = icmp eq i1 %[[v15]], true
31; VEC:   br i1 %[[v16]], label %[[cond2:.+]], label %[[else2:.+]]
32;
33; VEC: [[cond2]]:
34; VEC:   %[[v17:.+]] = extractelement <2 x i32> %wide.load, i32 1
35; VEC:   %[[v9b:.+]] = add nsw i32 %[[v17]], 20
36; VEC:   %[[v1:.+]] = add i64 %index, 1
37; VEC:   %[[v4:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v1]]
38; VEC:   store i32 %[[v9b]], i32* %[[v4]], align 4
39; VEC:   br label %[[else2]]
40;
41; VEC: [[else2]]:
42
43; UNROLL-LABEL: test
44; UNROLL: vector.body:
45; UNROLL:   %[[IND:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 0
46; UNROLL:   %[[IND1:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 1
47; UNROLL:   %[[v0:[a-zA-Z0-9]+]] = getelementptr inbounds i32, i32* %f, i64 %[[IND]]
48; UNROLL:   %[[v1:[a-zA-Z0-9]+]] = getelementptr inbounds i32, i32* %f, i64 %[[IND1]]
49; UNROLL:   %[[v2:[a-zA-Z0-9]+]] = load i32, i32* %[[v0]], align 4
50; UNROLL:   %[[v3:[a-zA-Z0-9]+]] = load i32, i32* %[[v1]], align 4
51; UNROLL:   %[[v4:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v2]], 100
52; UNROLL:   %[[v5:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v3]], 100
53; UNROLL:   %[[o1:[a-zA-Z0-9]+]] = or i1 false, %[[v4]]
54; UNROLL:   %[[o2:[a-zA-Z0-9]+]] = or i1 false, %[[v5]]
55; UNROLL:   %[[v8:[a-zA-Z0-9]+]] = icmp eq i1 %[[o1]], true
56; UNROLL:   br i1 %[[v8]], label %[[cond:[a-zA-Z0-9.]+]], label %[[else:[a-zA-Z0-9.]+]]
57;
58; UNROLL: [[cond]]:
59; UNROLL:   %[[v6:[a-zA-Z0-9]+]] = add nsw i32 %[[v2]], 20
60; UNROLL:   store i32 %[[v6]], i32* %[[v0]], align 4
61; UNROLL:   br label %[[else]]
62;
63; UNROLL: [[else]]:
64; UNROLL:   %[[v9:[a-zA-Z0-9]+]] = icmp eq i1 %[[o2]], true
65; UNROLL:   br i1 %[[v9]], label %[[cond2:[a-zA-Z0-9.]+]], label %[[else2:[a-zA-Z0-9.]+]]
66;
67; UNROLL: [[cond2]]:
68; UNROLL:   %[[v7:[a-zA-Z0-9]+]] = add nsw i32 %[[v3]], 20
69; UNROLL:   store i32 %[[v7]], i32* %[[v1]], align 4
70; UNROLL:   br label %[[else2]]
71;
72; UNROLL: [[else2]]:
73
; Loop header: load f[i] and test it against the threshold 100.
74for.body:
75  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
76  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
77  %0 = load i32, i32* %arrayidx, align 4
78  %cmp1 = icmp sgt i32 %0, 100
79  br i1 %cmp1, label %if.then, label %for.inc
80
; Conditionally executed store: f[i] = f[i] + 20. This is the store that the
; checks above expect to be predicated per lane / per unrolled copy.
81if.then:
82  %add = add nsw i32 %0, 20
83  store i32 %add, i32* %arrayidx, align 4
84  br label %for.inc
85
; Latch: advance the induction variable and leave after 128 iterations.
86for.inc:
87  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
88  %exitcond = icmp eq i64 %indvars.iv.next, 128
89  br i1 %exitcond, label %for.end, label %for.body
90
91for.end:
92  ret i32 0
93}
94
95; Track basic blocks when unrolling conditional blocks. This code used to assert
96; because we did not update the phi nodes with the proper predecessor in the
97; vectorized loop body.
98; PR18724
99
100; UNROLL-NOSIMPLIFY-LABEL: bug18724
101; UNROLL-NOSIMPLIFY: store i32
102; UNROLL-NOSIMPLIFY: store i32
103
104define void @bug18724() {
; Reproducer input: an inner loop (for.body14 -> for.inc23) containing a
; conditional store and a counter that is merged by a phi after the conditional
; block. The branch conditions and the array base pointer are undef.
105entry:
106  br label %for.body9
107
; Either skips the loop entirely or enters it; the condition is undef.
108for.body9:
109  br i1 undef, label %for.inc26, label %for.body14
110
; Loop header: %indvars.iv3 is the induction variable and %iNewChunks.120 is
; the running counter; both start from undef when entered from for.body9.
111for.body14:
112  %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc23 ], [ undef, %for.body9 ]
113  %iNewChunks.120 = phi i32 [ %iNewChunks.2, %for.inc23 ], [ undef, %for.body9 ]
114  %arrayidx16 = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 %indvars.iv3
115  %tmp = load i32, i32* %arrayidx16, align 4
116  br i1 undef, label %if.then18, label %for.inc23
117
; Conditional block: stores the constant 2 and bumps the counter.
118if.then18:
119  store i32 2, i32* %arrayidx16, align 4
120  %inc21 = add nsw i32 %iNewChunks.120, 1
121  br label %for.inc23
122
; Latch: %iNewChunks.2 merges the counter from the conditional block and from
; the header, so its incoming blocks must stay correct after the transform.
; The loop continues while the truncated induction value is negative.
123for.inc23:
124  %iNewChunks.2 = phi i32 [ %inc21, %if.then18 ], [ %iNewChunks.120, %for.body14 ]
125  %indvars.iv.next4 = add nsw i64 %indvars.iv3, 1
126  %tmp1 = trunc i64 %indvars.iv3 to i32
127  %cmp13 = icmp slt i32 %tmp1, 0
128  br i1 %cmp13, label %for.body14, label %for.inc26
129
; Exit block: the counter is carried out through an LCSSA-style phi, and the
; block ends in unreachable rather than a return.
130for.inc26:
131  %iNewChunks.1.lcssa = phi i32 [ undef, %for.body9 ], [ %iNewChunks.2, %for.inc23 ]
132  unreachable
133}
134
135; VEC-LABEL: @minimal_bit_widths(
136;
137; In the test below, it's more profitable for the expression feeding the
138; conditional store to remain scalar. Since we can only type-shrink vector
139; types, we shouldn't try to represent the expression in a smaller type.
140;
141; VEC: vector.body:
142; VEC:   %wide.load = load <2 x i8>, <2 x i8>* {{.*}}, align 1
143; VEC:   br i1 {{.*}}, label %[[IF0:.+]], label %[[CONT0:.+]]
144; VEC: [[IF0]]:
145; VEC:   %[[E0:.+]] = extractelement <2 x i8> %wide.load, i32 0
146; VEC:   %[[Z0:.+]] = zext i8 %[[E0]] to i32
147; VEC:   %[[T0:.+]] = trunc i32 %[[Z0]] to i8
148; VEC:   store i8 %[[T0]], i8* {{.*}}, align 1
149; VEC:   br label %[[CONT0]]
150; VEC: [[CONT0]]:
151; VEC:   br i1 {{.*}}, label %[[IF1:.+]], label %[[CONT1:.+]]
152; VEC: [[IF1]]:
153; VEC:   %[[E1:.+]] = extractelement <2 x i8> %wide.load, i32 1
154; VEC:   %[[Z1:.+]] = zext i8 %[[E1]] to i32
155; VEC:   %[[T1:.+]] = trunc i32 %[[Z1]] to i8
156; VEC:   store i8 %[[T1]], i8* {{.*}}, align 1
157; VEC:   br label %[[CONT1]]
158; VEC: [[CONT1]]:
159; VEC:   br i1 {{.*}}, label %middle.block, label %vector.body
160;
161define void @minimal_bit_widths(i1 %c) {
; Input loop: loads an i8, and when %c is true widens it to i32, narrows it
; back to i8 and stores it to the same address. The zext/trunc pair feeds only
; the conditional store.
162entry:
163  br label %for.body
164
; Loop header: %tmp0 is the element index (starts at 0); %tmp1 is a separate
; down-counter that starts from undef and controls the exit.
165for.body:
166  %tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ]
167  %tmp1 = phi i64 [ %tmp7, %for.inc ], [ undef, %entry ]
168  %tmp2 = getelementptr i8, i8* undef, i64 %tmp0
169  %tmp3 = load i8, i8* %tmp2, align 1
170  br i1 %c, label %if.then, label %for.inc
171
; Conditional block: i8 -> i32 -> i8 round trip, then store back to %tmp2.
172if.then:
173  %tmp4 = zext i8 %tmp3 to i32
174  %tmp5 = trunc i32 %tmp4 to i8
175  store i8 %tmp5, i8* %tmp2, align 1
176  br label %for.inc
177
; Latch: increment the index, decrement the counter, exit when it reaches 0.
178for.inc:
179  %tmp6 = add nuw nsw i64 %tmp0, 1
180  %tmp7 = add i64 %tmp1, -1
181  %tmp8 = icmp eq i64 %tmp7, 0
182  br i1 %tmp8, label %for.end, label %for.body
183
184for.end:
185  ret void
186}
187