; Exercises -loop-vectorize on loops whose header phi carries a value produced
; by the previous iteration (the %vector.recur / %scalar.recur values matched
; by the directives below).
;
; Three configurations are run, all with a forced vector width of 4:
;   * interleave 1, followed by -dce -instcombine  (default prefix)
;   * interleave 2, followed by -dce -instcombine  (UNROLL prefix)
;   * interleave 2, no cleanup passes              (UNROLL-NO-IC prefix)
;
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s --check-prefix=UNROLL
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; CHECK-LABEL: @recurrence_1
;
; void recurrence_1(int *a, int *b, int n) {
;   for(int i = 0; i < n; i++)
;     b[i] =  a[i] + a[i - 1]
; }
;
; NOTE(review): the IR below preloads a[0] and loads a[i + 1] inside the loop,
; so it stores a[i + 1] + a[i] into b[i]; the C sketch above is shifted by one.
;
; CHECK:   vector.ph:
; CHECK:     %vector.recur.init = insertelement <4 x i32> undef, i32 %pre_load, i32 3
;
; CHECK:   vector.body:
; CHECK:     %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L1:%[a-zA-Z0-9.]+]], %vector.body ]
; CHECK:     [[L1]] = load <4 x i32>
; CHECK:     {{.*}} = shufflevector <4 x i32> %vector.recur, <4 x i32> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
;
; CHECK:   middle.block:
; CHECK:     %vector.recur.extract = extractelement <4 x i32> [[L1]], i32 3
;
; CHECK:   scalar.ph:
; CHECK:     %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %pre_load, %vector.memcheck ], [ %pre_load, %min.iters.checked ], [ %pre_load, %for.preheader ]
;
; CHECK:   scalar.body:
; CHECK:     %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
;
; UNROLL:  vector.body:
; UNROLL:    %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L2:%[a-zA-Z0-9.]+]], %vector.body ]
; UNROLL:    [[L1:%[a-zA-Z0-9.]+]] = load <4 x i32>
; UNROLL:    [[L2]] = load <4 x i32>
; UNROLL:    {{.*}} = shufflevector <4 x i32> %vector.recur, <4 x i32> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL:    {{.*}} = shufflevector <4 x i32> [[L1]], <4 x i32> [[L2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
;
; UNROLL:  middle.block:
; UNROLL:    %vector.recur.extract = extractelement <4 x i32> [[L2]], i32 3
;
define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) {
entry:
  br label %for.preheader

for.preheader:
  %arrayidx.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 0
  %pre_load = load i32, i32* %arrayidx.phi.trans.insert
  br label %scalar.body

scalar.body:
  ; %0 holds the value loaded by the previous iteration (%1).
  %0 = phi i32 [ %pre_load, %for.preheader ], [ %1, %scalar.body ]
  %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %arrayidx32 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
  %1 = load i32, i32* %arrayidx32
  %arrayidx34 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
  %add35 = add i32 %1, %0
  store i32 %add35, i32* %arrayidx34
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.exit, label %scalar.body

for.exit:
  ret void
}

; CHECK-LABEL: @recurrence_2
;
; int recurrence_2(int *a, int n) {
;   int minmax;
;   for (int i = 0; i < n; ++i)
;     minmax = min(minmax, max(a[i] - a[i-1], 0));
;   return minmax;
; }
;
; CHECK:   vector.ph:
; CHECK:     %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3
;
; CHECK:   vector.body:
; CHECK:     %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L1:%[a-zA-Z0-9.]+]], %vector.body ]
; CHECK:     [[L1]] = load <4 x i32>
; CHECK:     {{.*}} = shufflevector <4 x i32> %vector.recur, <4 x i32> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
;
; CHECK:   middle.block:
; CHECK:     %vector.recur.extract = extractelement <4 x i32> [[L1]], i32 3
;
; CHECK:   scalar.ph:
; CHECK:     %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %.pre, %min.iters.checked ], [ %.pre, %for.preheader ]
;
; CHECK:   scalar.body:
; CHECK:     %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
;
; UNROLL:  vector.body:
; UNROLL:    %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L2:%[a-zA-Z0-9.]+]], %vector.body ]
; UNROLL:    [[L1:%[a-zA-Z0-9.]+]] = load <4 x i32>
; UNROLL:    [[L2]] = load <4 x i32>
; UNROLL:    {{.*}} = shufflevector <4 x i32> %vector.recur, <4 x i32> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL:    {{.*}} = shufflevector <4 x i32> [[L1]], <4 x i32> [[L2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
;
; UNROLL:  middle.block:
; UNROLL:    %vector.recur.extract = extractelement <4 x i32> [[L2]], i32 3
;
define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
entry:
  %cmp27 = icmp sgt i32 %n, 0
  br i1 %cmp27, label %for.preheader, label %for.cond.cleanup

for.preheader:
  ; Preload a[-1] so the first iteration has a "previous" element.
  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 -1
  %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4
  br label %scalar.body

for.cond.cleanup.loopexit:
  %minmax.0.cond.lcssa = phi i32 [ %minmax.0.cond, %scalar.body ]
  br label %for.cond.cleanup

for.cond.cleanup:
  %minmax.0.lcssa = phi i32 [ undef, %entry ], [ %minmax.0.cond.lcssa, %for.cond.cleanup.loopexit ]
  ret i32 %minmax.0.lcssa

scalar.body:
  ; %0 holds the value loaded by the previous iteration (%1).
  %0 = phi i32 [ %.pre, %for.preheader ], [ %1, %scalar.body ]
  %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
  %minmax.028 = phi i32 [ undef, %for.preheader ], [ %minmax.0.cond, %scalar.body ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx, align 4
  %sub3 = sub nsw i32 %1, %0
  %cmp4 = icmp sgt i32 %sub3, 0
  %cond = select i1 %cmp4, i32 %sub3, i32 0
  %cmp5 = icmp slt i32 %minmax.028, %cond
  %minmax.0.cond = select i1 %cmp5, i32 %minmax.028, i32 %cond
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %scalar.body
}

; CHECK-LABEL: @recurrence_3
;
; void recurrence_3(short *a, double *b, int n, float f, short p) {
;   b[0] = (double)a[0] - f * (double)p;
;   for (int i = 1; i < n; i++)
;     b[i] = (double)a[i] - f * (double)a[i - 1];
; }
;
;
; CHECK:   vector.ph:
; CHECK:     %vector.recur.init = insertelement <4 x i16> undef, i16 %0, i32 3
;
; CHECK:   vector.body:
; CHECK:     %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ [[L1:%[a-zA-Z0-9.]+]], %vector.body ]
; CHECK:     [[L1]] = load <4 x i16>
; CHECK:     {{.*}} = shufflevector <4 x i16> %vector.recur, <4 x i16> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
;
; CHECK:   middle.block:
; CHECK:     %vector.recur.extract = extractelement <4 x i16> [[L1]], i32 3
;
; CHECK:   scalar.ph:
; CHECK:     %scalar.recur.init = phi i16 [ %vector.recur.extract, %middle.block ], [ %0, %vector.memcheck ], [ %0, %min.iters.checked ], [ %0, %for.preheader ]
;
; CHECK:   scalar.body:
; CHECK:     %scalar.recur = phi i16 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
;
; UNROLL:  vector.body:
; UNROLL:    %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ [[L2:%[a-zA-Z0-9.]+]], %vector.body ]
; UNROLL:    [[L1:%[a-zA-Z0-9.]+]] = load <4 x i16>
; UNROLL:    [[L2]] = load <4 x i16>
; UNROLL:    {{.*}} = shufflevector <4 x i16> %vector.recur, <4 x i16> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL:    {{.*}} = shufflevector <4 x i16> [[L1]], <4 x i16> [[L2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
;
; UNROLL:  middle.block:
; UNROLL:    %vector.recur.extract = extractelement <4 x i16> [[L2]], i32 3
;
define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 %n, float %f, i16 %p) {
entry:
  ; Peeled first element: b[0] = (double)a[0] - f * (double)p.
  %0 = load i16, i16* %a, align 2
  %conv = sitofp i16 %0 to double
  %conv1 = fpext float %f to double
  %conv2 = sitofp i16 %p to double
  %mul = fmul fast double %conv2, %conv1
  %sub = fsub fast double %conv, %mul
  store double %sub, double* %b, align 8
  %cmp25 = icmp sgt i32 %n, 1
  br i1 %cmp25, label %for.preheader, label %for.end

for.preheader:
  br label %scalar.body

scalar.body:
  ; %1 holds the i16 loaded by the previous iteration (%2); it starts as a[0].
  %1 = phi i16 [ %0, %for.preheader ], [ %2, %scalar.body ]
  %advars.iv = phi i64 [ %advars.iv.next, %scalar.body ], [ 1, %for.preheader ]
  %arrayidx5 = getelementptr inbounds i16, i16* %a, i64 %advars.iv
  %2 = load i16, i16* %arrayidx5, align 2
  %conv6 = sitofp i16 %2 to double
  %conv11 = sitofp i16 %1 to double
  %mul12 = fmul fast double %conv11, %conv1
  %sub13 = fsub fast double %conv6, %mul12
  %arrayidx15 = getelementptr inbounds double, double* %b, i64 %advars.iv
  store double %sub13, double* %arrayidx15, align 8
  %advars.iv.next = add nuw nsw i64 %advars.iv, 1
  %lftr.wideiv = trunc i64 %advars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end.loopexit, label %scalar.body

for.end.loopexit:
  br label %for.end

for.end:
  ret void
}

; CHECK-LABEL: @PR26734
;
; void PR26734(short *a, int *b, int *c, int d, short *e) {
;   for (; d != 21; d++) {
;     *b &= *c;
;     *e = *a - 6;
;     *c = *e;
;   }
; }
;
; Negative test: the default-prefix run must not find a vector.ph block in
; this function's output.
;
; CHECK-NOT: vector.ph:
;
define void @PR26734(i16* %a, i32* %b, i32* %c, i32 %d, i16* %e) {
entry:
  %cmp4 = icmp eq i32 %d, 21
  br i1 %cmp4, label %entry.for.end_crit_edge, label %for.body.lr.ph

entry.for.end_crit_edge:
  %.pre = load i32, i32* %b, align 4
  br label %for.end

for.body.lr.ph:
  %0 = load i16, i16* %a, align 2
  %sub = add i16 %0, -6
  %conv2 = sext i16 %sub to i32
  %c.promoted = load i32, i32* %c, align 4
  %b.promoted = load i32, i32* %b, align 4
  br label %for.body

for.body:
  %inc7 = phi i32 [ %d, %for.body.lr.ph ], [ %inc, %for.body ]
  %and6 = phi i32 [ %b.promoted, %for.body.lr.ph ], [ %and, %for.body ]
  ; %conv25 takes the loop-invariant %conv2 on the back edge and feeds %and,
  ; which is itself carried around the loop through %and6.
  %conv25 = phi i32 [ %c.promoted, %for.body.lr.ph ], [ %conv2, %for.body ]
  %and = and i32 %and6, %conv25
  %inc = add nsw i32 %inc7, 1
  %cmp = icmp eq i32 %inc, 21
  br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body

for.cond.for.end_crit_edge:
  %and.lcssa = phi i32 [ %and, %for.body ]
  store i32 %conv2, i32* %c, align 4
  store i32 %and.lcssa, i32* %b, align 4
  store i16 %sub, i16* %e, align 2
  br label %for.end

for.end:
  ret void
}

; CHECK-LABEL: @PR27246
;
; int PR27246() {
;   unsigned int e, n;
;   for (int i = 1; i < 49; ++i) {
;     for (int k = i; k > 1; --k)
;       e = k;
;     n = e;
;   }
;   return n;
; }
;
; Negative test: the default-prefix run must not find a vector.ph block in
; this function's output.
;
; CHECK-NOT: vector.ph:
;
define i32 @PR27246() {
entry:
  br label %for.cond1.preheader

for.cond1.preheader:
  %i.016 = phi i32 [ 1, %entry ], [ %inc, %for.cond.cleanup3 ]
  %e.015 = phi i32 [ undef, %entry ], [ %e.1.lcssa, %for.cond.cleanup3 ]
  br label %for.cond1

for.cond.cleanup:
  %e.1.lcssa.lcssa = phi i32 [ %e.1.lcssa, %for.cond.cleanup3 ]
  ret i32 %e.1.lcssa.lcssa

for.cond1:
  ; %e.1 receives the previous iteration's %k.0, which is another phi in this
  ; same block.
  %e.1 = phi i32 [ %k.0, %for.cond1 ], [ %e.015, %for.cond1.preheader ]
  %k.0 = phi i32 [ %dec, %for.cond1 ], [ %i.016, %for.cond1.preheader ]
  %cmp2 = icmp sgt i32 %k.0, 1
  %dec = add nsw i32 %k.0, -1
  br i1 %cmp2, label %for.cond1, label %for.cond.cleanup3

for.cond.cleanup3:
  %e.1.lcssa = phi i32 [ %e.1, %for.cond1 ]
  %inc = add nuw nsw i32 %i.016, 1
  %exitcond = icmp eq i32 %inc, 49
  br i1 %exitcond, label %for.cond.cleanup, label %for.cond1.preheader
}

; CHECK-LABEL: @PR29559
;
; Only the run without -dce/-instcombine (UNROLL-NO-IC prefix) has directives
; for this function; the label directive above belongs to the default prefix.
;
; UNROLL-NO-IC: vector.ph:
; UNROLL-NO-IC:   br label %vector.body
;
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL-NO-IC:   %vector.recur = phi <4 x float*> [ undef, %vector.ph ], [ %[[I4:.+]], %vector.body ]
; UNROLL-NO-IC:   %[[G1:.+]] = getelementptr inbounds [3 x float], [3 x float]* undef, i64 0, i64 0
; UNROLL-NO-IC:   %[[I1:.+]] = insertelement <4 x float*> undef, float* %[[G1]], i32 0
; UNROLL-NO-IC:   %[[I2:.+]] = insertelement <4 x float*> %[[I1]], float* %[[G1]], i32 1
; UNROLL-NO-IC:   %[[I3:.+]] = insertelement <4 x float*> %[[I2]], float* %[[G1]], i32 2
; UNROLL-NO-IC:   %[[I4]] = insertelement <4 x float*> %[[I3]], float* %[[G1]], i32 3
; UNROLL-NO-IC:   {{.*}} = shufflevector <4 x float*> %vector.recur, <4 x float*> %[[I4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC:   {{.*}} = shufflevector <4 x float*> %[[I4]], <4 x float*> %[[I4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
;
; UNROLL-NO-IC: middle.block:
; UNROLL-NO-IC:   %vector.recur.extract = extractelement <4 x float*> %[[I4]], i32 3
;
; UNROLL-NO-IC: scalar.ph:
; UNROLL-NO-IC:   %scalar.recur.init = phi float* [ %vector.recur.extract, %middle.block ], [ undef, %min.iters.checked ], [ undef, %entry ]
;
; UNROLL-NO-IC: scalar.body:
; UNROLL-NO-IC:   %scalar.recur = phi float* [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
;
define void @PR29559() {
entry:
  br label %scalar.body

scalar.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %scalar.body ]
  ; %tmp2 carries the previous iteration's %tmp3, a loop-invariant GEP.
  %tmp2 = phi float* [ undef, %entry ], [ %tmp3, %scalar.body ]
  %tmp3 = getelementptr inbounds [3 x float], [3 x float]* undef, i64 0, i64 0
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp eq i64 %i.next, undef
  br i1 %cond, label %for.end, label %scalar.body

for.end:
  ret void
}