; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses -instcombine -S | FileCheck %s --check-prefix=INTERLEAVE

; Summary of the five invocations above, by FileCheck prefix:
;   default prefix - vector width 2, interleave 1
;   IND            - vector width 2, interleave 1, followed by -instcombine
;   UNROLL         - vector width 2, interleave 2, followed by -instcombine
;   UNROLL-NO-IC   - vector width 2, interleave 2
;   INTERLEAVE     - vector width 4, interleave 2, interleaved memory accesses
;                    enabled, followed by -instcombine

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Make sure that we can handle multiple integer induction variables.
; The loop below carries two of them: the i64 %indvars.iv (starting at 0) and
; the i32 %count.09 (starting at 190). The checks expect the second one to be
; rebuilt from the vector loop's %index as 190 + trunc(%index).
; CHECK-LABEL: @multi_int_induction(
; CHECK: vector.body:
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %[[VAR:.*]] = trunc i64 %index to i32
; CHECK: %offset.idx = add i32 190, %[[VAR]]
define void @multi_int_induction(i32* %A, i32 %N) {
for.body.lr.ph:
  br label %for.body

for.body:
  ; Two induction phis: the array index and the stored counter.
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %count.09 = phi i32 [ 190, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  store i32 %count.09, i32* %arrayidx2, align 4
  %inc = add nsw i32 %count.09, 1
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares the truncated i64 index against the i32 bound %N.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  ret void
}

; Make sure we remove unneeded vectorization of induction variables.
; In order for instcombine to clean up the vectorized induction variables that
; we create in the loop vectorizer we need to perform some form of redundancy
; elimination to get rid of multiple uses.

; IND-LABEL: scalar_use

; IND: br label %vector.body
; IND: vector.body:
; Vectorized induction variable.
; IND-NOT: insertelement <2 x i64>
; IND-NOT: shufflevector <2 x i64>
; IND: br {{.*}}, label %vector.body

define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  ; %iv is used twice, each time only to form an address into %a
  ; (%iv + %offset and %iv + %offset2).
  %ind.sum = add i64 %iv, %offset
  %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
  %l1 = load float, float* %arr.idx, align 4
  %ind.sum2 = add i64 %iv, %offset2
  %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
  %l2 = load float, float* %arr.idx2, align 4
  ; a[iv + offset] += b * a[iv + offset2]
  %m = fmul fast float %b, %l2
  %ad = fadd fast float %l1, %m
  store float %ad, float* %arr.idx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %loopexit, label %for.body

loopexit:
  ret void
}

; Make sure we don't create a vector induction phi node that is unused.
; Scalarize the step vectors instead.
;
; for (int i = 0; i < n; ++i)
;   sum += a[i];
;
; CHECK-LABEL: @scalarize_induction_variable_01(
; CHECK: vector.body:
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %[[i0:.+]] = add i64 %index, 0
; CHECK: %[[i1:.+]] = add i64 %index, 1
; CHECK: getelementptr inbounds i64, i64* %a, i64 %[[i0]]
; CHECK: getelementptr inbounds i64, i64* %a, i64 %[[i1]]
;
; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_01(
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL-NO-IC: %[[i0:.+]] = add i64 %index, 0
; UNROLL-NO-IC: %[[i1:.+]] = add i64 %index, 1
; UNROLL-NO-IC: %[[i2:.+]] = add i64 %index, 2
; UNROLL-NO-IC: %[[i3:.+]] = add i64 %index, 3
; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i0]]
; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i1]]
; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i2]]
; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i3]]
;
; IND-LABEL: @scalarize_induction_variable_01(
; IND: vector.body:
; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; IND-NOT: add i64 {{.*}}, 2
; IND: getelementptr inbounds i64, i64* %a, i64 %index
;
; UNROLL-LABEL: @scalarize_induction_variable_01(
; UNROLL: vector.body:
; UNROLL: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL-NOT: add i64 {{.*}}, 4
; UNROLL: %[[g1:.+]] = getelementptr inbounds i64, i64* %a, i64 %index
; UNROLL: getelementptr i64, i64* %[[g1]], i64 2

define i64 @scalarize_induction_variable_01(i64 *%a, i64 %n) {
entry:
  br label %for.body

for.body:
  ; %i is only used to address %a; %sum is the running integer reduction.
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %sum = phi i64 [ %2, %for.body ], [ 0, %entry ]
  %0 = getelementptr inbounds i64, i64* %a, i64 %i
  %1 = load i64, i64* %0, align 8
  %2 = add i64 %1, %sum
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %3 = phi i64 [ %2, %for.body ]
  ret i64 %3
}

; Make sure we scalarize the step vectors used for the pointer arithmetic. We
; can't easily simplify vectorized step vectors.
;
; float s = 0;
; for (int i = 0; i < n; i += 8)
;   s += (a[i] + b[i] + 1.0f);
;
; The induction variable advances by 8 per iteration, so the scalarized
; indices expected below are %index * 8 plus multiples of 8.
;
; CHECK-LABEL: @scalarize_induction_variable_02(
; CHECK: vector.body:
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %offset.idx = shl i64 %index, 3
; CHECK: %[[i0:.+]] = add i64 %offset.idx, 0
; CHECK: %[[i1:.+]] = add i64 %offset.idx, 8
; CHECK: getelementptr inbounds float, float* %a, i64 %[[i0]]
; CHECK: getelementptr inbounds float, float* %a, i64 %[[i1]]
; CHECK: getelementptr inbounds float, float* %b, i64 %[[i0]]
; CHECK: getelementptr inbounds float, float* %b, i64 %[[i1]]
;
; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02(
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL-NO-IC: %offset.idx = shl i64 %index, 3
; UNROLL-NO-IC: %[[i0:.+]] = add i64 %offset.idx, 0
; UNROLL-NO-IC: %[[i1:.+]] = add i64 %offset.idx, 8
; UNROLL-NO-IC: %[[i2:.+]] = add i64 %offset.idx, 16
; UNROLL-NO-IC: %[[i3:.+]] = add i64 %offset.idx, 24
; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i0]]
; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i1]]
; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i2]]
; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i3]]
; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i0]]
; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i1]]
; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i2]]
; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i3]]
;
; IND-LABEL: @scalarize_induction_variable_02(
; IND: vector.body:
; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; IND: %[[i0:.+]] = shl i64 %index, 3
; IND: %[[i1:.+]] = or i64 %[[i0]], 8
; IND: getelementptr inbounds float, float* %a, i64 %[[i0]]
; IND: getelementptr inbounds float, float* %a, i64 %[[i1]]
;
; UNROLL-LABEL: @scalarize_induction_variable_02(
; UNROLL: vector.body:
; UNROLL: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL: %[[i0:.+]] = shl i64 %index, 3
; UNROLL: %[[i1:.+]] = or i64 %[[i0]], 8
; UNROLL: %[[i2:.+]] = or i64 %[[i0]], 16
; UNROLL: %[[i3:.+]] = or i64 %[[i0]], 24
; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i0]]
; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i1]]
; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i2]]
; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i3]]

define float @scalarize_induction_variable_02(float* %a, float* %b, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  %s = phi float [ 0.0, %entry ], [ %6, %for.body ]
  %0 = getelementptr inbounds float, float* %a, i64 %i
  %1 = load float, float* %0, align 4
  %2 = getelementptr inbounds float, float* %b, i64 %i
  %3 = load float, float* %2, align 4
  ; s = s + 1.0 + a[i] + b[i], as a chain of fast-math adds.
  %4 = fadd fast float %s, 1.0
  %5 = fadd fast float %4, %1
  %6 = fadd fast float %5, %3
  ; Step of 8, not 1.
  %i.next = add nuw nsw i64 %i, 8
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %s.lcssa = phi float [ %6, %for.body ]
  ret float %s.lcssa
}

; Make sure we scalarize the step vectors used for the pointer arithmetic. We
; can't easily simplify vectorized step vectors. (Interleaved accesses.)
;
; for (int i = 0; i < n; ++i)
;   p[i].f ^= y;
;
; Here .f stands for the second field of %pair.i32 (struct index 1 in the
; getelementptr below).
;
; INTERLEAVE-LABEL: @scalarize_induction_variable_03(
; INTERLEAVE: vector.body:
; INTERLEAVE: %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; INTERLEAVE: %[[i1:.+]] = or i64 %[[i0]], 1
; INTERLEAVE: %[[i2:.+]] = or i64 %[[i0]], 2
; INTERLEAVE: %[[i3:.+]] = or i64 %[[i0]], 3
; INTERLEAVE: %[[i4:.+]] = or i64 %[[i0]], 4
; INTERLEAVE: %[[i5:.+]] = or i64 %[[i0]], 5
; INTERLEAVE: %[[i6:.+]] = or i64 %[[i0]], 6
; INTERLEAVE: %[[i7:.+]] = or i64 %[[i0]], 7
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i0]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i1]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i2]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i3]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i4]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i5]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i6]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i7]], i32 1

%pair.i32 = type { i32, i32 }
define void @scalarize_induction_variable_03(%pair.i32 *%p, i32 %y, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  ; Address of the second field of p[i]; it is loaded, xor'ed with %y and
  ; stored back to the same location.
  %f = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
  %0 = load i32, i32* %f, align 8
  %1 = xor i32 %0, %y
  store i32 %1, i32* %f, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Make sure we scalarize the step vectors used for the pointer arithmetic. We
; can't easily simplify vectorized step vectors. (Interleaved accesses.)
;
; for (int i = 0; i < n; ++i)
;   p[i].f = a[i * 4];
;
; INTERLEAVE-LABEL: @scalarize_induction_variable_04(
; INTERLEAVE: vector.body:
; INTERLEAVE: %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; INTERLEAVE: %[[i1:.+]] = or i64 %[[i0]], 1
; INTERLEAVE: %[[i2:.+]] = or i64 %[[i0]], 2
; INTERLEAVE: %[[i3:.+]] = or i64 %[[i0]], 3
; INTERLEAVE: %[[i4:.+]] = or i64 %[[i0]], 4
; INTERLEAVE: %[[i5:.+]] = or i64 %[[i0]], 5
; INTERLEAVE: %[[i6:.+]] = or i64 %[[i0]], 6
; INTERLEAVE: %[[i7:.+]] = or i64 %[[i0]], 7
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i0]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i1]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i2]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i3]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i4]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i5]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i6]], i32 1
; INTERLEAVE: getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i7]], i32 1

define void @scalarize_induction_variable_04(i32* %a, %pair.i32* %p, i32 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry]
  ; Load from a[i * 4] (index formed with a shift by 2) ...
  %0 = shl nsw i64 %i, 2
  %1 = getelementptr inbounds i32, i32* %a, i64 %0
  %2 = load i32, i32* %1, align 1
  ; ... and store into the second field of p[i].
  %3 = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
  store i32 %2, i32* %3, align 1
  %i.next = add nuw nsw i64 %i, 1
  ; The exit test is done on the i64 induction variable truncated to i32.
  %4 = trunc i64 %i.next to i32
  %cond = icmp eq i32 %4, %n
  br i1 %cond, label %for.end, label %for.body

for.end:
  ret void
}

; Ensure we generate both a vector and a scalar induction variable. In this
; test, the induction variable is used by an instruction that will be
; vectorized (trunc) as well as an instruction that will remain in scalar form
; (getelementptr).
;
; CHECK-LABEL: @iv_vector_and_scalar_users(
; CHECK: vector.body:
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
; CHECK: %vec.ind1 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next2, %vector.body ]
; CHECK: %[[i0:.+]] = add i64 %index, 0
; CHECK: %[[i1:.+]] = add i64 %index, 1
; CHECK: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i0]], i32 1
; CHECK: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1
; CHECK: %index.next = add i64 %index, 2
; CHECK: %vec.ind.next = add <2 x i64> %vec.ind, <i64 2, i64 2>
; CHECK: %vec.ind.next2 = add <2 x i32> %vec.ind1, <i32 2, i32 2>
;
; IND-LABEL: @iv_vector_and_scalar_users(
; IND: vector.body:
; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; IND: %vec.ind1 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next2, %vector.body ]
; IND: %[[i1:.+]] = or i64 %index, 1
; IND: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %index, i32 1
; IND: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1
; IND: %index.next = add i64 %index, 2
; IND: %vec.ind.next2 = add <2 x i32> %vec.ind1, <i32 2, i32 2>
;
; UNROLL-LABEL: @iv_vector_and_scalar_users(
; UNROLL: vector.body:
; UNROLL: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL: %vec.ind2 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next5, %vector.body ]
; UNROLL: %[[i1:.+]] = or i64 %index, 1
; UNROLL: %[[i2:.+]] = or i64 %index, 2
; UNROLL: %[[i3:.+]] = or i64 %index, 3
; UNROLL: %step.add3 = add <2 x i32> %vec.ind2, <i32 2, i32 2>
; UNROLL: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %index, i32 1
; UNROLL: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1
; UNROLL: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i2]], i32 1
; UNROLL: getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i3]], i32 1
; UNROLL: %index.next = add i64 %index, 4
; UNROLL: %vec.ind.next5 = add <2 x i32> %vec.ind2, <i32 4, i32 4>

%pair.i16 = type { i16, i16 }
define void @iv_vector_and_scalar_users(%pair.i16* %p, i32 %a, i32 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  ; First user of %i: the trunc feeding the stored value (%a + i, as i16).
  %0 = trunc i64 %i to i32
  %1 = add i32 %a, %0
  %2 = trunc i32 %1 to i16
  ; Second user of %i: the address of the second field of p[i].
  %3 = getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %i, i32 1
  store i16 %2, i16* %3, align 2
  %i.next = add nuw nsw i64 %i, 1
  %4 = trunc i64 %i.next to i32
  %cond = icmp eq i32 %4, %n
  br i1 %cond, label %for.end, label %for.body

for.end:
  ret void
}

; Make sure that the loop exit count computation does not overflow for i8 and
; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the
; induction variable to a bigger type the exit count computation will overflow
; to 0.
; PR17532

; CHECK-LABEL: i8_loop
; CHECK: icmp eq i32 {{.*}}, 256
define i32 @i8_loop() nounwind readnone ssp uwtable {
  br label %1

; <label>:1 ; preds = %1, %0
  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
  ; The i8 counter starts at 0, is decremented every iteration, and the loop
  ; exits once the decremented value is 0 again.
  %b.0 = phi i8 [ 0, %0 ], [ %3, %1 ]
  %2 = and i32 %a.0, 4
  %3 = add i8 %b.0, -1
  %4 = icmp eq i8 %3, 0
  br i1 %4, label %5, label %1

; <label>:5 ; preds = %1
  ret i32 %2
}

; Same loop shape as above, with an i16 counter.
; CHECK-LABEL: i16_loop
; CHECK: icmp eq i32 {{.*}}, 65536

define i32 @i16_loop() nounwind readnone ssp uwtable {
  br label %1

; <label>:1 ; preds = %1, %0
  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
  %b.0 = phi i16 [ 0, %0 ], [ %3, %1 ]
  %2 = and i32 %a.0, 4
  %3 = add i16 %b.0, -1
  %4 = icmp eq i16 %3, 0
  br i1 %4, label %5, label %1

; <label>:5 ; preds = %1
  ret i32 %2
}

; This loop has a backedge taken count of i32_max. We need to check for this
; condition and branch directly to the scalar loop.

; CHECK-LABEL: max_i32_backedgetaken
; CHECK: br i1 true, label %scalar.ph, label %min.iters.checked

; CHECK: middle.block:
; CHECK: %[[v9:.+]] = extractelement <2 x i32> %bin.rdx, i32 0
; CHECK: scalar.ph:
; CHECK: %bc.resume.val = phi i32 [ 0, %middle.block ], [ 0, %[[v0:.+]] ]
; CHECK: %bc.merge.rdx = phi i32 [ 1, %[[v0:.+]] ], [ 1, %min.iters.checked ], [ %[[v9]], %middle.block ]

define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {

  br label %1

; <label>:1 ; preds = %1, %0
  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
  ; Same counting-down pattern as the i8/i16 loops, but on a full i32.
  %b.0 = phi i32 [ 0, %0 ], [ %3, %1 ]
  %2 = and i32 %a.0, 4
  %3 = add i32 %b.0, -1
  %4 = icmp eq i32 %3, 0
  br i1 %4, label %5, label %1

; <label>:5 ; preds = %1
  ret i32 %2
}

; When generating the overflow check we must make sure that the induction start
; value is defined before the branch to the scalar preheader.

; CHECK-LABEL: testoverflowcheck
; CHECK: entry
; CHECK: %[[LOAD:.*]] = load i8
; CHECK: br

; CHECK: scalar.ph
; CHECK: phi i8 [ %{{.*}}, %middle.block ], [ %[[LOAD]], %entry ]

@e = global i8 1, align 1
@d = common global i32 0, align 4
@c = common global i32 0, align 4
define i32 @testoverflowcheck() {
entry:
  ; The i8 induction start value is loaded from @e here, in the entry block.
  %.pr.i = load i8, i8* @e, align 1
  %0 = load i32, i32* @d, align 4
  %c.promoted.i = load i32, i32* @c, align 4
  br label %cond.end.i

cond.end.i:
  %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ]
  %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ]
  %and.i = and i32 %0, %and3.i
  ; The loop exits when the incremented i8 counter becomes 0.
  %inc.i = add i8 %inc4.i, 1
  %tobool.i = icmp eq i8 %inc.i, 0
  br i1 %tobool.i, label %loopexit, label %cond.end.i

loopexit:
  ret i32 %and.i
}

; The SCEV expression of %sphi is (zext i8 {%t,+,1}<%loop> to i32)
; In order to recognize %sphi as an induction PHI and vectorize this loop,
; we need to convert the SCEV expression into an AddRecExpr.
; The expression gets converted to {zext i8 %t to i32,+,1}.

; CHECK-LABEL: wrappingindvars1
; CHECK-LABEL: vector.scevcheck
; CHECK-LABEL: vector.ph
; CHECK: %[[START:.*]] = add <2 x i32> %{{.*}}, <i32 0, i32 1>
; CHECK-LABEL: vector.body
; CHECK: %[[PHI:.*]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.*]], %vector.body ]
; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 2, i32 2>
define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) {
entry:
  %st = zext i8 %t to i16
  %ext = zext i8 %t to i32
  ; The loop is only entered when %t is (unsigned) less than 42.
  %ecmp = icmp ult i16 %st, 42
  br i1 %ecmp, label %loop, label %exit

loop:

  ; %idx is the narrow i8 counter, %idx.b the i32 counter that controls the
  ; exit, and %sphi carries the zero-extended value of %idx.
  %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
  %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]
  %sphi = phi i32 [ %ext, %entry ], [%idx.inc.ext, %loop]

  %ptr = getelementptr inbounds i32, i32* %A, i8 %idx
  store i32 %sphi, i32* %ptr

  %idx.inc = add i8 %idx, 1
  %idx.inc.ext = zext i8 %idx.inc to i32
  %idx.b.inc = add nuw nsw i32 %idx.b, 1

  %c = icmp ult i32 %idx.b, %len
  br i1 %c, label %loop, label %exit

exit:
  ret void
}

; The SCEV expression of %sphi is (4 * (zext i8 {%t,+,1}<%loop> to i32))
; In order to recognize %sphi as an induction PHI and vectorize this loop,
; we need to convert the SCEV expression into an AddRecExpr.
; The expression gets converted to ({4 * (zext %t to i32),+,4}).
; CHECK-LABEL: wrappingindvars2
; CHECK-LABEL: vector.scevcheck
; CHECK-LABEL: vector.ph
; CHECK: %[[START:.*]] = add <2 x i32> %{{.*}}, <i32 0, i32 4>
; CHECK-LABEL: vector.body
; CHECK: %[[PHI:.*]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.*]], %vector.body ]
; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 8, i32 8>
define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) {

entry:
  %st = zext i8 %t to i16
  %ext = zext i8 %t to i32
  ; Start value of %sphi: the zero-extended %t scaled by 4.
  %ext.mul = mul i32 %ext, 4

  %ecmp = icmp ult i16 %st, 42
  br i1 %ecmp, label %loop, label %exit

loop:

  %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
  %sphi = phi i32 [ %ext.mul, %entry ], [%mul, %loop]
  %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]

  %ptr = getelementptr inbounds i32, i32* %A, i8 %idx
  store i32 %sphi, i32* %ptr

  ; The next %sphi value is the zero-extended, incremented i8 counter times 4.
  %idx.inc = add i8 %idx, 1
  %idx.inc.ext = zext i8 %idx.inc to i32
  %mul = mul i32 %idx.inc.ext, 4
  %idx.b.inc = add nuw nsw i32 %idx.b, 1

  %c = icmp ult i32 %idx.b, %len
  br i1 %c, label %loop, label %exit

exit:
  ret void
}

; Check that we generate vectorized IVs in the pre-header
; instead of widening the scalar IV inside the loop, when
; we know how to do that.
; IND-LABEL: veciv
; IND: vector.body:
; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; IND: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
; IND: %index.next = add i32 %index, 2
; IND: %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
; IND: %[[CMP:.*]] = icmp eq i32 %index.next
; IND: br i1 %[[CMP]]
; UNROLL-LABEL: veciv
; UNROLL: vector.body:
; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
; UNROLL: %index.next = add i32 %index, 4
; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, <i32 4, i32 4>
; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next
; UNROLL: br i1 %[[CMP]]
define void @veciv(i32* nocapture %a, i32 %start, i32 %k) {
for.body.preheader:
  br label %for.body

for.body:
  ; The i32 induction variable is both the array index and the stored value.
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
  store i32 %indvars.iv, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, %k
  br i1 %exitcond, label %exit, label %for.body

exit:
  ret void
}

; The i64 induction variable below is only used through a trunc to i32. The
; checks expect a <2 x i32> vector induction phi alongside the scalar i64
; %index.
; IND-LABEL: trunciv
; IND: vector.body:
; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; IND: %[[VECIND:.*]] = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %[[STEPADD:.*]], %vector.body ]
; IND: %index.next = add i64 %index, 2
; IND: %[[STEPADD]] = add <2 x i32> %[[VECIND]], <i32 2, i32 2>
; IND: %[[CMP:.*]] = icmp eq i64 %index.next
; IND: br i1 %[[CMP]]
define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
for.body.preheader:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  ; The truncated value is used both as the array index and as the stored
  ; value.
  %trunc.iv = trunc i64 %indvars.iv to i32
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %trunc.iv
  store i32 %trunc.iv, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %k
  br i1 %exitcond, label %exit, label %for.body

exit:
  ret void
}

; The induction variable below starts at the argument %i instead of 0. The
; checks expect the vector induction start value to be built in vector.ph from
; a splat of %i plus <0, 1>, and the scalar index to be %i + %index.
; CHECK-LABEL: @nonprimary(
; CHECK: vector.ph:
; CHECK: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
; CHECK: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
; CHECK: vector.body:
; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
; CHECK: %offset.idx = add i32 %i, %index
; CHECK: %[[A1:.*]] = add i32 %offset.idx, 0
; CHECK: %[[A2:.*]] = add i32 %offset.idx, 1
; CHECK: %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i32 %[[A1]]
; CHECK: %[[G2:.*]] = getelementptr inbounds i32, i32* %a, i32 %[[A2]]
; CHECK: %[[G3:.*]] = getelementptr i32, i32* %[[G1]], i32 0
; CHECK: %[[B1:.*]] = bitcast i32* %[[G3]] to <2 x i32>*
; CHECK: store <2 x i32> %vec.ind, <2 x i32>* %[[B1]]
; CHECK: %index.next = add i32 %index, 2
; CHECK: %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
; CHECK: %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec
; CHECK: br i1 %[[CMP]]
;
; IND-LABEL: @nonprimary(
; IND: vector.ph:
; IND: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
; IND: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; IND: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
; IND: vector.body:
; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; IND: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
; IND: %[[A1:.*]] = add i32 %index, %i
; IND: %[[S1:.*]] = sext i32 %[[A1]] to i64
; IND: %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i64 %[[S1]]
; IND: %[[B1:.*]] = bitcast i32* %[[G1]] to <2 x i32>*
; IND: store <2 x i32> %vec.ind, <2 x i32>* %[[B1]]
; IND: %index.next = add i32 %index, 2
; IND: %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
; IND: %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec
; IND: br i1 %[[CMP]]
;
; UNROLL-LABEL: @nonprimary(
; UNROLL: vector.ph:
; UNROLL: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
; UNROLL: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; UNROLL: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
; UNROLL: vector.body:
; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
; UNROLL: %[[A1:.*]] = add i32 %index, %i
; UNROLL: %[[S1:.*]] = sext i32 %[[A1]] to i64
; UNROLL: %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i64 %[[S1]]
; UNROLL: %[[B1:.*]] = bitcast i32* %[[G1]] to <2 x i32>*
; UNROLL: store <2 x i32> %vec.ind, <2 x i32>* %[[B1]]
; UNROLL: %[[G2:.*]] = getelementptr i32, i32* %[[G1]], i64 2
; UNROLL: %[[B2:.*]] = bitcast i32* %[[G2]] to <2 x i32>*
; UNROLL: store <2 x i32> %step.add, <2 x i32>* %[[B2]]
; UNROLL: %index.next = add i32 %index, 4
; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, <i32 4, i32 4>
; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec
; UNROLL: br i1 %[[CMP]]
define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) {
for.body.preheader:
  br label %for.body

for.body:
  ; Starts at %i (a function argument), not at a constant.
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ %i, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
  store i32 %indvars.iv, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, %k
  br i1 %exitcond, label %exit, label %for.body

exit:
  ret void
}
