; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses -instcombine -S | FileCheck %s --check-prefix=INTERLEAVE

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Make sure that we can handle multiple integer induction variables.
; The loop carries an i64 index (%indvars.iv) and a second i32 counter
; (%count.09) that starts at 190; the counter is the value being stored.
; CHECK-LABEL: @multi_int_induction(
; CHECK: vector.body:
; CHECK:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK:   %[[VAR:.*]] = trunc i64 %index to i32
; CHECK:   %offset.idx = add i32 190, %[[VAR]]
define void @multi_int_induction(i32* %A, i32 %N) {
for.body.lr.ph:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %count.09 = phi i32 [ 190, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  store i32 %count.09, i32* %arrayidx2, align 4
  %inc = add nsw i32 %count.09, 1
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  ret void
}

; Make sure we remove unneeded vectorization of induction variables.
; In order for instcombine to clean up the vectorized induction variables that we
; create in the loop vectorizer we need to perform some form of redundancy
; elimination to get rid of multiple uses.

; IND-LABEL: scalar_use

; IND:     br label %vector.body
; IND:     vector.body:
;   Vectorized induction variable.
; IND-NOT:  insertelement <2 x i64>
; IND-NOT:  shufflevector <2 x i64>
; IND:     br {{.*}}, label %vector.body

define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  ; The induction variable is only used (via two offsets) to form addresses.
  %ind.sum = add i64 %iv, %offset
  %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
  %l1 = load float, float* %arr.idx, align 4
  %ind.sum2 = add i64 %iv, %offset2
  %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
  %l2 = load float, float* %arr.idx2, align 4
  %m = fmul fast float %b, %l2
  %ad = fadd fast float %l1, %m
  store float %ad, float* %arr.idx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %loopexit, label %for.body

loopexit:
  ret void
}

; Make sure we don't create a vector induction phi node that is unused.
; Scalarize the step vectors instead.
;
; for (int i = 0; i < n; ++i)
;   sum += a[i];
;
; CHECK-LABEL: @scalarize_induction_variable_01(
; CHECK: vector.body:
; CHECK:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK:   %[[i0:.+]] = add i64 %index, 0
; CHECK:   getelementptr inbounds i64, i64* %a, i64 %[[i0]]
;
; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_01(
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL-NO-IC:   %[[i0:.+]] = add i64 %index, 0
; UNROLL-NO-IC:   %[[i2:.+]] = add i64 %index, 2
; UNROLL-NO-IC:   getelementptr inbounds i64, i64* %a, i64 %[[i0]]
; UNROLL-NO-IC:   getelementptr inbounds i64, i64* %a, i64 %[[i2]]
;
; IND-LABEL: @scalarize_induction_variable_01(
; IND: vector.body:
; IND:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; IND-NOT: add i64 {{.*}}, 2
; IND:   getelementptr inbounds i64, i64* %a, i64 %index
;
; UNROLL-LABEL: @scalarize_induction_variable_01(
; UNROLL: vector.body:
; UNROLL:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL-NOT: add i64 {{.*}}, 4
; UNROLL:   %[[g1:.+]] = getelementptr inbounds i64, i64* %a, i64 %index
; UNROLL:   getelementptr i64, i64* %[[g1]], i64 2

define i64 @scalarize_induction_variable_01(i64 *%a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %sum = phi i64 [ %2, %for.body ], [ 0, %entry ]
  %0 = getelementptr inbounds i64, i64* %a, i64 %i
  %1 = load i64, i64* %0, align 8
  %2 = add i64 %1, %sum
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %3 = phi i64 [ %2, %for.body ]
  ret i64 %3
}

; Make sure we scalarize the step vectors used for the pointer arithmetic. We
; can't easily simplify vectorized step vectors.
;
; float s = 0;
; for (int i = 0; i < n; i += 8)
;   s += (a[i] + b[i] + 1.0f);
;
; CHECK-LABEL: @scalarize_induction_variable_02(
; CHECK: vector.body:
; CHECK:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK:   %offset.idx = shl i64 %index, 3
; CHECK:   %[[i0:.+]] = add i64 %offset.idx, 0
; CHECK:   %[[i1:.+]] = add i64 %offset.idx, 8
; CHECK:   getelementptr inbounds float, float* %a, i64 %[[i0]]
; CHECK:   getelementptr inbounds float, float* %a, i64 %[[i1]]
; CHECK:   getelementptr inbounds float, float* %b, i64 %[[i0]]
; CHECK:   getelementptr inbounds float, float* %b, i64 %[[i1]]
;
; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02(
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL-NO-IC:   %offset.idx = shl i64 %index, 3
; UNROLL-NO-IC:   %[[i0:.+]] = add i64 %offset.idx, 0
; UNROLL-NO-IC:   %[[i1:.+]] = add i64 %offset.idx, 8
; UNROLL-NO-IC:   %[[i2:.+]] = add i64 %offset.idx, 16
; UNROLL-NO-IC:   %[[i3:.+]] = add i64 %offset.idx, 24
; UNROLL-NO-IC:   getelementptr inbounds float, float* %a, i64 %[[i0]]
; UNROLL-NO-IC:   getelementptr inbounds float, float* %a, i64 %[[i1]]
; UNROLL-NO-IC:   getelementptr inbounds float, float* %a, i64 %[[i2]]
; UNROLL-NO-IC:   getelementptr inbounds float, float* %a, i64 %[[i3]]
; UNROLL-NO-IC:   getelementptr inbounds float, float* %b, i64 %[[i0]]
; UNROLL-NO-IC:   getelementptr inbounds float, float* %b, i64 %[[i1]]
; UNROLL-NO-IC:   getelementptr inbounds float, float* %b, i64 %[[i2]]
; UNROLL-NO-IC:   getelementptr inbounds float, float* %b, i64 %[[i3]]
;
; IND-LABEL: @scalarize_induction_variable_02(
; IND: vector.body:
; IND:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; IND:   %[[i0:.+]] = shl i64 %index, 3
; IND:   %[[i1:.+]] = or i64 %[[i0]], 8
; IND:   getelementptr inbounds float, float* %a, i64 %[[i0]]
; IND:   getelementptr inbounds float, float* %a, i64 %[[i1]]
;
; UNROLL-LABEL: @scalarize_induction_variable_02(
; UNROLL: vector.body:
; UNROLL:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL:   %[[i0:.+]] = shl i64 %index, 3
; UNROLL:   %[[i1:.+]] = or i64 %[[i0]], 8
; UNROLL:   %[[i2:.+]] = or i64 %[[i0]], 16
; UNROLL:   %[[i3:.+]] = or i64 %[[i0]], 24
; UNROLL:   getelementptr inbounds float, float* %a, i64 %[[i0]]
; UNROLL:   getelementptr inbounds float, float* %a, i64 %[[i1]]
; UNROLL:   getelementptr inbounds float, float* %a, i64 %[[i2]]
; UNROLL:   getelementptr inbounds float, float* %a, i64 %[[i3]]

define float @scalarize_induction_variable_02(float* %a, float* %b, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  %s = phi float [ 0.0, %entry ], [ %6, %for.body ]
  %0 = getelementptr inbounds float, float* %a, i64 %i
  %1 = load float, float* %0, align 4
  %2 = getelementptr inbounds float, float* %b, i64 %i
  %3 = load float, float* %2, align 4
  %4 = fadd fast float %s, 1.0
  %5 = fadd fast float %4, %1
  %6 = fadd fast float %5, %3
  ; The induction variable advances by 8 each iteration.
  %i.next = add nuw nsw i64 %i, 8
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %s.lcssa = phi float [ %6, %for.body ]
  ret float %s.lcssa
}

; Make sure we scalarize the step vectors used for the pointer arithmetic. We
; can't easily simplify vectorized step vectors. (Interleaved accesses.)
;
; for (int i = 0; i < n; ++i)
;   a[i].f ^= y;
;
; INTERLEAVE-LABEL: @scalarize_induction_variable_03(
; INTERLEAVE: vector.body:
; INTERLEAVE:   %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; INTERLEAVE:   %[[i1:.+]] = or i64 %[[i0]], 1
; INTERLEAVE:   %[[i2:.+]] = or i64 %[[i0]], 2
; INTERLEAVE:   %[[i3:.+]] = or i64 %[[i0]], 3
; INTERLEAVE:   %[[i4:.+]] = or i64 %[[i0]], 4
; INTERLEAVE:   %[[i5:.+]] = or i64 %[[i0]], 5
; INTERLEAVE:   %[[i6:.+]] = or i64 %[[i0]], 6
; INTERLEAVE:   %[[i7:.+]] = or i64 %[[i0]], 7
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i0]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i1]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i2]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i3]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i4]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i5]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i6]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i7]], i32 1

%pair.i32 = type { i32, i32 }
define void @scalarize_induction_variable_03(%pair.i32 *%p, i32 %y, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  ; Only the second field of each pair is read and written.
  %f = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
  %0 = load i32, i32* %f, align 8
  %1 = xor i32 %0, %y
  store i32 %1, i32* %f, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Make sure we scalarize the step vectors used for the pointer arithmetic. We
; can't easily simplify vectorized step vectors. (Interleaved accesses.)
;
; for (int i = 0; i < n; ++i)
;   p[i].f = a[i * 4];
;
; INTERLEAVE-LABEL: @scalarize_induction_variable_04(
; INTERLEAVE: vector.body:
; INTERLEAVE:   %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; INTERLEAVE:   %[[i1:.+]] = or i64 %[[i0]], 1
; INTERLEAVE:   %[[i2:.+]] = or i64 %[[i0]], 2
; INTERLEAVE:   %[[i3:.+]] = or i64 %[[i0]], 3
; INTERLEAVE:   %[[i4:.+]] = or i64 %[[i0]], 4
; INTERLEAVE:   %[[i5:.+]] = or i64 %[[i0]], 5
; INTERLEAVE:   %[[i6:.+]] = or i64 %[[i0]], 6
; INTERLEAVE:   %[[i7:.+]] = or i64 %[[i0]], 7
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i0]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i1]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i2]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i3]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i4]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i5]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i6]], i32 1
; INTERLEAVE:   getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %[[i7]], i32 1

define void @scalarize_induction_variable_04(i32* %a, %pair.i32* %p, i32 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry]
  ; The load index is i * 4; the store index is i (second field of the pair).
  %0 = shl nsw i64 %i, 2
  %1 = getelementptr inbounds i32, i32* %a, i64 %0
  %2 = load i32, i32* %1, align 1
  %3 = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
  store i32 %2, i32* %3, align 1
  %i.next = add nuw nsw i64 %i, 1
  %4 = trunc i64 %i.next to i32
  %cond = icmp eq i32 %4, %n
  br i1 %cond, label %for.end, label %for.body

for.end:
  ret void
}

; PR30542. Ensure we generate all the scalar steps for the induction variable.
; The scalar induction variable is used by a getelementptr instruction
; (uniform), and a udiv (non-uniform).
;
; int sum = 0;
; for (int i = 0; i < n; ++i) {
;   int x = a[i];
;   if (c)
;     x /= i;
;   sum += x;
; }
;
; CHECK-LABEL: @scalarize_induction_variable_05(
; CHECK: vector.body:
; CHECK:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue2 ]
; CHECK:   %[[I0:.+]] = add i32 %index, 0
; CHECK:   %[[I1:.+]] = add i32 %index, 1
; CHECK:   getelementptr inbounds i32, i32* %a, i32 %[[I0]]
; CHECK: pred.udiv.if:
; CHECK:   udiv i32 {{.*}}, %[[I0]]
; CHECK: pred.udiv.if1:
; CHECK:   udiv i32 {{.*}}, %[[I1]]
;
; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_05(
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue11 ]
; UNROLL-NO-IC:   %[[I0:.+]] = add i32 %index, 0
; UNROLL-NO-IC:   %[[I1:.+]] = add i32 %index, 1
; UNROLL-NO-IC:   %[[I2:.+]] = add i32 %index, 2
; UNROLL-NO-IC:   %[[I3:.+]] = add i32 %index, 3
; UNROLL-NO-IC:   getelementptr inbounds i32, i32* %a, i32 %[[I0]]
; UNROLL-NO-IC:   getelementptr inbounds i32, i32* %a, i32 %[[I2]]
; UNROLL-NO-IC: pred.udiv.if:
; UNROLL-NO-IC:   udiv i32 {{.*}}, %[[I0]]
; UNROLL-NO-IC: pred.udiv.if6:
; UNROLL-NO-IC:   udiv i32 {{.*}}, %[[I1]]
; UNROLL-NO-IC: pred.udiv.if8:
; UNROLL-NO-IC:   udiv i32 {{.*}}, %[[I2]]
; UNROLL-NO-IC: pred.udiv.if10:
; UNROLL-NO-IC:   udiv i32 {{.*}}, %[[I3]]
;
; IND-LABEL: @scalarize_induction_variable_05(
; IND: vector.body:
; IND:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue2 ]
; IND:   %[[I1:.+]] = or i32 %index, 1
; IND:   %[[E0:.+]] = sext i32 %index to i64
; IND:   getelementptr inbounds i32, i32* %a, i64 %[[E0]]
; IND: pred.udiv.if:
; IND:   udiv i32 {{.*}}, %index
; IND: pred.udiv.if1:
; IND:   udiv i32 {{.*}}, %[[I1]]
;
; UNROLL-LABEL: @scalarize_induction_variable_05(
; UNROLL: vector.body:
; UNROLL:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %pred.udiv.continue11 ]
; UNROLL:   %[[I1:.+]] = or i32 %index, 1
; UNROLL:   %[[I2:.+]] = or i32 %index, 2
; UNROLL:   %[[I3:.+]] = or i32 %index, 3
; UNROLL:   %[[E0:.+]] = sext i32 %index to i64
; UNROLL:   %[[G0:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[E0]]
; UNROLL:   getelementptr i32, i32* %[[G0]], i64 2
; UNROLL: pred.udiv.if:
; UNROLL:   udiv i32 {{.*}}, %index
; UNROLL: pred.udiv.if6:
; UNROLL:   udiv i32 {{.*}}, %[[I1]]
; UNROLL: pred.udiv.if8:
; UNROLL:   udiv i32 {{.*}}, %[[I2]]
; UNROLL: pred.udiv.if10:
; UNROLL:   udiv i32 {{.*}}, %[[I3]]

define i32 @scalarize_induction_variable_05(i32* %a, i32 %x, i1 %c, i32 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i32 [ 0, %entry ], [ %i.next, %if.end ]
  %sum = phi i32 [ 0, %entry ], [ %tmp4, %if.end ]
  %tmp0 = getelementptr inbounds i32, i32* %a, i32 %i
  %tmp1 = load i32, i32* %tmp0, align 4
  br i1 %c, label %if.then, label %if.end

if.then:
  ; Conditional division by the induction variable itself.
  %tmp2 = udiv i32 %tmp1, %i
  br label %if.end

if.end:
  %tmp3 = phi i32 [ %tmp2, %if.then ], [ %tmp1, %for.body ]
  %tmp4 = add i32 %tmp3, %sum
  %i.next = add nuw nsw i32 %i, 1
  %cond = icmp slt i32 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp5 = phi i32 [ %tmp4, %if.end ]
  ret i32 %tmp5
}

; Ensure we generate both a vector and a scalar induction variable. In this
; test, the induction variable is used by an instruction that will be
; vectorized (trunc) as well as an instruction that will remain in scalar form
; (getelementptr).
;
; CHECK-LABEL: @iv_vector_and_scalar_users(
; CHECK: vector.body:
; CHECK:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK:   %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
; CHECK:   %vec.ind1 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next2, %vector.body ]
; CHECK:   %[[i0:.+]] = add i64 %index, 0
; CHECK:   %[[i1:.+]] = add i64 %index, 1
; CHECK:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i0]], i32 1
; CHECK:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1
; CHECK:   %index.next = add i64 %index, 2
; CHECK:   %vec.ind.next = add <2 x i64> %vec.ind, <i64 2, i64 2>
; CHECK:   %vec.ind.next2 = add <2 x i32> %vec.ind1, <i32 2, i32 2>
;
; IND-LABEL: @iv_vector_and_scalar_users(
; IND: vector.body:
; IND:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; IND:   %vec.ind1 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next2, %vector.body ]
; IND:   %[[i1:.+]] = or i64 %index, 1
; IND:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %index, i32 1
; IND:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1
; IND:   %index.next = add i64 %index, 2
; IND:   %vec.ind.next2 = add <2 x i32> %vec.ind1, <i32 2, i32 2>
;
; UNROLL-LABEL: @iv_vector_and_scalar_users(
; UNROLL: vector.body:
; UNROLL:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL:   %vec.ind2 = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next5, %vector.body ]
; UNROLL:   %[[i1:.+]] = or i64 %index, 1
; UNROLL:   %[[i2:.+]] = or i64 %index, 2
; UNROLL:   %[[i3:.+]] = or i64 %index, 3
; UNROLL:   %step.add3 = add <2 x i32> %vec.ind2, <i32 2, i32 2>
; UNROLL:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %index, i32 1
; UNROLL:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i1]], i32 1
; UNROLL:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i2]], i32 1
; UNROLL:   getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %[[i3]], i32 1
; UNROLL:   %index.next = add i64 %index, 4
; UNROLL:   %vec.ind.next5 = add <2 x i32> %vec.ind2, <i32 4, i32 4>

%pair.i16 = type { i16, i16 }
define void @iv_vector_and_scalar_users(%pair.i16* %p, i32 %a, i32 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  ; %i feeds both a trunc (value computation) and a getelementptr (address).
  %0 = trunc i64 %i to i32
  %1 = add i32 %a, %0
  %2 = trunc i32 %1 to i16
  %3 = getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %i, i32 1
  store i16 %2, i16* %3, align 2
  %i.next = add nuw nsw i64 %i, 1
  %4 = trunc i64 %i.next to i32
  %cond = icmp eq i32 %4, %n
  br i1 %cond, label %for.end, label %for.body

for.end:
  ret void
}

; Make sure that the loop exit count computation does not overflow for i8 and
; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the
; induction variable to a bigger type the exit count computation will overflow
; to 0.
; PR17532

; CHECK-LABEL: i8_loop
; CHECK: icmp eq i32 {{.*}}, 256
define i32 @i8_loop() nounwind readnone ssp uwtable {
  br label %1

; <label>:1                                       ; preds = %1, %0
  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
  %b.0 = phi i8 [ 0, %0 ], [ %3, %1 ]
  %2 = and i32 %a.0, 4
  %3 = add i8 %b.0, -1
  %4 = icmp eq i8 %3, 0
  br i1 %4, label %5, label %1

; <label>:5                                       ; preds = %1
  ret i32 %2
}

; CHECK-LABEL: i16_loop
; CHECK: icmp eq i32 {{.*}}, 65536

define i32 @i16_loop() nounwind readnone ssp uwtable {
  br label %1

; <label>:1                                       ; preds = %1, %0
  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
  %b.0 = phi i16 [ 0, %0 ], [ %3, %1 ]
  %2 = and i32 %a.0, 4
  %3 = add i16 %b.0, -1
  %4 = icmp eq i16 %3, 0
  br i1 %4, label %5, label %1

; <label>:5                                       ; preds = %1
  ret i32 %2
}

; This loop has a backedge taken count of i32_max. We need to check for this
; condition and branch directly to the scalar loop.

; CHECK-LABEL: max_i32_backedgetaken
; CHECK:  br i1 true, label %scalar.ph, label %min.iters.checked

; CHECK: middle.block:
; CHECK:  %[[v9:.+]] = extractelement <2 x i32> %bin.rdx, i32 0
; CHECK: scalar.ph:
; CHECK:  %bc.resume.val = phi i32 [ 0, %middle.block ], [ 0, %[[v0:.+]] ]
; CHECK:  %bc.merge.rdx = phi i32 [ 1, %[[v0:.+]] ], [ 1, %min.iters.checked ], [ %[[v9]], %middle.block ]

define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {

  br label %1

; <label>:1                                       ; preds = %1, %0
  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
  %b.0 = phi i32 [ 0, %0 ], [ %3, %1 ]
  %2 = and i32 %a.0, 4
  %3 = add i32 %b.0, -1
  %4 = icmp eq i32 %3, 0
  br i1 %4, label %5, label %1

; <label>:5                                       ; preds = %1
  ret i32 %2
}

; When generating the overflow check we must make sure that the induction start
; value is defined before the branch to the scalar preheader.

; CHECK-LABEL: testoverflowcheck
; CHECK: entry
; CHECK: %[[LOAD:.*]] = load i8
; CHECK: br

; CHECK: scalar.ph
; CHECK: phi i8 [ %{{.*}}, %middle.block ], [ %[[LOAD]], %entry ]

@e = global i8 1, align 1
@d = common global i32 0, align 4
@c = common global i32 0, align 4
define i32 @testoverflowcheck() {
entry:
  ; The i8 induction start value is loaded from @e in the entry block.
  %.pr.i = load i8, i8* @e, align 1
  %0 = load i32, i32* @d, align 4
  %c.promoted.i = load i32, i32* @c, align 4
  br label %cond.end.i

cond.end.i:
  %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ]
  %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ]
  %and.i = and i32 %0, %and3.i
  %inc.i = add i8 %inc4.i, 1
  %tobool.i = icmp eq i8 %inc.i, 0
  br i1 %tobool.i, label %loopexit, label %cond.end.i

loopexit:
  ret i32 %and.i
}

; The SCEV expression of %sphi is (zext i8 {%t,+,1}<%loop> to i32)
; In order to recognize %sphi as an induction PHI and vectorize this loop,
; we need to convert the SCEV expression into an AddRecExpr.
; The expression gets converted to {zext i8 %t to i32,+,1}.

; CHECK-LABEL: wrappingindvars1
; CHECK-LABEL: vector.scevcheck
; CHECK-LABEL: vector.ph
; CHECK: %[[START:.*]] = add <2 x i32> %{{.*}}, <i32 0, i32 1>
; CHECK-LABEL: vector.body
; CHECK: %[[PHI:.*]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.*]], %vector.body ]
; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 2, i32 2>
define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) {
entry:
  %st = zext i8 %t to i16
  %ext = zext i8 %t to i32
  %ecmp = icmp ult i16 %st, 42
  br i1 %ecmp, label %loop, label %exit

loop:
  %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
  %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]
  %sphi = phi i32 [ %ext, %entry ], [%idx.inc.ext, %loop]

  %ptr = getelementptr inbounds i32, i32* %A, i8 %idx
  store i32 %sphi, i32* %ptr

  ; %sphi's next value is the zero-extension of the incremented i8 counter.
  %idx.inc = add i8 %idx, 1
  %idx.inc.ext = zext i8 %idx.inc to i32
  %idx.b.inc = add nuw nsw i32 %idx.b, 1

  %c = icmp ult i32 %idx.b, %len
  br i1 %c, label %loop, label %exit

exit:
  ret void
}

; The SCEV expression of %sphi is (4 * (zext i8 {%t,+,1}<%loop> to i32))
; In order to recognize %sphi as an induction PHI and vectorize this loop,
; we need to convert the SCEV expression into an AddRecExpr.
; The expression gets converted to ({4 * (zext %t to i32),+,4}).
; CHECK-LABEL: wrappingindvars2
; CHECK-LABEL: vector.scevcheck
; CHECK-LABEL: vector.ph
; CHECK: %[[START:.*]] = add <2 x i32> %{{.*}}, <i32 0, i32 4>
; CHECK-LABEL: vector.body
; CHECK: %[[PHI:.*]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.*]], %vector.body ]
; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 8, i32 8>
define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) {

entry:
  %st = zext i8 %t to i16
  %ext = zext i8 %t to i32
  %ext.mul = mul i32 %ext, 4

  %ecmp = icmp ult i16 %st, 42
  br i1 %ecmp, label %loop, label %exit

loop:
  %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
  %sphi = phi i32 [ %ext.mul, %entry ], [%mul, %loop]
  %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]

  %ptr = getelementptr inbounds i32, i32* %A, i8 %idx
  store i32 %sphi, i32* %ptr

  ; %sphi's next value is 4 * zext(incremented i8 counter).
  %idx.inc = add i8 %idx, 1
  %idx.inc.ext = zext i8 %idx.inc to i32
  %mul = mul i32 %idx.inc.ext, 4
  %idx.b.inc = add nuw nsw i32 %idx.b, 1

  %c = icmp ult i32 %idx.b, %len
  br i1 %c, label %loop, label %exit

exit:
  ret void
}

; Check that we generate vectorized IVs in the pre-header
; instead of widening the scalar IV inside the loop, when
; we know how to do that.
; IND-LABEL: veciv
; IND: vector.body:
; IND:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; IND:   %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
; IND:   %index.next = add i32 %index, 2
; IND:   %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
; IND:   %[[CMP:.*]] = icmp eq i32 %index.next
; IND:   br i1 %[[CMP]]
; UNROLL-LABEL: veciv
; UNROLL: vector.body:
; UNROLL:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL:   %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
; UNROLL:   %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
; UNROLL:   %index.next = add i32 %index, 4
; UNROLL:   %vec.ind.next = add <2 x i32> %vec.ind, <i32 4, i32 4>
; UNROLL:   %[[CMP:.*]] = icmp eq i32 %index.next
; UNROLL:   br i1 %[[CMP]]
define void @veciv(i32* nocapture %a, i32 %start, i32 %k) {
for.body.preheader:
  br label %for.body

for.body:
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
  ; The induction variable itself is the stored value.
  store i32 %indvars.iv, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, %k
  br i1 %exitcond, label %exit, label %for.body

exit:
  ret void
}

; Same loop shape as veciv, but the stored value and the index are an i32
; truncation of an i64 induction variable.
; IND-LABEL: trunciv
; IND: vector.body:
; IND:   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; IND:   %[[VECIND:.*]] = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %[[STEPADD:.*]], %vector.body ]
; IND:   %index.next = add i64 %index, 2
; IND:   %[[STEPADD]] = add <2 x i32> %[[VECIND]], <i32 2, i32 2>
; IND:   %[[CMP:.*]] = icmp eq i64 %index.next
; IND:   br i1 %[[CMP]]
define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
for.body.preheader:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %trunc.iv = trunc i64 %indvars.iv to i32
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %trunc.iv
  store i32 %trunc.iv, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %k
  br i1 %exitcond, label %exit, label %for.body

exit:
  ret void
}

; The induction variable starts at the argument %i instead of 0, so the
; vector IV start value is built from a splat of %i in the vector preheader.
; CHECK-LABEL: @nonprimary(
; CHECK: vector.ph:
; CHECK:   %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
; CHECK:   %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK:   %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
; CHECK: vector.body:
; CHECK:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK:   %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
; CHECK:   %offset.idx = add i32 %i, %index
; CHECK:   %[[A1:.*]] = add i32 %offset.idx, 0
; CHECK:   %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i32 %[[A1]]
; CHECK:   %[[G3:.*]] = getelementptr i32, i32* %[[G1]], i32 0
; CHECK:   %[[B1:.*]] = bitcast i32* %[[G3]] to <2 x i32>*
; CHECK:   store <2 x i32> %vec.ind, <2 x i32>* %[[B1]]
; CHECK:   %index.next = add i32 %index, 2
; CHECK:   %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
; CHECK:   %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec
; CHECK:   br i1 %[[CMP]]
;
; IND-LABEL: @nonprimary(
; IND: vector.ph:
; IND:   %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
; IND:   %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; IND:   %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
; IND: vector.body:
; IND:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; IND:   %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
; IND:   %[[A1:.*]] = add i32 %index, %i
; IND:   %[[S1:.*]] = sext i32 %[[A1]] to i64
; IND:   %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i64 %[[S1]]
; IND:   %[[B1:.*]] = bitcast i32* %[[G1]] to <2 x i32>*
; IND:   store <2 x i32> %vec.ind, <2 x i32>* %[[B1]]
; IND:   %index.next = add i32 %index, 2
; IND:   %vec.ind.next = add <2 x i32> %vec.ind, <i32 2, i32 2>
; IND:   %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec
; IND:   br i1 %[[CMP]]
;
; UNROLL-LABEL: @nonprimary(
; UNROLL: vector.ph:
; UNROLL:   %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
; UNROLL:   %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; UNROLL:   %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 1>
; UNROLL: vector.body:
; UNROLL:   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; UNROLL:   %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ]
; UNROLL:   %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
; UNROLL:   %[[A1:.*]] = add i32 %index, %i
; UNROLL:   %[[S1:.*]] = sext i32 %[[A1]] to i64
; UNROLL:   %[[G1:.*]] = getelementptr inbounds i32, i32* %a, i64 %[[S1]]
; UNROLL:   %[[B1:.*]] = bitcast i32* %[[G1]] to <2 x i32>*
; UNROLL:   store <2 x i32> %vec.ind, <2 x i32>* %[[B1]]
; UNROLL:   %[[G2:.*]] = getelementptr i32, i32* %[[G1]], i64 2
; UNROLL:   %[[B2:.*]] = bitcast i32* %[[G2]] to <2 x i32>*
; UNROLL:   store <2 x i32> %step.add, <2 x i32>* %[[B2]]
; UNROLL:   %index.next = add i32 %index, 4
; UNROLL:   %vec.ind.next = add <2 x i32> %vec.ind, <i32 4, i32 4>
; UNROLL:   %[[CMP:.*]] = icmp eq i32 %index.next, %n.vec
; UNROLL:   br i1 %[[CMP]]
define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) {
for.body.preheader:
  br label %for.body

for.body:
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ %i, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
  store i32 %indvars.iv, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, %k
  br i1 %exitcond, label %exit, label %for.body

exit:
  ret void
}
