; Lit regression test for outer-loop vectorization through the VPlan-native
; path of the loop vectorizer. The outer loop of @foo carries
; llvm.loop.vectorize.enable and llvm.loop.vectorize.width(4) metadata; the
; FileCheck directives below expect the outer loop to be widened to 4 lanes
; while the inner loop remains a loop that operates on <4 x ...> values.
;
; C source corresponding to the IR in this file:
;
; int A[1024], B[1024];
;
; void foo(int iCount, int c, int jCount)
; {
;
;   int i, j;
;
; #pragma clang loop vectorize(enable) vectorize_width(4)
;   for (i = 0; i < iCount; i++) {
;     A[i] = c;
;     for (j = 0; j < jCount; j++) {
;       A[i] += B[j] + i;
;     }
;   }
; }
; RUN: opt -S -loop-vectorize -enable-vplan-native-path < %s | FileCheck %s

; The inner-loop zero-trip test on %jCount is computed once, outside the
; vector loop, and reused as the branch condition inside vector.body.
; CHECK: %[[ZeroTripChk:.*]] = icmp sgt i32 %jCount, 0

; Vector preheader: %c is broadcast into all 4 lanes.
; CHECK-LABEL: vector.ph:
; CHECK: %[[CVal0:.*]] = insertelement <4 x i32> poison, i32 %c, i32 0
; CHECK-NEXT: %[[CSplat:.*]] = shufflevector <4 x i32> %[[CVal0]], <4 x i32> poison, <4 x i32> zeroinitializer

; Widened outer-loop header: scalar and vector induction phis, then the
; store of %c to A[i] expressed as a masked scatter with an all-true mask.
; CHECK-LABEL: vector.body:
; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
; CHECK: %[[AAddr:.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, <4 x i64> %[[VecInd]]
; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[CSplat]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; CHECK: br i1 %[[ZeroTripChk]], label %[[InnerForPh:.*]], label %[[OuterInc:.*]]

; Inner-loop preheader: A[i] is reloaded via a masked gather and the outer
; induction vector is truncated to i32 for use in the inner-loop add.
; CHECK: [[InnerForPh]]:
; CHECK: %[[WideAVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
; CHECK: br label %[[InnerForBody:.*]]

; Inner-loop body: vector induction and accumulator phis; the exit test is
; taken from lane 0 of the vector compare.
; CHECK: [[InnerForBody]]:
; CHECK: %[[InnerInd:.*]] = phi <4 x i64> [ zeroinitializer, %[[InnerForPh]] ], [ %[[InnerIndNext:.*]], %[[InnerForBody]] ]
; CHECK: %[[AccumPhi:.*]] = phi <4 x i32> [ %[[WideAVal]], %[[InnerForPh]] ], [ %[[AccumPhiNext:.*]], %[[InnerForBody]] ]
; CHECK: %[[BAddr:.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, <4 x i64> %[[InnerInd]]
; CHECK: %[[WideBVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %[[BAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK: %[[Add1:.*]] = add nsw <4 x i32> %[[WideBVal]], %[[VecIndTr]]
; CHECK: %[[AccumPhiNext]] = add nsw <4 x i32> %[[Add1]], %[[AccumPhi]]
; CHECK: %[[InnerIndNext]] = add nuw nsw <4 x i64> %[[InnerInd]], <i64 1, i64 1, i64 1, i64 1>
; CHECK: %[[InnerVecCond:.*]] = icmp eq <4 x i64> %[[InnerIndNext]], {{.*}}
; CHECK: %[[InnerCond:.+]] = extractelement <4 x i1> %[[InnerVecCond]], i32 0
; CHECK: br i1 %[[InnerCond]], label %[[InnerCrit:.*]], label %[[InnerForBody]]

; Inner-loop exit: the accumulated value is scattered back to A[i].
; CHECK: [[InnerCrit]]:
; CHECK: %[[StorePhi:.*]] = phi <4 x i32> [ %[[AccumPhiNext]], %[[InnerForBody]] ]
; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[StorePhi]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; CHECK: br label %[[ForInc]]

; Outer-loop latch: both induction variables advance by 4 per iteration.
; CHECK: [[ForInc]]:
; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4
; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], <i64 4, i64 4, i64 4, i64 4>
; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], {{.*}}
; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body

@A = common global [1024 x i32] zeroinitializer, align 16
@B = common global [1024 x i32] zeroinitializer, align 16

; Function Attrs: norecurse nounwind uwtable
define void @foo(i32 %iCount, i32 %c, i32 %jCount) {
entry:
  ; Skip the whole loop nest when iCount <= 0.
  %cmp22 = icmp sgt i32 %iCount, 0
  br i1 %cmp22, label %for.body.lr.ph, label %for.end11

for.body.lr.ph:                                   ; preds = %entry
  ; Loop-invariant values: the inner-loop guard and both trip counts
  ; zero-extended to i64.
  %cmp220 = icmp sgt i32 %jCount, 0
  %wide.trip.count = zext i32 %jCount to i64
  %wide.trip.count27 = zext i32 %iCount to i64
  br label %for.body

for.body:                                         ; preds = %for.inc9, %for.body.lr.ph
  ; Outer-loop header: A[i] = c, then enter the inner loop only if jCount > 0.
  %indvars.iv25 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next26, %for.inc9 ]
  %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv25
  store i32 %c, i32* %arrayidx, align 4
  br i1 %cmp220, label %for.body3.lr.ph, label %for.inc9

for.body3.lr.ph:                                  ; preds = %for.body
  ; A[i] is loaded once here and carried through the inner loop in a phi
  ; instead of being reloaded on every inner iteration.
  %arrayidx.promoted = load i32, i32* %arrayidx, align 4
  %0 = trunc i64 %indvars.iv25 to i32
  br label %for.body3

for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
  ; Inner-loop body: accumulate B[j] + i into the running value of A[i].
  %indvars.iv = phi i64 [ 0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
  %1 = phi i32 [ %arrayidx.promoted, %for.body3.lr.ph ], [ %add8, %for.body3 ]
  %arrayidx5 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
  %2 = load i32, i32* %arrayidx5, align 4
  %add = add nsw i32 %2, %0
  %add8 = add nsw i32 %add, %1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond1.for.inc9_crit_edge, label %for.body3

for.cond1.for.inc9_crit_edge:                     ; preds = %for.body3
  ; Write the accumulated sum back to A[i] after the inner loop finishes.
  store i32 %add8, i32* %arrayidx, align 4
  br label %for.inc9

for.inc9:                                         ; preds = %for.cond1.for.inc9_crit_edge, %for.body
  ; Outer-loop latch; the back-edge branch carries the vectorization hints.
  %indvars.iv.next26 = add nuw nsw i64 %indvars.iv25, 1
  %exitcond28 = icmp eq i64 %indvars.iv.next26, %wide.trip.count27
  br i1 %exitcond28, label %for.end11, label %for.body, !llvm.loop !1

for.end11:                                        ; preds = %for.inc9, %entry
  ret void
}

; Loop metadata referenced by the outer-loop latch branch in for.inc9:
; request vectorization with a width of 4.
!1 = distinct !{!1, !2, !3}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
!3 = !{!"llvm.loop.vectorize.enable", i1 true}