; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S < %s | FileCheck %s
;
; LoopVectorize regression test: the second loop (for.cond5.preheader) stores
; to %p[iv] while re-loading three invariant locations %p2[0..2] every
; iteration. Vectorizing it (VF=4, trip count 63) requires a runtime memory
; check between the %p store range and the %p2 load range; the assertions
; below verify the vector.memcheck block, the broadcast of the three scalar
; loads, and the scalar epilogue (63 = 60 vector iterations + 3 remainder).
; The first loop (for.body) writes %p2 through a global-controlled induction
; variable and is not vectorized.

@k = common dso_local local_unnamed_addr global i32 0, align 4

define void @m(i32* nocapture %p, i32* nocapture %p2, i32 %q) {
; CHECK-LABEL: @m(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = bitcast i32* [[P:%.*]] to i8*
; CHECK-NEXT:    [[I:%.*]] = load i32, i32* @k, align 4
; CHECK-NEXT:    [[CMP32:%.*]] = icmp slt i32 [[I]], [[Q:%.*]]
; CHECK-NEXT:    br i1 [[CMP32]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND2_PREHEADER:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond2.preheader.loopexit:
; CHECK-NEXT:    br label [[FOR_COND2_PREHEADER]]
; CHECK:       for.cond2.preheader:
; CHECK-NEXT:    [[ARRAYIDX9_1:%.*]] = getelementptr inbounds i32, i32* [[P2:%.*]], i64 1
; CHECK-NEXT:    [[ARRAYIDX9_2:%.*]] = getelementptr inbounds i32, i32* [[P2]], i64 2
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[P]], i64 63
; CHECK-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[P2]], i64 2
; CHECK-NEXT:    [[SCEVGEP34:%.*]] = bitcast i32* [[SCEVGEP3]] to i8*
; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP34]], i64 1
; CHECK-NEXT:    [[BC:%.*]] = bitcast i32* [[ARRAYIDX9_2]] to i8*
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[P1]], [[UGLYGEP]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[BC]], [[SCEVGEP2]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true
; CHECK-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[P2]], align 4, !alias.scope !0
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <4 x i32> zeroinitializer, [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX9_1]], align 4, !alias.scope !0
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], [[BROADCAST_SPLAT6]]
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX9_2]], align 4, !alias.scope !0
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT7]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <4 x i32> [[TMP4]], [[BROADCAST_SPLAT8]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP9]], align 4, !alias.scope !3, !noalias !0
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 60
; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 63, 60
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END17:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 60, [[MIDDLE_BLOCK]] ], [ 0, [[FOR_COND2_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT:    br label [[FOR_COND5_PREHEADER:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I1:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[I]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[I1]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[P2]], i64 [[IDXPROM]]
; CHECK-NEXT:    store i32 2, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[I2:%.*]] = load i32, i32* @k, align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I2]], 1
; CHECK-NEXT:    store i32 [[INC]], i32* @k, align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[Q]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND2_PREHEADER_LOOPEXIT:%.*]]
; CHECK:       for.cond5.preheader:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND5_PREHEADER]] ]
; CHECK-NEXT:    [[I3:%.*]] = load i32, i32* [[P2]], align 4
; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[I3]]
; CHECK-NEXT:    [[I4:%.*]] = load i32, i32* [[ARRAYIDX9_1]], align 4
; CHECK-NEXT:    [[SUB_1:%.*]] = sub nsw i32 [[SUB]], [[I4]]
; CHECK-NEXT:    [[I5:%.*]] = load i32, i32* [[ARRAYIDX9_2]], align 4
; CHECK-NEXT:    [[SUB_2:%.*]] = sub nsw i32 [[SUB_1]], [[I5]]
; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    store i32 [[SUB_2]], i32* [[ARRAYIDX14]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 63
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END17]], label [[FOR_COND5_PREHEADER]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       for.end17:
; CHECK-NEXT:    ret void
;
entry:
  %i = load i32, i32* @k, align 4
  %cmp32 = icmp slt i32 %i, %q
  br i1 %cmp32, label %for.body.preheader, label %for.cond2.preheader

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.cond2.preheader.loopexit:                     ; preds = %for.body
  br label %for.cond2.preheader

for.cond2.preheader:                              ; preds = %for.cond2.preheader.loopexit, %entry
  %arrayidx9.1 = getelementptr inbounds i32, i32* %p2, i64 1
  %arrayidx9.2 = getelementptr inbounds i32, i32* %p2, i64 2
  br label %for.cond5.preheader

; First loop: stores 2 to %p2[k..q-1], advancing via the global @k.
; Not a countable loop in the usual sense (bound read through memory),
; so it is left scalar; it exists to make %p2's contents loop-variant
; relative to the second loop.
for.body:                                         ; preds = %for.body, %for.body.preheader
  %i1 = phi i32 [ %inc, %for.body ], [ %i, %for.body.preheader ]
  %idxprom = sext i32 %i1 to i64
  %arrayidx = getelementptr inbounds i32, i32* %p2, i64 %idxprom
  store i32 2, i32* %arrayidx, align 4
  %i2 = load i32, i32* @k, align 4
  %inc = add nsw i32 %i2, 1
  store i32 %inc, i32* @k, align 4
  %cmp = icmp slt i32 %inc, %q
  br i1 %cmp, label %for.body, label %for.cond2.preheader.loopexit

; Second loop (trip count 63): p[iv] = 0 - p2[0] - p2[1] - p2[2].
; The three loads are loop-invariant only if %p and %p2 do not overlap,
; hence the runtime alias test asserted in vector.memcheck above.
for.cond5.preheader:                              ; preds = %for.cond5.preheader, %for.cond2.preheader
  %indvars.iv = phi i64 [ 0, %for.cond2.preheader ], [ %indvars.iv.next, %for.cond5.preheader ]
  %i3 = load i32, i32* %p2, align 4
  %sub = sub nsw i32 0, %i3
  %i4 = load i32, i32* %arrayidx9.1, align 4
  %sub.1 = sub nsw i32 %sub, %i4
  %i5 = load i32, i32* %arrayidx9.2, align 4
  %sub.2 = sub nsw i32 %sub.1, %i5
  %arrayidx14 = getelementptr inbounds i32, i32* %p, i64 %indvars.iv
  store i32 %sub.2, i32* %arrayidx14, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 63
  br i1 %exitcond, label %for.end17, label %for.cond5.preheader

for.end17:                                        ; preds = %for.cond5.preheader
  ret void
}