; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses -instcombine -S | FileCheck %s --check-prefix=INTERLEAVE

; Check prefixes selected by the invocations above
;   CHECK         width 2, interleave 1
;   IND           width 2, interleave 1, then -instcombine
;   UNROLL        width 2, interleave 2, then -instcombine
;   UNROLL-NO-IC  width 2, interleave 2
;   INTERLEAVE    width 4, interleave 2, -enable-interleaved-mem-accesses, then -instcombine
; The assertion blocks are produced by utils/update_test_checks.py (see the
; NOTE on the first line), so each directive must stay on its own line.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Make sure that we can handle multiple integer induction variables.
;
; The loop carries two induction variables that both advance by 1 per iteration
;   %indvars.iv  i64, starts at 0, used as the index into %A
;   %count.09    i32, starts at 190, the value stored into %A
; and exits once %indvars.iv.next, truncated to i32, equals %N. In every
; configuration the assertions expect the i32 induction to be widened into a
; vector phi whose first lanes are 190, 191, ...
;
define void @multi_int_induction(i32* %A, i32 %N) {
; CHECK-LABEL: @multi_int_induction(
; CHECK-NEXT:  for.body.lr.ph:
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT:    [[IND_END:%.*]] = add i32 190, [[CAST_CRD]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>*
; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP6]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[COUNT_09]], 1
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
; IND-LABEL: @multi_int_induction(
; IND-NEXT:  for.body.lr.ph:
; IND-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; IND-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; IND-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IND:       vector.ph:
; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
; IND-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
; IND-NEXT:    [[IND_END:%.*]] = add i32 [[CAST_CRD]], 190
; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
; IND:       vector.body:
; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IND-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; IND-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; IND-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
; IND-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP4]], align 4
; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; IND-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IND-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IND:       middle.block:
; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; IND-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IND:       scalar.ph:
; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
; IND-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
; IND-NEXT:    br label [[FOR_BODY:%.*]]
; IND:       for.body:
; IND-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; IND-NEXT:    [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; IND-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; IND-NEXT:    store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
; IND-NEXT:    [[INC]] = add nsw i32 [[COUNT_09]], 1
; IND-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; IND-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; IND-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; IND-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; IND:       for.end:
; IND-NEXT:    ret void
;
; UNROLL-LABEL: @multi_int_induction(
; UNROLL-NEXT:  for.body.lr.ph:
; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL:       vector.ph:
; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
; UNROLL-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
; UNROLL-NEXT:    [[IND_END:%.*]] = add i32 [[CAST_CRD]], 190
; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
; UNROLL:       vector.body:
; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; UNROLL-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP4]], align 4
; UNROLL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 2
; UNROLL-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>*
; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP6]], align 4
; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
; UNROLL-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; UNROLL:       middle.block:
; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL:       scalar.ph:
; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
; UNROLL-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
; UNROLL:       for.body:
; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NEXT:    [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; UNROLL-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; UNROLL-NEXT:    store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
; UNROLL-NEXT:    [[INC]] = add nsw i32 [[COUNT_09]], 1
; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; UNROLL-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; UNROLL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; UNROLL:       for.end:
; UNROLL-NEXT:    ret void
;
; UNROLL-NO-IC-LABEL: @multi_int_induction(
; UNROLL-NO-IC-NEXT:  for.body.lr.ph:
; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC:       vector.ph:
; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i32 190, [[CAST_CRD]]
; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC:       vector.body:
; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 2
; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <2 x i32>*
; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP8]], align 4
; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 2
; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <2 x i32>*
; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP10]], align 4
; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; UNROLL-NO-IC:       middle.block:
; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC:       scalar.ph:
; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC:       for.body:
; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT:    [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; UNROLL-NO-IC-NEXT:    store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
; UNROLL-NO-IC-NEXT:    [[INC]] = add nsw i32 [[COUNT_09]], 1
; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[N]]
; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]]
; UNROLL-NO-IC:       for.end:
; UNROLL-NO-IC-NEXT:    ret void
;
; INTERLEAVE-LABEL: @multi_int_induction(
; INTERLEAVE-NEXT:  for.body.lr.ph:
; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
; INTERLEAVE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; INTERLEAVE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; INTERLEAVE:       vector.ph:
; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
; INTERLEAVE-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
; INTERLEAVE-NEXT:    [[IND_END:%.*]] = add i32 [[CAST_CRD]], 190
; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
; INTERLEAVE:       vector.body:
; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 190, i32 191, i32 192, i32 193>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; INTERLEAVE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; INTERLEAVE-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP4]], align 4
; INTERLEAVE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 4
; INTERLEAVE-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP6]], align 4
; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
; INTERLEAVE-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; INTERLEAVE-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; INTERLEAVE:       middle.block:
; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; INTERLEAVE:       scalar.ph:
; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
; INTERLEAVE-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
; INTERLEAVE:       for.body:
; INTERLEAVE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; INTERLEAVE-NEXT:    [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; INTERLEAVE-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; INTERLEAVE-NEXT:    store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
; INTERLEAVE-NEXT:    [[INC]] = add nsw i32 [[COUNT_09]], 1
; INTERLEAVE-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; INTERLEAVE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; INTERLEAVE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; INTERLEAVE-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; INTERLEAVE:       for.end:
; INTERLEAVE-NEXT:    ret void
;
for.body.lr.ph:
  br label %for.body

for.body:
  ; i64 induction, starts at 0 and is used to index %A.
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  ; i32 induction, starts at 190 and is the value written on each iteration.
  %count.09 = phi i32 [ 190, %for.body.lr.ph ], [ %inc, %for.body ]
  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  store i32 %count.09, i32* %arrayidx2, align 4
  %inc = add nsw i32 %count.09, 1
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares the i64 counter, truncated to i32, against %N.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  ret void
}

; Make sure we remove unneeded vectorization of induction variables.
; In order for instcombine to clean up the vectorized induction variables that we
; create in the loop vectorizer we need to perform some form of redundancy
; elimination to get rid of multiple uses.


; Vectorized induction variable.
268 269define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) { 270; CHECK-LABEL: @scalar_use( 271; CHECK-NEXT: entry: 272; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 273; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 274; CHECK: vector.memcheck: 275; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]] 276; CHECK-NEXT: [[SCEVGEP1:%.*]] = bitcast float* [[SCEVGEP]] to i8* 277; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]] 278; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]] 279; CHECK-NEXT: [[SCEVGEP23:%.*]] = bitcast float* [[SCEVGEP2]] to i8* 280; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]] 281; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8* 282; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]] 283; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]] 284; CHECK-NEXT: [[SCEVGEP67:%.*]] = bitcast float* [[SCEVGEP6]] to i8* 285; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]] 286; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]] 287; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 288; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 289; CHECK: vector.ph: 290; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 291; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 292; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i32 0 293; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 294; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 295; CHECK: vector.body: 296; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 297; CHECK-NEXT: [[TMP2:%.*]] = 
add i64 [[INDEX]], 0 298; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], [[OFFSET]] 299; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP3]] 300; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP4]], i32 0 301; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <2 x float>* 302; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP6]], align 4, !alias.scope !4, !noalias !7 303; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP2]], [[OFFSET2]] 304; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP7]] 305; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 0 306; CHECK-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to <2 x float>* 307; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP10]], align 4, !alias.scope !7 308; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD8]] 309; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP11]] 310; CHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP5]] to <2 x float>* 311; CHECK-NEXT: store <2 x float> [[TMP12]], <2 x float>* [[TMP13]], align 4, !alias.scope !4, !noalias !7 312; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 313; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 314; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 315; CHECK: middle.block: 316; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 317; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] 318; CHECK: scalar.ph: 319; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] 320; CHECK-NEXT: br label [[FOR_BODY:%.*]] 321; CHECK: for.body: 322; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 323; CHECK-NEXT: [[IND_SUM:%.*]] = add i64 [[IV]], 
[[OFFSET]] 324; CHECK-NEXT: [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]] 325; CHECK-NEXT: [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4 326; CHECK-NEXT: [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]] 327; CHECK-NEXT: [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]] 328; CHECK-NEXT: [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4 329; CHECK-NEXT: [[M:%.*]] = fmul fast float [[B]], [[L2]] 330; CHECK-NEXT: [[AD:%.*]] = fadd fast float [[L1]], [[M]] 331; CHECK-NEXT: store float [[AD]], float* [[ARR_IDX]], align 4 332; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 333; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 334; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 335; CHECK: loopexit: 336; CHECK-NEXT: ret void 337; 338; IND-LABEL: @scalar_use( 339; IND-NEXT: entry: 340; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 341; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 342; IND: vector.memcheck: 343; IND-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]] 344; IND-NEXT: [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]] 345; IND-NEXT: [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]] 346; IND-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]] 347; IND-NEXT: [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]] 348; IND-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]] 349; IND-NEXT: [[BOUND0:%.*]] = icmp ult float* [[SCEVGEP]], [[SCEVGEP6]] 350; IND-NEXT: [[BOUND1:%.*]] = icmp ult float* [[SCEVGEP4]], [[SCEVGEP2]] 351; IND-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 352; IND-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 353; IND: vector.ph: 354; IND-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -2 355; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = 
insertelement <2 x float> poison, float [[B:%.*]], i64 0 356; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 357; IND-NEXT: br label [[VECTOR_BODY:%.*]] 358; IND: vector.body: 359; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 360; IND-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], [[OFFSET]] 361; IND-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]] 362; IND-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <2 x float>* 363; IND-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 4, !alias.scope !4, !noalias !7 364; IND-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], [[OFFSET2]] 365; IND-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]] 366; IND-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>* 367; IND-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP7]], align 4, !alias.scope !7 368; IND-NEXT: [[TMP8:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD8]] 369; IND-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP8]] 370; IND-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP3]] to <2 x float>* 371; IND-NEXT: store <2 x float> [[TMP9]], <2 x float>* [[TMP10]], align 4, !alias.scope !4, !noalias !7 372; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 373; IND-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 374; IND-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 375; IND: middle.block: 376; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] 377; IND-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] 378; IND: scalar.ph: 379; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] 380; IND-NEXT: br label [[FOR_BODY:%.*]] 381; IND: for.body: 382; IND-NEXT: [[IV:%.*]] = phi 
i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 383; IND-NEXT: [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]] 384; IND-NEXT: [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]] 385; IND-NEXT: [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4 386; IND-NEXT: [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]] 387; IND-NEXT: [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]] 388; IND-NEXT: [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4 389; IND-NEXT: [[M:%.*]] = fmul fast float [[L2]], [[B]] 390; IND-NEXT: [[AD:%.*]] = fadd fast float [[L1]], [[M]] 391; IND-NEXT: store float [[AD]], float* [[ARR_IDX]], align 4 392; IND-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 393; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 394; IND-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 395; IND: loopexit: 396; IND-NEXT: ret void 397; 398; UNROLL-LABEL: @scalar_use( 399; UNROLL-NEXT: entry: 400; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 401; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 402; UNROLL: vector.memcheck: 403; UNROLL-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]] 404; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]] 405; UNROLL-NEXT: [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]] 406; UNROLL-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]] 407; UNROLL-NEXT: [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]] 408; UNROLL-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]] 409; UNROLL-NEXT: [[BOUND0:%.*]] = icmp ult float* [[SCEVGEP]], [[SCEVGEP6]] 410; UNROLL-NEXT: [[BOUND1:%.*]] = icmp ult float* [[SCEVGEP4]], [[SCEVGEP2]] 411; UNROLL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 412; UNROLL-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], 
label [[VECTOR_PH:%.*]] 413; UNROLL: vector.ph: 414; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 415; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i64 0 416; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 417; UNROLL-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <2 x float> poison, float [[B]], i64 0 418; UNROLL-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT11]], <2 x float> poison, <2 x i32> zeroinitializer 419; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 420; UNROLL: vector.body: 421; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 422; UNROLL-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], [[OFFSET]] 423; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]] 424; UNROLL-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <2 x float>* 425; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 4, !alias.scope !4, !noalias !7 426; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 2 427; UNROLL-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <2 x float>* 428; UNROLL-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP6]], align 4, !alias.scope !4, !noalias !7 429; UNROLL-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], [[OFFSET2]] 430; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP7]] 431; UNROLL-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <2 x float>* 432; UNROLL-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4, !alias.scope !7 433; UNROLL-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 2 434; UNROLL-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <2 x float>* 435; UNROLL-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x float>, <2 x float>* [[TMP11]], align 4, !alias.scope !7 
436; UNROLL-NEXT: [[TMP12:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD9]] 437; UNROLL-NEXT: [[TMP13:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT12]], [[WIDE_LOAD10]] 438; UNROLL-NEXT: [[TMP14:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP12]] 439; UNROLL-NEXT: [[TMP15:%.*]] = fadd fast <2 x float> [[WIDE_LOAD8]], [[TMP13]] 440; UNROLL-NEXT: [[TMP16:%.*]] = bitcast float* [[TMP3]] to <2 x float>* 441; UNROLL-NEXT: store <2 x float> [[TMP14]], <2 x float>* [[TMP16]], align 4, !alias.scope !4, !noalias !7 442; UNROLL-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP5]] to <2 x float>* 443; UNROLL-NEXT: store <2 x float> [[TMP15]], <2 x float>* [[TMP17]], align 4, !alias.scope !4, !noalias !7 444; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 445; UNROLL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 446; UNROLL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 447; UNROLL: middle.block: 448; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] 449; UNROLL-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] 450; UNROLL: scalar.ph: 451; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] 452; UNROLL-NEXT: br label [[FOR_BODY:%.*]] 453; UNROLL: for.body: 454; UNROLL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 455; UNROLL-NEXT: [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]] 456; UNROLL-NEXT: [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]] 457; UNROLL-NEXT: [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4 458; UNROLL-NEXT: [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]] 459; UNROLL-NEXT: [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]] 460; UNROLL-NEXT: [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4 461; UNROLL-NEXT: [[M:%.*]] = fmul fast float [[L2]], [[B]] 462; 
UNROLL-NEXT: [[AD:%.*]] = fadd fast float [[L1]], [[M]] 463; UNROLL-NEXT: store float [[AD]], float* [[ARR_IDX]], align 4 464; UNROLL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 465; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 466; UNROLL-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 467; UNROLL: loopexit: 468; UNROLL-NEXT: ret void 469; 470; UNROLL-NO-IC-LABEL: @scalar_use( 471; UNROLL-NO-IC-NEXT: entry: 472; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 473; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 474; UNROLL-NO-IC: vector.memcheck: 475; UNROLL-NO-IC-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]] 476; UNROLL-NO-IC-NEXT: [[SCEVGEP1:%.*]] = bitcast float* [[SCEVGEP]] to i8* 477; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]] 478; UNROLL-NO-IC-NEXT: [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]] 479; UNROLL-NO-IC-NEXT: [[SCEVGEP23:%.*]] = bitcast float* [[SCEVGEP2]] to i8* 480; UNROLL-NO-IC-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]] 481; UNROLL-NO-IC-NEXT: [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8* 482; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]] 483; UNROLL-NO-IC-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]] 484; UNROLL-NO-IC-NEXT: [[SCEVGEP67:%.*]] = bitcast float* [[SCEVGEP6]] to i8* 485; UNROLL-NO-IC-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]] 486; UNROLL-NO-IC-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]] 487; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 488; UNROLL-NO-IC-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 489; UNROLL-NO-IC: vector.ph: 490; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 491; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub 
i64 [[N]], [[N_MOD_VF]] 492; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i32 0 493; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 494; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <2 x float> poison, float [[B]], i32 0 495; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT11]], <2 x float> poison, <2 x i32> zeroinitializer 496; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 497; UNROLL-NO-IC: vector.body: 498; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 499; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 500; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 501; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], [[OFFSET]] 502; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], [[OFFSET]] 503; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]] 504; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]] 505; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i32 0 506; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <2 x float>* 507; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4, !alias.scope !4, !noalias !7 508; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP6]], i32 2 509; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <2 x float>* 510; UNROLL-NO-IC-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP11]], align 4, !alias.scope !4, !noalias !7 511; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i64 [[TMP2]], [[OFFSET2]] 512; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = add i64 [[TMP3]], [[OFFSET2]] 513; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, 
float* [[A]], i64 [[TMP12]] 514; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP13]] 515; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 0 516; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP16]] to <2 x float>* 517; UNROLL-NO-IC-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x float>, <2 x float>* [[TMP17]], align 4, !alias.scope !7 518; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 2 519; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <2 x float>* 520; UNROLL-NO-IC-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x float>, <2 x float>* [[TMP19]], align 4, !alias.scope !7 521; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD9]] 522; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT12]], [[WIDE_LOAD10]] 523; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP20]] 524; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = fadd fast <2 x float> [[WIDE_LOAD8]], [[TMP21]] 525; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = bitcast float* [[TMP8]] to <2 x float>* 526; UNROLL-NO-IC-NEXT: store <2 x float> [[TMP22]], <2 x float>* [[TMP24]], align 4, !alias.scope !4, !noalias !7 527; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP10]] to <2 x float>* 528; UNROLL-NO-IC-NEXT: store <2 x float> [[TMP23]], <2 x float>* [[TMP25]], align 4, !alias.scope !4, !noalias !7 529; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 530; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 531; UNROLL-NO-IC-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 532; UNROLL-NO-IC: middle.block: 533; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 534; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] 535; UNROLL-NO-IC: scalar.ph: 536; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi 
i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] 537; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] 538; UNROLL-NO-IC: for.body: 539; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] 540; UNROLL-NO-IC-NEXT: [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]] 541; UNROLL-NO-IC-NEXT: [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]] 542; UNROLL-NO-IC-NEXT: [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4 543; UNROLL-NO-IC-NEXT: [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]] 544; UNROLL-NO-IC-NEXT: [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]] 545; UNROLL-NO-IC-NEXT: [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4 546; UNROLL-NO-IC-NEXT: [[M:%.*]] = fmul fast float [[B]], [[L2]] 547; UNROLL-NO-IC-NEXT: [[AD:%.*]] = fadd fast float [[L1]], [[M]] 548; UNROLL-NO-IC-NEXT: store float [[AD]], float* [[ARR_IDX]], align 4 549; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 550; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 551; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 552; UNROLL-NO-IC: loopexit: 553; UNROLL-NO-IC-NEXT: ret void 554; 555; INTERLEAVE-LABEL: @scalar_use( 556; INTERLEAVE-NEXT: entry: 557; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 558; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 559; INTERLEAVE: vector.memcheck: 560; INTERLEAVE-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]] 561; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]] 562; INTERLEAVE-NEXT: [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]] 563; INTERLEAVE-NEXT: [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]] 564; INTERLEAVE-NEXT: [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]] 565; 
INTERLEAVE-NEXT: [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]] 566; INTERLEAVE-NEXT: [[BOUND0:%.*]] = icmp ult float* [[SCEVGEP]], [[SCEVGEP6]] 567; INTERLEAVE-NEXT: [[BOUND1:%.*]] = icmp ult float* [[SCEVGEP4]], [[SCEVGEP2]] 568; INTERLEAVE-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 569; INTERLEAVE-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 570; INTERLEAVE: vector.ph: 571; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8 572; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0 573; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 574; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x float> poison, float [[B]], i64 0 575; INTERLEAVE-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT11]], <4 x float> poison, <4 x i32> zeroinitializer 576; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 577; INTERLEAVE: vector.body: 578; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 579; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], [[OFFSET]] 580; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]] 581; INTERLEAVE-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>* 582; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4, !alias.scope !4, !noalias !7 583; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 4 584; INTERLEAVE-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>* 585; INTERLEAVE-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4, !alias.scope !4, !noalias !7 586; INTERLEAVE-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], [[OFFSET2]] 587; INTERLEAVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* 
[[A]], i64 [[TMP7]] 588; INTERLEAVE-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>* 589; INTERLEAVE-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4, !alias.scope !7 590; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 4 591; INTERLEAVE-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>* 592; INTERLEAVE-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP11]], align 4, !alias.scope !7 593; INTERLEAVE-NEXT: [[TMP12:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD9]] 594; INTERLEAVE-NEXT: [[TMP13:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT12]], [[WIDE_LOAD10]] 595; INTERLEAVE-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[TMP12]] 596; INTERLEAVE-NEXT: [[TMP15:%.*]] = fadd fast <4 x float> [[WIDE_LOAD8]], [[TMP13]] 597; INTERLEAVE-NEXT: [[TMP16:%.*]] = bitcast float* [[TMP3]] to <4 x float>* 598; INTERLEAVE-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[TMP16]], align 4, !alias.scope !4, !noalias !7 599; INTERLEAVE-NEXT: [[TMP17:%.*]] = bitcast float* [[TMP5]] to <4 x float>* 600; INTERLEAVE-NEXT: store <4 x float> [[TMP15]], <4 x float>* [[TMP17]], align 4, !alias.scope !4, !noalias !7 601; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 602; INTERLEAVE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 603; INTERLEAVE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 604; INTERLEAVE: middle.block: 605; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] 606; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] 607; INTERLEAVE: scalar.ph: 608; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] 609; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] 610; INTERLEAVE: for.body: 611; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ 
[[IV_NEXT:%.*]], [[FOR_BODY]] ] 612; INTERLEAVE-NEXT: [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]] 613; INTERLEAVE-NEXT: [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]] 614; INTERLEAVE-NEXT: [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4 615; INTERLEAVE-NEXT: [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]] 616; INTERLEAVE-NEXT: [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]] 617; INTERLEAVE-NEXT: [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4 618; INTERLEAVE-NEXT: [[M:%.*]] = fmul fast float [[L2]], [[B]] 619; INTERLEAVE-NEXT: [[AD:%.*]] = fadd fast float [[L1]], [[M]] 620; INTERLEAVE-NEXT: store float [[AD]], float* [[ARR_IDX]], align 4 621; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 622; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 623; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 624; INTERLEAVE: loopexit: 625; INTERLEAVE-NEXT: ret void 626; 627entry: 628 br label %for.body 629 630for.body: 631 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] 632 %ind.sum = add i64 %iv, %offset 633 %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum 634 %l1 = load float, float* %arr.idx, align 4 635 %ind.sum2 = add i64 %iv, %offset2 636 %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2 637 %l2 = load float, float* %arr.idx2, align 4 638 %m = fmul fast float %b, %l2 639 %ad = fadd fast float %l1, %m 640 store float %ad, float* %arr.idx, align 4 641 %iv.next = add nuw nsw i64 %iv, 1 642 %exitcond = icmp eq i64 %iv.next, %n 643 br i1 %exitcond, label %loopexit, label %for.body 644 645loopexit: 646 ret void 647} 648 649; Make sure we don't create a vector induction phi node that is unused. 650; Scalarize the step vectors instead. 
651; 652; for (int i = 0; i < n; ++i) 653; sum += a[i]; 654; The induction variable only feeds the address of a[i] and its own increment, so vector.body should carry just the scalar index phi and the reduction phi(s). 655; 656; 657; 658 659define i64 @scalarize_induction_variable_01(i64 *%a, i64 %n) { 660; CHECK-LABEL: @scalarize_induction_variable_01( 661; CHECK-NEXT: entry: 662; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 663; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2 664; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 665; CHECK: vector.ph: 666; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2 667; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] 668; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 669; CHECK: vector.body: 670; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 671; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] 672; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 673; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP0]] 674; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0 675; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <2 x i64>* 676; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8 677; CHECK-NEXT: [[TMP4]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] 678; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 679; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 680; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 681; CHECK: middle.block: 682; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP4]]) 683; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 684; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 685; CHECK: scalar.ph: 686; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 687; CHECK-NEXT: 
[[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] 688; CHECK-NEXT: br label [[FOR_BODY:%.*]] 689; CHECK: for.body: 690; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 691; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[TMP9:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 692; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]] 693; CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 694; CHECK-NEXT: [[TMP9]] = add i64 [[TMP8]], [[SUM]] 695; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 696; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 697; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]] 698; CHECK: for.end: 699; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ [[TMP9]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] 700; CHECK-NEXT: ret i64 [[TMP10]] 701; 702; IND-LABEL: @scalarize_induction_variable_01( 703; IND-NEXT: entry: 704; IND-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 705; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2 706; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 707; IND: vector.ph: 708; IND-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806 709; IND-NEXT: br label [[VECTOR_BODY:%.*]] 710; IND: vector.body: 711; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 712; IND-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] 713; IND-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] 714; IND-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>* 715; IND-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 716; IND-NEXT: [[TMP2]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] 717; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 718; IND-NEXT: 
[[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 719; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 720; IND: middle.block: 721; IND-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP2]]) 722; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 723; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 724; IND: scalar.ph: 725; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 726; IND-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 727; IND-NEXT: br label [[FOR_BODY:%.*]] 728; IND: for.body: 729; IND-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 730; IND-NEXT: [[SUM:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 731; IND-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]] 732; IND-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8 733; IND-NEXT: [[TMP7]] = add i64 [[TMP6]], [[SUM]] 734; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 735; IND-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 736; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]] 737; IND: for.end: 738; IND-NEXT: [[TMP8:%.*]] = phi i64 [ [[TMP7]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] 739; IND-NEXT: ret i64 [[TMP8]] 740; 741; UNROLL-LABEL: @scalarize_induction_variable_01( 742; UNROLL-NEXT: entry: 743; UNROLL-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 744; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4 745; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 746; UNROLL: vector.ph: 747; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804 748; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 749; UNROLL: vector.body: 750; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 
0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 751; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] 752; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 753; UNROLL-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] 754; UNROLL-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>* 755; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 756; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 2 757; UNROLL-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <2 x i64>* 758; UNROLL-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8 759; UNROLL-NEXT: [[TMP4]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] 760; UNROLL-NEXT: [[TMP5]] = add <2 x i64> [[WIDE_LOAD2]], [[VEC_PHI1]] 761; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 762; UNROLL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 763; UNROLL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 764; UNROLL: middle.block: 765; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP5]], [[TMP4]] 766; UNROLL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) 767; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 768; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 769; UNROLL: scalar.ph: 770; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 771; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 772; UNROLL-NEXT: br label [[FOR_BODY:%.*]] 773; UNROLL: for.body: 774; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 775; UNROLL-NEXT: [[SUM:%.*]] = phi i64 [ [[TMP10:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], 
[[SCALAR_PH]] ] 776; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]] 777; UNROLL-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8 778; UNROLL-NEXT: [[TMP10]] = add i64 [[TMP9]], [[SUM]] 779; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 780; UNROLL-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 781; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]] 782; UNROLL: for.end: 783; UNROLL-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP10]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 784; UNROLL-NEXT: ret i64 [[TMP11]] 785; 786; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_01( 787; UNROLL-NO-IC-NEXT: entry: 788; UNROLL-NO-IC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 789; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4 790; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 791; UNROLL-NO-IC: vector.ph: 792; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 4 793; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] 794; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 795; UNROLL-NO-IC: vector.body: 796; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 797; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] 798; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] 799; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 800; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 801; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP0]] 802; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP1]] 803; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[TMP2]], i32 0 804; UNROLL-NO-IC-NEXT: 
[[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>* 805; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8 806; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, i64* [[TMP2]], i32 2 807; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = bitcast i64* [[TMP6]] to <2 x i64>* 808; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 8 809; UNROLL-NO-IC-NEXT: [[TMP8]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] 810; UNROLL-NO-IC-NEXT: [[TMP9]] = add <2 x i64> [[WIDE_LOAD2]], [[VEC_PHI1]] 811; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 812; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 813; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 814; UNROLL-NO-IC: middle.block: 815; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP9]], [[TMP8]] 816; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) 817; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 818; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 819; UNROLL-NO-IC: scalar.ph: 820; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 821; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] 822; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] 823; UNROLL-NO-IC: for.body: 824; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 825; UNROLL-NO-IC-NEXT: [[SUM:%.*]] = phi i64 [ [[TMP14:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 826; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]] 827; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 828; UNROLL-NO-IC-NEXT: [[TMP14]] = add i64 [[TMP13]], [[SUM]] 829; UNROLL-NO-IC-NEXT: [[I_NEXT]] = 
add nuw nsw i64 [[I]], 1 830; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 831; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]] 832; UNROLL-NO-IC: for.end: 833; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = phi i64 [ [[TMP14]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] 834; UNROLL-NO-IC-NEXT: ret i64 [[TMP15]] 835; 836; INTERLEAVE-LABEL: @scalarize_induction_variable_01( 837; INTERLEAVE-NEXT: entry: 838; INTERLEAVE-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 839; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8 840; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 841; INTERLEAVE: vector.ph: 842; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775800 843; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 844; INTERLEAVE: vector.body: 845; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 846; INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] 847; INTERLEAVE-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 848; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]] 849; INTERLEAVE-NEXT: [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <4 x i64>* 850; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, <4 x i64>* [[TMP1]], align 8 851; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 4 852; INTERLEAVE-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <4 x i64>* 853; INTERLEAVE-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, <4 x i64>* [[TMP3]], align 8 854; INTERLEAVE-NEXT: [[TMP4]] = add <4 x i64> [[WIDE_LOAD]], [[VEC_PHI]] 855; INTERLEAVE-NEXT: [[TMP5]] = add <4 x i64> [[WIDE_LOAD2]], [[VEC_PHI1]] 856; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 857; 
INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 858; INTERLEAVE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 859; INTERLEAVE: middle.block: 860; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP5]], [[TMP4]] 861; INTERLEAVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[BIN_RDX]]) 862; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 863; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 864; INTERLEAVE: scalar.ph: 865; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 866; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 867; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] 868; INTERLEAVE: for.body: 869; INTERLEAVE-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 870; INTERLEAVE-NEXT: [[SUM:%.*]] = phi i64 [ [[TMP10:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 871; INTERLEAVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]] 872; INTERLEAVE-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8 873; INTERLEAVE-NEXT: [[TMP10]] = add i64 [[TMP9]], [[SUM]] 874; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 875; INTERLEAVE-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 876; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]] 877; INTERLEAVE: for.end: 878; INTERLEAVE-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP10]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 879; INTERLEAVE-NEXT: ret i64 [[TMP11]] 880; 881entry: 882 br label %for.body 883 884for.body: 885 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] 886 %sum = phi i64 [ %2, %for.body ], [ 0, %entry ] 887 %0 = getelementptr inbounds i64, i64* %a, i64 %i 888 %1 = load i64, i64* %0, align 8 889 %2 = add i64 %1, %sum 890 %i.next = 
add nuw nsw i64 %i, 1 891 %cond = icmp slt i64 %i.next, %n 892 br i1 %cond, label %for.body, label %for.end 893 894for.end: 895 %3 = phi i64 [ %2, %for.body ] 896 ret i64 %3 897} 898 899; Make sure we scalarize the step vectors used for the pointer arithmetic. We 900; can't easily simplify vectorized step vectors. 901; 902; float s = 0; 903; for (int i = 0; i < n; i += 8) 904; s += (a[i] + b[i] + 1.0f); 905; 906; 907; 908; 909 910define float @scalarize_induction_variable_02(float* %a, float* %b, i64 %n) { 911; CHECK-LABEL: @scalarize_induction_variable_02( 912; CHECK-NEXT: entry: 913; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8) 914; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 915; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3 916; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 917; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2 918; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 919; CHECK: vector.ph: 920; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2 921; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] 922; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 8 923; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 924; CHECK: vector.body: 925; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 926; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] 927; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 928; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 929; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 930; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP3]] 931; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]] 932; CHECK-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4 933; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[TMP6]], align 4 934; 
CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0 935; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i32 1 936; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP3]] 937; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP4]] 938; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[TMP11]], align 4 939; CHECK-NEXT: [[TMP14:%.*]] = load float, float* [[TMP12]], align 4 940; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0 941; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i32 1 942; CHECK-NEXT: [[TMP17:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00> 943; CHECK-NEXT: [[TMP18:%.*]] = fadd fast <2 x float> [[TMP17]], [[TMP10]] 944; CHECK-NEXT: [[TMP19]] = fadd fast <2 x float> [[TMP18]], [[TMP16]] 945; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 946; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 947; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 948; CHECK: middle.block: 949; CHECK-NEXT: [[TMP21:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[TMP19]]) 950; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 951; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 952; CHECK: scalar.ph: 953; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 954; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] 955; CHECK-NEXT: br label [[FOR_BODY:%.*]] 956; CHECK: for.body: 957; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ] 958; CHECK-NEXT: [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ] 
959; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]] 960; CHECK-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4 961; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]] 962; CHECK-NEXT: [[TMP25:%.*]] = load float, float* [[TMP24]], align 4 963; CHECK-NEXT: [[TMP26:%.*]] = fadd fast float [[S]], 1.000000e+00 964; CHECK-NEXT: [[TMP27:%.*]] = fadd fast float [[TMP26]], [[TMP23]] 965; CHECK-NEXT: [[TMP28]] = fadd fast float [[TMP27]], [[TMP25]] 966; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 8 967; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 968; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]] 969; CHECK: for.end: 970; CHECK-NEXT: [[S_LCSSA:%.*]] = phi float [ [[TMP28]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] 971; CHECK-NEXT: ret float [[S_LCSSA]] 972; 973; IND-LABEL: @scalarize_induction_variable_02( 974; IND-NEXT: entry: 975; IND-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8) 976; IND-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 977; IND-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3 978; IND-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 979; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 980; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 981; IND: vector.ph: 982; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387902 983; IND-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 3 984; IND-NEXT: br label [[VECTOR_BODY:%.*]] 985; IND: vector.body: 986; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 987; IND-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] 988; IND-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3 989; IND-NEXT: [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 8 990; IND-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* 
[[A:%.*]], i64 [[OFFSET_IDX]] 991; IND-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP3]] 992; IND-NEXT: [[TMP6:%.*]] = load float, float* [[TMP4]], align 4 993; IND-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4 994; IND-NEXT: [[TMP8:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0 995; IND-NEXT: [[TMP9:%.*]] = insertelement <2 x float> [[TMP8]], float [[TMP7]], i64 1 996; IND-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[OFFSET_IDX]] 997; IND-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP3]] 998; IND-NEXT: [[TMP12:%.*]] = load float, float* [[TMP10]], align 4 999; IND-NEXT: [[TMP13:%.*]] = load float, float* [[TMP11]], align 4 1000; IND-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i64 0 1001; IND-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP13]], i64 1 1002; IND-NEXT: [[TMP16:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00> 1003; IND-NEXT: [[TMP17:%.*]] = fadd fast <2 x float> [[TMP16]], [[TMP9]] 1004; IND-NEXT: [[TMP18]] = fadd fast <2 x float> [[TMP17]], [[TMP15]] 1005; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1006; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1007; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 1008; IND: middle.block: 1009; IND-NEXT: [[TMP20:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[TMP18]]) 1010; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1011; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1012; IND: scalar.ph: 1013; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1014; IND-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] 1015; IND-NEXT: br label [[FOR_BODY:%.*]] 1016; 
IND: for.body: 1017; IND-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ] 1018; IND-NEXT: [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ] 1019; IND-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]] 1020; IND-NEXT: [[TMP22:%.*]] = load float, float* [[TMP21]], align 4 1021; IND-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]] 1022; IND-NEXT: [[TMP24:%.*]] = load float, float* [[TMP23]], align 4 1023; IND-NEXT: [[TMP25:%.*]] = fadd fast float [[S]], 1.000000e+00 1024; IND-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP25]], [[TMP22]] 1025; IND-NEXT: [[TMP27]] = fadd fast float [[TMP26]], [[TMP24]] 1026; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 8 1027; IND-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1028; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]] 1029; IND: for.end: 1030; IND-NEXT: [[S_LCSSA:%.*]] = phi float [ [[TMP27]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] 1031; IND-NEXT: ret float [[S_LCSSA]] 1032; 1033; UNROLL-LABEL: @scalarize_induction_variable_02( 1034; UNROLL-NEXT: entry: 1035; UNROLL-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8) 1036; UNROLL-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 1037; UNROLL-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3 1038; UNROLL-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1039; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 24 1040; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1041; UNROLL: vector.ph: 1042; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387900 1043; UNROLL-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 3 1044; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 1045; UNROLL: vector.body: 1046; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1047; UNROLL-NEXT: 
[[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ] 1048; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ] 1049; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3 1050; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 8 1051; UNROLL-NEXT: [[TMP4:%.*]] = or i64 [[OFFSET_IDX]], 16 1052; UNROLL-NEXT: [[TMP5:%.*]] = or i64 [[OFFSET_IDX]], 24 1053; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[OFFSET_IDX]] 1054; UNROLL-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP3]] 1055; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]] 1056; UNROLL-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]] 1057; UNROLL-NEXT: [[TMP10:%.*]] = load float, float* [[TMP6]], align 4 1058; UNROLL-NEXT: [[TMP11:%.*]] = load float, float* [[TMP7]], align 4 1059; UNROLL-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0 1060; UNROLL-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP11]], i64 1 1061; UNROLL-NEXT: [[TMP14:%.*]] = load float, float* [[TMP8]], align 4 1062; UNROLL-NEXT: [[TMP15:%.*]] = load float, float* [[TMP9]], align 4 1063; UNROLL-NEXT: [[TMP16:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0 1064; UNROLL-NEXT: [[TMP17:%.*]] = insertelement <2 x float> [[TMP16]], float [[TMP15]], i64 1 1065; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[OFFSET_IDX]] 1066; UNROLL-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP3]] 1067; UNROLL-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP4]] 1068; UNROLL-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP5]] 1069; UNROLL-NEXT: [[TMP22:%.*]] = load float, float* [[TMP18]], align 4 1070; UNROLL-NEXT: [[TMP23:%.*]] = load 
float, float* [[TMP19]], align 4 1071; UNROLL-NEXT: [[TMP24:%.*]] = insertelement <2 x float> poison, float [[TMP22]], i64 0 1072; UNROLL-NEXT: [[TMP25:%.*]] = insertelement <2 x float> [[TMP24]], float [[TMP23]], i64 1 1073; UNROLL-NEXT: [[TMP26:%.*]] = load float, float* [[TMP20]], align 4 1074; UNROLL-NEXT: [[TMP27:%.*]] = load float, float* [[TMP21]], align 4 1075; UNROLL-NEXT: [[TMP28:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i64 0 1076; UNROLL-NEXT: [[TMP29:%.*]] = insertelement <2 x float> [[TMP28]], float [[TMP27]], i64 1 1077; UNROLL-NEXT: [[TMP30:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00> 1078; UNROLL-NEXT: [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], <float 1.000000e+00, float 1.000000e+00> 1079; UNROLL-NEXT: [[TMP32:%.*]] = fadd fast <2 x float> [[TMP30]], [[TMP13]] 1080; UNROLL-NEXT: [[TMP33:%.*]] = fadd fast <2 x float> [[TMP31]], [[TMP17]] 1081; UNROLL-NEXT: [[TMP34]] = fadd fast <2 x float> [[TMP32]], [[TMP25]] 1082; UNROLL-NEXT: [[TMP35]] = fadd fast <2 x float> [[TMP33]], [[TMP29]] 1083; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1084; UNROLL-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1085; UNROLL-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 1086; UNROLL: middle.block: 1087; UNROLL-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x float> [[TMP35]], [[TMP34]] 1088; UNROLL-NEXT: [[TMP37:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[BIN_RDX]]) 1089; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1090; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1091; UNROLL: scalar.ph: 1092; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1093; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP37]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] 1094; UNROLL-NEXT: br label 
[[FOR_BODY:%.*]] 1095; UNROLL: for.body: 1096; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ] 1097; UNROLL-NEXT: [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP44:%.*]], [[FOR_BODY]] ] 1098; UNROLL-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]] 1099; UNROLL-NEXT: [[TMP39:%.*]] = load float, float* [[TMP38]], align 4 1100; UNROLL-NEXT: [[TMP40:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]] 1101; UNROLL-NEXT: [[TMP41:%.*]] = load float, float* [[TMP40]], align 4 1102; UNROLL-NEXT: [[TMP42:%.*]] = fadd fast float [[S]], 1.000000e+00 1103; UNROLL-NEXT: [[TMP43:%.*]] = fadd fast float [[TMP42]], [[TMP39]] 1104; UNROLL-NEXT: [[TMP44]] = fadd fast float [[TMP43]], [[TMP41]] 1105; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 8 1106; UNROLL-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1107; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]] 1108; UNROLL: for.end: 1109; UNROLL-NEXT: [[S_LCSSA:%.*]] = phi float [ [[TMP44]], [[FOR_BODY]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ] 1110; UNROLL-NEXT: ret float [[S_LCSSA]] 1111; 1112; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02( 1113; UNROLL-NO-IC-NEXT: entry: 1114; UNROLL-NO-IC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8) 1115; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 1116; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3 1117; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1118; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 1119; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1120; UNROLL-NO-IC: vector.ph: 1121; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 1122; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] 1123; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 8 1124; 
UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 1125; UNROLL-NO-IC: vector.body: 1126; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1127; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ] 1128; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ] 1129; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 1130; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 1131; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 1132; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16 1133; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 24 1134; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP3]] 1135; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]] 1136; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]] 1137; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP6]] 1138; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = load float, float* [[TMP7]], align 4 1139; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = load float, float* [[TMP8]], align 4 1140; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i32 0 1141; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP12]], i32 1 1142; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = load float, float* [[TMP9]], align 4 1143; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = load float, float* [[TMP10]], align 4 1144; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0 1145; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i32 1 1146; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP3]] 1147; 
UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP4]] 1148; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP5]] 1149; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP6]] 1150; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = load float, float* [[TMP19]], align 4 1151; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load float, float* [[TMP20]], align 4 1152; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i32 0 1153; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i32 1 1154; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = load float, float* [[TMP21]], align 4 1155; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = load float, float* [[TMP22]], align 4 1156; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i32 0 1157; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP28]], i32 1 1158; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00> 1159; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], <float 1.000000e+00, float 1.000000e+00> 1160; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = fadd fast <2 x float> [[TMP31]], [[TMP14]] 1161; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = fadd fast <2 x float> [[TMP32]], [[TMP18]] 1162; UNROLL-NO-IC-NEXT: [[TMP35]] = fadd fast <2 x float> [[TMP33]], [[TMP26]] 1163; UNROLL-NO-IC-NEXT: [[TMP36]] = fadd fast <2 x float> [[TMP34]], [[TMP30]] 1164; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1165; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1166; UNROLL-NO-IC-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 1167; UNROLL-NO-IC: middle.block: 1168; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x float> [[TMP36]], [[TMP35]] 1169; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = call 
fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[BIN_RDX]]) 1170; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1171; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1172; UNROLL-NO-IC: scalar.ph: 1173; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1174; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP38]], [[MIDDLE_BLOCK]] ] 1175; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] 1176; UNROLL-NO-IC: for.body: 1177; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ] 1178; UNROLL-NO-IC-NEXT: [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP45:%.*]], [[FOR_BODY]] ] 1179; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]] 1180; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = load float, float* [[TMP39]], align 4 1181; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]] 1182; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = load float, float* [[TMP41]], align 4 1183; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = fadd fast float [[S]], 1.000000e+00 1184; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = fadd fast float [[TMP43]], [[TMP40]] 1185; UNROLL-NO-IC-NEXT: [[TMP45]] = fadd fast float [[TMP44]], [[TMP42]] 1186; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 8 1187; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1188; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]] 1189; UNROLL-NO-IC: for.end: 1190; UNROLL-NO-IC-NEXT: [[S_LCSSA:%.*]] = phi float [ [[TMP45]], [[FOR_BODY]] ], [ [[TMP38]], [[MIDDLE_BLOCK]] ] 1191; UNROLL-NO-IC-NEXT: ret float [[S_LCSSA]] 1192; 1193; INTERLEAVE-LABEL: @scalarize_induction_variable_02( 1194; INTERLEAVE-NEXT: entry: 1195; INTERLEAVE-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 
[[N:%.*]], i64 8) 1196; INTERLEAVE-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 1197; INTERLEAVE-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3 1198; INTERLEAVE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1199; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64 1200; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1201; INTERLEAVE: vector.ph: 1202; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 7 1203; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 1204; INTERLEAVE-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 8, i64 [[N_MOD_VF]] 1205; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[TMP4]] 1206; INTERLEAVE-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 3 1207; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 1208; INTERLEAVE: vector.body: 1209; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1210; INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] 1211; INTERLEAVE-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] 1212; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3 1213; INTERLEAVE-NEXT: [[TMP5:%.*]] = or i64 [[OFFSET_IDX]], 32 1214; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[OFFSET_IDX]] 1215; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]] 1216; INTERLEAVE-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP6]] to <32 x float>* 1217; INTERLEAVE-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP7]] to <32 x float>* 1218; INTERLEAVE-NEXT: [[WIDE_VEC:%.*]] = load <32 x float>, <32 x float>* [[TMP8]], align 4 1219; INTERLEAVE-NEXT: [[WIDE_VEC2:%.*]] = load <32 x float>, <32 x float>* [[TMP9]], align 4 1220; INTERLEAVE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x float> [[WIDE_VEC]], <32 x float> poison, <4 x i32> <i32 0, 
i32 8, i32 16, i32 24> 1221; INTERLEAVE-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <32 x float> [[WIDE_VEC2]], <32 x float> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24> 1222; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[OFFSET_IDX]] 1223; INTERLEAVE-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP5]] 1224; INTERLEAVE-NEXT: [[TMP12:%.*]] = bitcast float* [[TMP10]] to <32 x float>* 1225; INTERLEAVE-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP11]] to <32 x float>* 1226; INTERLEAVE-NEXT: [[WIDE_VEC4:%.*]] = load <32 x float>, <32 x float>* [[TMP12]], align 4 1227; INTERLEAVE-NEXT: [[WIDE_VEC5:%.*]] = load <32 x float>, <32 x float>* [[TMP13]], align 4 1228; INTERLEAVE-NEXT: [[STRIDED_VEC6:%.*]] = shufflevector <32 x float> [[WIDE_VEC4]], <32 x float> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24> 1229; INTERLEAVE-NEXT: [[STRIDED_VEC7:%.*]] = shufflevector <32 x float> [[WIDE_VEC5]], <32 x float> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24> 1230; INTERLEAVE-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> 1231; INTERLEAVE-NEXT: [[TMP15:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> 1232; INTERLEAVE-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[TMP14]], [[STRIDED_VEC]] 1233; INTERLEAVE-NEXT: [[TMP17:%.*]] = fadd fast <4 x float> [[TMP15]], [[STRIDED_VEC3]] 1234; INTERLEAVE-NEXT: [[TMP18]] = fadd fast <4 x float> [[TMP16]], [[STRIDED_VEC6]] 1235; INTERLEAVE-NEXT: [[TMP19]] = fadd fast <4 x float> [[TMP17]], [[STRIDED_VEC7]] 1236; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 1237; INTERLEAVE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1238; INTERLEAVE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 1239; INTERLEAVE: middle.block: 1240; 
INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP19]], [[TMP18]] 1241; INTERLEAVE-NEXT: [[TMP21:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX]]) 1242; INTERLEAVE-NEXT: br label [[SCALAR_PH]] 1243; INTERLEAVE: scalar.ph: 1244; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1245; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] 1246; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] 1247; INTERLEAVE: for.body: 1248; INTERLEAVE-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ] 1249; INTERLEAVE-NEXT: [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ] 1250; INTERLEAVE-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]] 1251; INTERLEAVE-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4 1252; INTERLEAVE-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]] 1253; INTERLEAVE-NEXT: [[TMP25:%.*]] = load float, float* [[TMP24]], align 4 1254; INTERLEAVE-NEXT: [[TMP26:%.*]] = fadd fast float [[S]], 1.000000e+00 1255; INTERLEAVE-NEXT: [[TMP27:%.*]] = fadd fast float [[TMP26]], [[TMP23]] 1256; INTERLEAVE-NEXT: [[TMP28]] = fadd fast float [[TMP27]], [[TMP25]] 1257; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 8 1258; INTERLEAVE-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1259; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP14:![0-9]+]] 1260; INTERLEAVE: for.end: 1261; INTERLEAVE-NEXT: ret float [[TMP28]] 1262; 1263entry: 1264 br label %for.body 1265 1266for.body: 1267 %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] 1268 %s = phi float [ 0.0, %entry ], [ %6, %for.body ] 1269 %0 = getelementptr inbounds float, float* %a, i64 %i 1270 %1 = load float, float* %0, align 4 1271 %2 = getelementptr inbounds 
float, float* %b, i64 %i 1272 %3 = load float, float* %2, align 4 1273 %4 = fadd fast float %s, 1.0 1274 %5 = fadd fast float %4, %1 1275 %6 = fadd fast float %5, %3 1276 %i.next = add nuw nsw i64 %i, 8 1277 %cond = icmp slt i64 %i.next, %n 1278 br i1 %cond, label %for.body, label %for.end 1279 1280for.end: 1281 %s.lcssa = phi float [ %6, %for.body ] 1282 ret float %s.lcssa 1283} 1284 1285; Make sure we scalarize the step vectors used for the pointer arithmetic. We 1286; can't easily simplify vectorized step vectors. (Interleaved accesses.) 1287; 1288; for (int i = 0; i < n; ++i) 1289; a[i].f ^= y; 1290; 1291 1292%pair.i32 = type { i32, i32 } 1293define void @scalarize_induction_variable_03(%pair.i32 *%p, i32 %y, i64 %n) { 1294; CHECK-LABEL: @scalarize_induction_variable_03( 1295; CHECK-NEXT: entry: 1296; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 1297; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2 1298; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1299; CHECK: vector.ph: 1300; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2 1301; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] 1302; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0 1303; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 1304; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1305; CHECK: vector.body: 1306; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1307; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1308; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 1309; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[TMP0]], i32 1 1310; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP1]], i32 1 1311; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* 
[[TMP2]], align 8 1312; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 8 1313; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0 1314; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP5]], i32 1 1315; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[TMP7]], [[BROADCAST_SPLAT]] 1316; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i32 0 1317; CHECK-NEXT: store i32 [[TMP9]], i32* [[TMP2]], align 8 1318; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[TMP8]], i32 1 1319; CHECK-NEXT: store i32 [[TMP10]], i32* [[TMP3]], align 8 1320; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1321; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1322; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 1323; CHECK: middle.block: 1324; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 1325; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1326; CHECK: scalar.ph: 1327; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1328; CHECK-NEXT: br label [[FOR_BODY:%.*]] 1329; CHECK: for.body: 1330; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1331; CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1 1332; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[F]], align 8 1333; CHECK-NEXT: [[TMP13:%.*]] = xor i32 [[TMP12]], [[Y]] 1334; CHECK-NEXT: store i32 [[TMP13]], i32* [[F]], align 8 1335; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1336; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1337; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP16:![0-9]+]] 1338; CHECK: for.end: 1339; CHECK-NEXT: ret void 1340; 1341; IND-LABEL: @scalarize_induction_variable_03( 1342; IND-NEXT: entry: 1343; IND-NEXT: [[SMAX:%.*]] = call 
i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 1344; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2 1345; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1346; IND: vector.ph: 1347; IND-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806 1348; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 1349; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 1350; IND-NEXT: br label [[VECTOR_BODY:%.*]] 1351; IND: vector.body: 1352; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1353; IND-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 1354; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 1 1355; IND-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP0]], i32 1 1356; IND-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 8 1357; IND-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 8 1358; IND-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i64 0 1359; IND-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i64 1 1360; IND-NEXT: [[TMP7:%.*]] = xor <2 x i32> [[TMP6]], [[BROADCAST_SPLAT]] 1361; IND-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP7]], i64 0 1362; IND-NEXT: store i32 [[TMP8]], i32* [[TMP1]], align 8 1363; IND-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i64 1 1364; IND-NEXT: store i32 [[TMP9]], i32* [[TMP2]], align 8 1365; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1366; IND-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1367; IND-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 1368; IND: middle.block: 1369; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 1370; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label 
[[SCALAR_PH]] 1371; IND: scalar.ph: 1372; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1373; IND-NEXT: br label [[FOR_BODY:%.*]] 1374; IND: for.body: 1375; IND-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1376; IND-NEXT: [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1 1377; IND-NEXT: [[TMP11:%.*]] = load i32, i32* [[F]], align 8 1378; IND-NEXT: [[TMP12:%.*]] = xor i32 [[TMP11]], [[Y]] 1379; IND-NEXT: store i32 [[TMP12]], i32* [[F]], align 8 1380; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1381; IND-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1382; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP16:![0-9]+]] 1383; IND: for.end: 1384; IND-NEXT: ret void 1385; 1386; UNROLL-LABEL: @scalarize_induction_variable_03( 1387; UNROLL-NEXT: entry: 1388; UNROLL-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 1389; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4 1390; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1391; UNROLL: vector.ph: 1392; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804 1393; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0 1394; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 1395; UNROLL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[Y]], i64 0 1396; UNROLL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer 1397; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 1398; UNROLL: vector.body: 1399; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1400; UNROLL-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 
1401; UNROLL-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 1402; UNROLL-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 1403; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 1 1404; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP0]], i32 1 1405; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP1]], i32 1 1406; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1 1407; UNROLL-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP3]], align 8 1408; UNROLL-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 8 1409; UNROLL-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i64 0 1410; UNROLL-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP8]], i64 1 1411; UNROLL-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP5]], align 8 1412; UNROLL-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP6]], align 8 1413; UNROLL-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i64 0 1414; UNROLL-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP12]], i64 1 1415; UNROLL-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP10]], [[BROADCAST_SPLAT]] 1416; UNROLL-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP14]], [[BROADCAST_SPLAT2]] 1417; UNROLL-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP15]], i64 0 1418; UNROLL-NEXT: store i32 [[TMP17]], i32* [[TMP3]], align 8 1419; UNROLL-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP15]], i64 1 1420; UNROLL-NEXT: store i32 [[TMP18]], i32* [[TMP4]], align 8 1421; UNROLL-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP16]], i64 0 1422; UNROLL-NEXT: store i32 [[TMP19]], i32* [[TMP5]], align 8 1423; UNROLL-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP16]], i64 1 1424; UNROLL-NEXT: store i32 [[TMP20]], i32* [[TMP6]], align 8 1425; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1426; UNROLL-NEXT: [[TMP21:%.*]] = icmp 
eq i64 [[INDEX_NEXT]], [[N_VEC]] 1427; UNROLL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 1428; UNROLL: middle.block: 1429; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 1430; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1431; UNROLL: scalar.ph: 1432; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1433; UNROLL-NEXT: br label [[FOR_BODY:%.*]] 1434; UNROLL: for.body: 1435; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1436; UNROLL-NEXT: [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1 1437; UNROLL-NEXT: [[TMP22:%.*]] = load i32, i32* [[F]], align 8 1438; UNROLL-NEXT: [[TMP23:%.*]] = xor i32 [[TMP22]], [[Y]] 1439; UNROLL-NEXT: store i32 [[TMP23]], i32* [[F]], align 8 1440; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1441; UNROLL-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1442; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP16:![0-9]+]] 1443; UNROLL: for.end: 1444; UNROLL-NEXT: ret void 1445; 1446; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_03( 1447; UNROLL-NO-IC-NEXT: entry: 1448; UNROLL-NO-IC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 1449; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4 1450; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1451; UNROLL-NO-IC: vector.ph: 1452; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 4 1453; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] 1454; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0 1455; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 1456; 
UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[Y]], i32 0 1457; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer 1458; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 1459; UNROLL-NO-IC: vector.body: 1460; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1461; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1462; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 1463; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 1464; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 1465; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[TMP0]], i32 1 1466; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP1]], i32 1 1467; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1 1468; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 1 1469; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 8 1470; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 8 1471; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0 1472; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP9]], i32 1 1473; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP6]], align 8 1474; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP7]], align 8 1475; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0 1476; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP13]], i32 1 1477; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP11]], [[BROADCAST_SPLAT]] 1478; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = xor <2 x i32> [[TMP15]], 
[[BROADCAST_SPLAT2]] 1479; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0 1480; UNROLL-NO-IC-NEXT: store i32 [[TMP18]], i32* [[TMP4]], align 8 1481; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1 1482; UNROLL-NO-IC-NEXT: store i32 [[TMP19]], i32* [[TMP5]], align 8 1483; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0 1484; UNROLL-NO-IC-NEXT: store i32 [[TMP20]], i32* [[TMP6]], align 8 1485; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1 1486; UNROLL-NO-IC-NEXT: store i32 [[TMP21]], i32* [[TMP7]], align 8 1487; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1488; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1489; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 1490; UNROLL-NO-IC: middle.block: 1491; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 1492; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1493; UNROLL-NO-IC: scalar.ph: 1494; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1495; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] 1496; UNROLL-NO-IC: for.body: 1497; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1498; UNROLL-NO-IC-NEXT: [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1 1499; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = load i32, i32* [[F]], align 8 1500; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = xor i32 [[TMP23]], [[Y]] 1501; UNROLL-NO-IC-NEXT: store i32 [[TMP24]], i32* [[F]], align 8 1502; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1503; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1504; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP16:![0-9]+]] 1505; 
UNROLL-NO-IC: for.end: 1506; UNROLL-NO-IC-NEXT: ret void 1507; 1508; INTERLEAVE-LABEL: @scalarize_induction_variable_03( 1509; INTERLEAVE-NEXT: entry: 1510; INTERLEAVE-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 1511; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 9 1512; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1513; INTERLEAVE: vector.ph: 1514; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SMAX]], 7 1515; INTERLEAVE-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 1516; INTERLEAVE-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 8, i64 [[N_MOD_VF]] 1517; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP1]] 1518; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0 1519; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 1520; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0 1521; INTERLEAVE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer 1522; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 1523; INTERLEAVE: vector.body: 1524; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1525; INTERLEAVE-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 1 1526; INTERLEAVE-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 2 1527; INTERLEAVE-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 3 1528; INTERLEAVE-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 4 1529; INTERLEAVE-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 5 1530; INTERLEAVE-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 6 1531; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 7 1532; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 1 1533; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr 
inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1 1534; INTERLEAVE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 1 1535; INTERLEAVE-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP4]], i32 1 1536; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 1 1537; INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP6]], i32 1 1538; INTERLEAVE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP7]], i32 1 1539; INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP8]], i32 1 1540; INTERLEAVE-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>* 1541; INTERLEAVE-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP13]] to <8 x i32>* 1542; INTERLEAVE-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP17]], align 8 1543; INTERLEAVE-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP18]], align 8 1544; INTERLEAVE-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 1545; INTERLEAVE-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 1546; INTERLEAVE-NEXT: [[TMP19:%.*]] = xor <4 x i32> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] 1547; INTERLEAVE-NEXT: [[TMP20:%.*]] = xor <4 x i32> [[STRIDED_VEC2]], [[BROADCAST_SPLAT4]] 1548; INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP19]], i64 0 1549; INTERLEAVE-NEXT: store i32 [[TMP21]], i32* [[TMP9]], align 8 1550; INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP19]], i64 1 1551; INTERLEAVE-NEXT: store i32 [[TMP22]], i32* [[TMP10]], align 8 1552; INTERLEAVE-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP19]], i64 2 1553; INTERLEAVE-NEXT: store i32 [[TMP23]], i32* [[TMP11]], align 8 1554; 
INTERLEAVE-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP19]], i64 3 1555; INTERLEAVE-NEXT: store i32 [[TMP24]], i32* [[TMP12]], align 8 1556; INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP20]], i64 0 1557; INTERLEAVE-NEXT: store i32 [[TMP25]], i32* [[TMP13]], align 8 1558; INTERLEAVE-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP20]], i64 1 1559; INTERLEAVE-NEXT: store i32 [[TMP26]], i32* [[TMP14]], align 8 1560; INTERLEAVE-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP20]], i64 2 1561; INTERLEAVE-NEXT: store i32 [[TMP27]], i32* [[TMP15]], align 8 1562; INTERLEAVE-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP20]], i64 3 1563; INTERLEAVE-NEXT: store i32 [[TMP28]], i32* [[TMP16]], align 8 1564; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 1565; INTERLEAVE-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1566; INTERLEAVE-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 1567; INTERLEAVE: middle.block: 1568; INTERLEAVE-NEXT: br label [[SCALAR_PH]] 1569; INTERLEAVE: scalar.ph: 1570; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1571; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] 1572; INTERLEAVE: for.body: 1573; INTERLEAVE-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1574; INTERLEAVE-NEXT: [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1 1575; INTERLEAVE-NEXT: [[TMP30:%.*]] = load i32, i32* [[F]], align 8 1576; INTERLEAVE-NEXT: [[TMP31:%.*]] = xor i32 [[TMP30]], [[Y]] 1577; INTERLEAVE-NEXT: store i32 [[TMP31]], i32* [[F]], align 8 1578; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1579; INTERLEAVE-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1580; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP16:![0-9]+]] 1581; INTERLEAVE: for.end: 1582; 
INTERLEAVE-NEXT: ret void 1583; 1584entry: 1585 br label %for.body 1586 1587for.body: 1588 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] 1589 %f = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1 1590 %0 = load i32, i32* %f, align 8 1591 %1 = xor i32 %0, %y 1592 store i32 %1, i32* %f, align 8 1593 %i.next = add nuw nsw i64 %i, 1 1594 %cond = icmp slt i64 %i.next, %n 1595 br i1 %cond, label %for.body, label %for.end 1596 1597for.end: 1598 ret void 1599} 1600 1601; Make sure we scalarize the step vectors used for the pointer arithmetic. We 1602; can't easily simplify vectorized step vectors. (Interleaved accesses.) 1603; 1604; for (int i = 0; i < n; ++i) 1605; p[i].f = a[i * 4] 1606; 1607 1608define void @scalarize_induction_variable_04(i32* %a, %pair.i32* %p, i32 %n) { 1609; CHECK-LABEL: @scalarize_induction_variable_04( 1610; CHECK-NEXT: entry: 1611; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* 1612; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 1613; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 1614; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1615; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2 1616; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1617; CHECK: vector.memcheck: 1618; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1 1619; CHECK-NEXT: [[SCEVGEP1:%.*]] = bitcast i32* [[SCEVGEP]] to i8* 1620; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1 1621; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 1622; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1 1623; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]] 1624; CHECK-NEXT: [[SCEVGEP23:%.*]] = bitcast %pair.i32* [[SCEVGEP2]] to i8* 1625; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2 1626; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], 1 1627; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr 
i32, i32* [[A]], i64 [[TMP7]] 1628; CHECK-NEXT: [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8* 1629; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP56]] 1630; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[A4]], [[SCEVGEP23]] 1631; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1632; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1633; CHECK: vector.ph: 1634; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2 1635; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] 1636; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1637; CHECK: vector.body: 1638; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1639; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1640; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 1641; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 1642; CHECK-NEXT: [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1643; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0 1644; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] 1645; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1 1646; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]] 1647; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 1, !alias.scope !17 1648; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP14]], align 1, !alias.scope !17 1649; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP8]], i32 1 1650; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP9]], i32 1 1651; CHECK-NEXT: store i32 [[TMP15]], i32* [[TMP17]], align 1, !alias.scope !20, !noalias !17 1652; CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP18]], align 1, !alias.scope !20, !noalias !17 1653; CHECK-NEXT: [[INDEX_NEXT]] = add nuw 
i64 [[INDEX]], 2 1654; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1655; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1656; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 1657; CHECK: middle.block: 1658; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1659; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1660; CHECK: scalar.ph: 1661; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] 1662; CHECK-NEXT: br label [[FOR_BODY:%.*]] 1663; CHECK: for.body: 1664; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1665; CHECK-NEXT: [[TMP20:%.*]] = shl nsw i64 [[I]], 2 1666; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]] 1667; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 1 1668; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1 1669; CHECK-NEXT: store i32 [[TMP22]], i32* [[TMP23]], align 1 1670; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1671; CHECK-NEXT: [[TMP24:%.*]] = trunc i64 [[I_NEXT]] to i32 1672; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP24]], [[N]] 1673; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] 1674; CHECK: for.end: 1675; CHECK-NEXT: ret void 1676; 1677; IND-LABEL: @scalarize_induction_variable_04( 1678; IND-NEXT: entry: 1679; IND-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 1680; IND-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 1681; IND-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1682; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 1683; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1684; IND: vector.memcheck: 1685; IND-NEXT: [[SCEVGEP:%.*]] = getelementptr 
[[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1 1686; IND-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1 1687; IND-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 1688; IND-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1 1689; IND-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2 1690; IND-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], 1 1691; IND-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]] 1692; IND-NEXT: [[BOUND0:%.*]] = icmp ult i32* [[SCEVGEP]], [[SCEVGEP5]] 1693; IND-NEXT: [[TMP8:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 0 1694; IND-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP8]], [[A]] 1695; IND-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1696; IND-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1697; IND: vector.ph: 1698; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 1699; IND-NEXT: br label [[VECTOR_BODY:%.*]] 1700; IND: vector.body: 1701; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1702; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1703; IND-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 1704; IND-NEXT: [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1705; IND-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i64 0 1706; IND-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] 1707; IND-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i64 1 1708; IND-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]] 1709; IND-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 1, !alias.scope !17 1710; IND-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP14]], align 1, !alias.scope !17 1711; IND-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1 1712; IND-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 
[[TMP9]], i32 1 1713; IND-NEXT: store i32 [[TMP15]], i32* [[TMP17]], align 1, !alias.scope !20, !noalias !17 1714; IND-NEXT: store i32 [[TMP16]], i32* [[TMP18]], align 1, !alias.scope !20, !noalias !17 1715; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1716; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1717; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1718; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 1719; IND: middle.block: 1720; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1721; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1722; IND: scalar.ph: 1723; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] 1724; IND-NEXT: br label [[FOR_BODY:%.*]] 1725; IND: for.body: 1726; IND-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1727; IND-NEXT: [[TMP20:%.*]] = shl nsw i64 [[I]], 2 1728; IND-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]] 1729; IND-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 1 1730; IND-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1 1731; IND-NEXT: store i32 [[TMP22]], i32* [[TMP23]], align 1 1732; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1733; IND-NEXT: [[TMP24:%.*]] = trunc i64 [[I_NEXT]] to i32 1734; IND-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP24]], [[N]] 1735; IND-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] 1736; IND: for.end: 1737; IND-NEXT: ret void 1738; 1739; UNROLL-LABEL: @scalarize_induction_variable_04( 1740; UNROLL-NEXT: entry: 1741; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 1742; UNROLL-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 1743; UNROLL-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1744; UNROLL-NEXT: 
[[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 1745; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1746; UNROLL: vector.memcheck: 1747; UNROLL-NEXT: [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1 1748; UNROLL-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1 1749; UNROLL-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 1750; UNROLL-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1 1751; UNROLL-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2 1752; UNROLL-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], 1 1753; UNROLL-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]] 1754; UNROLL-NEXT: [[BOUND0:%.*]] = icmp ult i32* [[SCEVGEP]], [[SCEVGEP5]] 1755; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 0 1756; UNROLL-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP8]], [[A]] 1757; UNROLL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1758; UNROLL-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1759; UNROLL: vector.ph: 1760; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 1761; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 1762; UNROLL: vector.body: 1763; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1764; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1765; UNROLL-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 1766; UNROLL-NEXT: [[TMP10:%.*]] = or i64 [[INDEX]], 2 1767; UNROLL-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 3 1768; UNROLL-NEXT: [[TMP12:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1769; UNROLL-NEXT: [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1770; UNROLL-NEXT: [[TMP13:%.*]] = add <2 x i64> [[STEP_ADD]], <i64 8, i64 8> 1771; UNROLL-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i64 0 1772; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr 
inbounds i32, i32* [[A]], i64 [[TMP14]] 1773; UNROLL-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i64 1 1774; UNROLL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]] 1775; UNROLL-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP13]], i64 0 1776; UNROLL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]] 1777; UNROLL-NEXT: [[TMP20:%.*]] = extractelement <2 x i64> [[TMP13]], i64 1 1778; UNROLL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]] 1779; UNROLL-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP15]], align 1, !alias.scope !17 1780; UNROLL-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP17]], align 1, !alias.scope !17 1781; UNROLL-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP19]], align 1, !alias.scope !17 1782; UNROLL-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP21]], align 1, !alias.scope !17 1783; UNROLL-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1 1784; UNROLL-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP9]], i32 1 1785; UNROLL-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP10]], i32 1 1786; UNROLL-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP11]], i32 1 1787; UNROLL-NEXT: store i32 [[TMP22]], i32* [[TMP26]], align 1, !alias.scope !20, !noalias !17 1788; UNROLL-NEXT: store i32 [[TMP23]], i32* [[TMP27]], align 1, !alias.scope !20, !noalias !17 1789; UNROLL-NEXT: store i32 [[TMP24]], i32* [[TMP28]], align 1, !alias.scope !20, !noalias !17 1790; UNROLL-NEXT: store i32 [[TMP25]], i32* [[TMP29]], align 1, !alias.scope !20, !noalias !17 1791; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1792; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4> 1793; UNROLL-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1794; UNROLL-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 1795; UNROLL: middle.block: 1796; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1797; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1798; UNROLL: scalar.ph: 1799; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] 1800; UNROLL-NEXT: br label [[FOR_BODY:%.*]] 1801; UNROLL: for.body: 1802; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1803; UNROLL-NEXT: [[TMP31:%.*]] = shl nsw i64 [[I]], 2 1804; UNROLL-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] 1805; UNROLL-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 1 1806; UNROLL-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1 1807; UNROLL-NEXT: store i32 [[TMP33]], i32* [[TMP34]], align 1 1808; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1809; UNROLL-NEXT: [[TMP35:%.*]] = trunc i64 [[I_NEXT]] to i32 1810; UNROLL-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP35]], [[N]] 1811; UNROLL-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] 1812; UNROLL: for.end: 1813; UNROLL-NEXT: ret void 1814; 1815; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_04( 1816; UNROLL-NO-IC-NEXT: entry: 1817; UNROLL-NO-IC-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8* 1818; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 1819; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 1820; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1821; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 1822; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1823; UNROLL-NO-IC: vector.memcheck: 1824; UNROLL-NO-IC-NEXT: [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1 1825; UNROLL-NO-IC-NEXT: 
[[SCEVGEP1:%.*]] = bitcast i32* [[SCEVGEP]] to i8* 1826; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1 1827; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 1828; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1 1829; UNROLL-NO-IC-NEXT: [[SCEVGEP2:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]] 1830; UNROLL-NO-IC-NEXT: [[SCEVGEP23:%.*]] = bitcast %pair.i32* [[SCEVGEP2]] to i8* 1831; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2 1832; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], 1 1833; UNROLL-NO-IC-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP7]] 1834; UNROLL-NO-IC-NEXT: [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8* 1835; UNROLL-NO-IC-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP56]] 1836; UNROLL-NO-IC-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[A4]], [[SCEVGEP23]] 1837; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1838; UNROLL-NO-IC-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1839; UNROLL-NO-IC: vector.ph: 1840; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 1841; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] 1842; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 1843; UNROLL-NO-IC: vector.body: 1844; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1845; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1846; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1847; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 1848; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 1849; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2 1850; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 3 1851; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1852; 
UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = shl nsw <2 x i64> [[STEP_ADD]], <i64 2, i64 2> 1853; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0 1854; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP14]] 1855; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1 1856; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]] 1857; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0 1858; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]] 1859; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1 1860; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]] 1861; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP15]], align 1, !alias.scope !17 1862; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP17]], align 1, !alias.scope !17 1863; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP19]], align 1, !alias.scope !17 1864; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP21]], align 1, !alias.scope !17 1865; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP8]], i32 1 1866; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP9]], i32 1 1867; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP10]], i32 1 1868; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP11]], i32 1 1869; UNROLL-NO-IC-NEXT: store i32 [[TMP22]], i32* [[TMP26]], align 1, !alias.scope !20, !noalias !17 1870; UNROLL-NO-IC-NEXT: store i32 [[TMP23]], i32* [[TMP27]], align 1, !alias.scope !20, !noalias !17 1871; UNROLL-NO-IC-NEXT: store i32 [[TMP24]], i32* [[TMP28]], align 1, !alias.scope !20, !noalias !17 1872; UNROLL-NO-IC-NEXT: store i32 [[TMP25]], i32* [[TMP29]], 
align 1, !alias.scope !20, !noalias !17 1873; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1874; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2> 1875; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1876; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 1877; UNROLL-NO-IC: middle.block: 1878; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1879; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1880; UNROLL-NO-IC: scalar.ph: 1881; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] 1882; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] 1883; UNROLL-NO-IC: for.body: 1884; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1885; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = shl nsw i64 [[I]], 2 1886; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] 1887; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 1 1888; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1 1889; UNROLL-NO-IC-NEXT: store i32 [[TMP33]], i32* [[TMP34]], align 1 1890; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1891; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = trunc i64 [[I_NEXT]] to i32 1892; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP35]], [[N]] 1893; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] 1894; UNROLL-NO-IC: for.end: 1895; UNROLL-NO-IC-NEXT: ret void 1896; 1897; INTERLEAVE-LABEL: @scalarize_induction_variable_04( 1898; INTERLEAVE-NEXT: entry: 1899; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 1900; INTERLEAVE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 1901; INTERLEAVE-NEXT: [[TMP2:%.*]] = 
add nuw nsw i64 [[TMP1]], 1 1902; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8 1903; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1904; INTERLEAVE: vector.memcheck: 1905; INTERLEAVE-NEXT: [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1 1906; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1 1907; INTERLEAVE-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 1908; INTERLEAVE-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1 1909; INTERLEAVE-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2 1910; INTERLEAVE-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], 1 1911; INTERLEAVE-NEXT: [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]] 1912; INTERLEAVE-NEXT: [[BOUND0:%.*]] = icmp ult i32* [[SCEVGEP]], [[SCEVGEP5]] 1913; INTERLEAVE-NEXT: [[TMP8:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 0 1914; INTERLEAVE-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP8]], [[A]] 1915; INTERLEAVE-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1916; INTERLEAVE-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1917; INTERLEAVE: vector.ph: 1918; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 7 1919; INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 1920; INTERLEAVE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 8, i64 [[N_MOD_VF]] 1921; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[TMP10]] 1922; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 1923; INTERLEAVE: vector.body: 1924; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1925; INTERLEAVE-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 1926; INTERLEAVE-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2 1927; INTERLEAVE-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 3 1928; INTERLEAVE-NEXT: [[TMP14:%.*]] = or i64 [[INDEX]], 4 1929; INTERLEAVE-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 5 1930; INTERLEAVE-NEXT: 
[[TMP16:%.*]] = or i64 [[INDEX]], 6 1931; INTERLEAVE-NEXT: [[TMP17:%.*]] = or i64 [[INDEX]], 7 1932; INTERLEAVE-NEXT: [[TMP18:%.*]] = shl nsw i64 [[INDEX]], 2 1933; INTERLEAVE-NEXT: [[TMP19:%.*]] = shl nsw i64 [[TMP14]], 2 1934; INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]] 1935; INTERLEAVE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]] 1936; INTERLEAVE-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP20]] to <16 x i32>* 1937; INTERLEAVE-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP21]] to <16 x i32>* 1938; INTERLEAVE-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, <16 x i32>* [[TMP22]], align 1 1939; INTERLEAVE-NEXT: [[WIDE_VEC7:%.*]] = load <16 x i32>, <16 x i32>* [[TMP23]], align 1 1940; INTERLEAVE-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1 1941; INTERLEAVE-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP11]], i32 1 1942; INTERLEAVE-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP12]], i32 1 1943; INTERLEAVE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP13]], i32 1 1944; INTERLEAVE-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP14]], i32 1 1945; INTERLEAVE-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP15]], i32 1 1946; INTERLEAVE-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP16]], i32 1 1947; INTERLEAVE-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP17]], i32 1 1948; INTERLEAVE-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 0 1949; INTERLEAVE-NEXT: store i32 [[TMP32]], i32* [[TMP24]], align 1, !alias.scope !17, !noalias !20 1950; INTERLEAVE-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 4 1951; INTERLEAVE-NEXT: store i32 [[TMP33]], i32* [[TMP25]], align 
1, !alias.scope !17, !noalias !20 1952; INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 8 1953; INTERLEAVE-NEXT: store i32 [[TMP34]], i32* [[TMP26]], align 1, !alias.scope !17, !noalias !20 1954; INTERLEAVE-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 12 1955; INTERLEAVE-NEXT: store i32 [[TMP35]], i32* [[TMP27]], align 1, !alias.scope !17, !noalias !20 1956; INTERLEAVE-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[WIDE_VEC7]], i64 0 1957; INTERLEAVE-NEXT: store i32 [[TMP36]], i32* [[TMP28]], align 1, !alias.scope !17, !noalias !20 1958; INTERLEAVE-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[WIDE_VEC7]], i64 4 1959; INTERLEAVE-NEXT: store i32 [[TMP37]], i32* [[TMP29]], align 1, !alias.scope !17, !noalias !20 1960; INTERLEAVE-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[WIDE_VEC7]], i64 8 1961; INTERLEAVE-NEXT: store i32 [[TMP38]], i32* [[TMP30]], align 1, !alias.scope !17, !noalias !20 1962; INTERLEAVE-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[WIDE_VEC7]], i64 12 1963; INTERLEAVE-NEXT: store i32 [[TMP39]], i32* [[TMP31]], align 1, !alias.scope !17, !noalias !20 1964; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 1965; INTERLEAVE-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1966; INTERLEAVE-NEXT: br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 1967; INTERLEAVE: middle.block: 1968; INTERLEAVE-NEXT: br label [[SCALAR_PH]] 1969; INTERLEAVE: scalar.ph: 1970; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] 1971; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] 1972; INTERLEAVE: for.body: 1973; INTERLEAVE-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1974; INTERLEAVE-NEXT: [[TMP41:%.*]] = shl nsw i64 [[I]], 2 1975; INTERLEAVE-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 
[[TMP41]] 1976; INTERLEAVE-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 1 1977; INTERLEAVE-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1 1978; INTERLEAVE-NEXT: store i32 [[TMP43]], i32* [[TMP44]], align 1 1979; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1980; INTERLEAVE-NEXT: [[TMP45:%.*]] = trunc i64 [[I_NEXT]] to i32 1981; INTERLEAVE-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP45]], [[N]] 1982; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] 1983; INTERLEAVE: for.end: 1984; INTERLEAVE-NEXT: ret void 1985; 1986entry: 1987 br label %for.body 1988 1989for.body: 1990 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry] 1991 %0 = shl nsw i64 %i, 2 1992 %1 = getelementptr inbounds i32, i32* %a, i64 %0 1993 %2 = load i32, i32* %1, align 1 1994 %3 = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1 1995 store i32 %2, i32* %3, align 1 1996 %i.next = add nuw nsw i64 %i, 1 1997 %4 = trunc i64 %i.next to i32 1998 %cond = icmp eq i32 %4, %n 1999 br i1 %cond, label %for.end, label %for.body 2000 2001for.end: 2002 ret void 2003} 2004 2005; PR30542. Ensure we generate all the scalar steps for the induction variable. 2006; The scalar induction variable is used by a getelementptr instruction 2007; (uniform), and a udiv (non-uniform). 
2008; 2009; int sum = 0; 2010; for (int i = 0; i < n; ++i) { 2011; int x = a[i]; 2012; if (c) 2013; x /= i; 2014; sum += x; 2015; } 2016; 2017; 2018; 2019; 2020 2021define i32 @scalarize_induction_variable_05(i32* %a, i32 %x, i1 %c, i32 %n) { 2022; CHECK-LABEL: @scalarize_induction_variable_05( 2023; CHECK-NEXT: entry: 2024; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1) 2025; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 2 2026; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2027; CHECK: vector.ph: 2028; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[SMAX]], 2 2029; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]] 2030; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0 2031; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer 2032; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 2033; CHECK: vector.body: 2034; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] 2035; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE2]] ] 2036; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[PRED_UDIV_CONTINUE2]] ] 2037; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 2038; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] 2039; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 2040; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* 2041; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4 2042; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0 2043; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] 2044; CHECK: pred.udiv.if: 2045; 
CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 2046; CHECK-NEXT: [[TMP6:%.*]] = udiv i32 [[TMP5]], [[TMP0]] 2047; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP6]], i32 0 2048; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE]] 2049; CHECK: pred.udiv.continue: 2050; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UDIV_IF]] ] 2051; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1 2052; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]] 2053; CHECK: pred.udiv.if1: 2054; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 1 2055; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 2056; CHECK-NEXT: [[TMP12:%.*]] = udiv i32 [[TMP11]], [[TMP10]] 2057; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP12]], i32 1 2058; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]] 2059; CHECK: pred.udiv.continue2: 2060; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF1]] ] 2061; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true> 2062; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP14]] 2063; CHECK-NEXT: [[TMP16]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] 2064; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 2065; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 2066; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 2067; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 2068; CHECK: middle.block: 2069; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP16]]) 2070; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] 2071; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 2072; CHECK: scalar.ph: 2073; 
CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2074; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] 2075; CHECK-NEXT: br label [[FOR_BODY:%.*]] 2076; CHECK: for.body: 2077; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ] 2078; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ] 2079; CHECK-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I]] 2080; CHECK-NEXT: [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4 2081; CHECK-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]] 2082; CHECK: if.then: 2083; CHECK-NEXT: [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]] 2084; CHECK-NEXT: br label [[IF_END]] 2085; CHECK: if.end: 2086; CHECK-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ] 2087; CHECK-NEXT: [[VAR4]] = add i32 [[VAR3]], [[SUM]] 2088; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 2089; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]] 2090; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]] 2091; CHECK: for.end: 2092; CHECK-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] 2093; CHECK-NEXT: ret i32 [[VAR5]] 2094; 2095; IND-LABEL: @scalarize_induction_variable_05( 2096; IND-NEXT: entry: 2097; IND-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1) 2098; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 2 2099; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2100; IND: vector.ph: 2101; IND-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483646 2102; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 2103; IND-NEXT: br label [[VECTOR_BODY:%.*]] 2104; IND: vector.body: 2105; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] 2106; IND-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[PRED_UDIV_CONTINUE2]] ] 2107; IND-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 2108; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] 2109; IND-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <2 x i32>* 2110; IND-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 4 2111; IND-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] 2112; IND: pred.udiv.if: 2113; IND-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 0 2114; IND-NEXT: [[TMP4:%.*]] = udiv i32 [[TMP3]], [[INDEX]] 2115; IND-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0 2116; IND-NEXT: br label [[PRED_UDIV_CONTINUE]] 2117; IND: pred.udiv.continue: 2118; IND-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ] 2119; IND-NEXT: br i1 [[C]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]] 2120; IND: pred.udiv.if1: 2121; IND-NEXT: [[TMP7:%.*]] = or i32 [[INDEX]], 1 2122; IND-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 1 2123; IND-NEXT: [[TMP9:%.*]] = udiv i32 [[TMP8]], [[TMP7]] 2124; IND-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP9]], i64 1 2125; IND-NEXT: br label [[PRED_UDIV_CONTINUE2]] 2126; IND: pred.udiv.continue2: 2127; IND-NEXT: [[TMP11:%.*]] = phi <2 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF1]] ] 2128; IND-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison> 2129; IND-NEXT: [[TMP13:%.*]] = shufflevector <2 x i1> [[TMP12]], <2 x i1> poison, <2 x i32> zeroinitializer 2130; IND-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP13]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP11]] 2131; IND-NEXT: [[TMP14]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] 2132; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 
[[INDEX]], 2 2133; IND-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 2134; IND-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 2135; IND: middle.block: 2136; IND-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP14]]) 2137; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] 2138; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 2139; IND: scalar.ph: 2140; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2141; IND-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 2142; IND-NEXT: br label [[FOR_BODY:%.*]] 2143; IND: for.body: 2144; IND-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ] 2145; IND-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ] 2146; IND-NEXT: [[TMP17:%.*]] = zext i32 [[I]] to i64 2147; IND-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]] 2148; IND-NEXT: [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4 2149; IND-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]] 2150; IND: if.then: 2151; IND-NEXT: [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]] 2152; IND-NEXT: br label [[IF_END]] 2153; IND: if.end: 2154; IND-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ] 2155; IND-NEXT: [[VAR4]] = add i32 [[VAR3]], [[SUM]] 2156; IND-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 2157; IND-NEXT: [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]] 2158; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]] 2159; IND: for.end: 2160; IND-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] 2161; IND-NEXT: ret i32 [[VAR5]] 2162; 2163; UNROLL-LABEL: @scalarize_induction_variable_05( 2164; UNROLL-NEXT: entry: 2165; UNROLL-NEXT: [[SMAX:%.*]] = call i32 
@llvm.smax.i32(i32 [[N:%.*]], i32 1) 2166; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 4 2167; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2168; UNROLL: vector.ph: 2169; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483644 2170; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 2171; UNROLL-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0 2172; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 2173; UNROLL: vector.body: 2174; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE11:%.*]] ] 2175; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_UDIV_CONTINUE11]] ] 2176; UNROLL-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UDIV_CONTINUE11]] ] 2177; UNROLL-NEXT: [[TMP0:%.*]] = or i32 [[INDEX]], 2 2178; UNROLL-NEXT: [[TMP1:%.*]] = sext i32 [[INDEX]] to i64 2179; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]] 2180; UNROLL-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* 2181; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4 2182; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 2 2183; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>* 2184; UNROLL-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP5]], align 4 2185; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] 2186; UNROLL: pred.udiv.if: 2187; UNROLL-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 0 2188; UNROLL-NEXT: [[TMP7:%.*]] = udiv i32 [[TMP6]], [[INDEX]] 2189; UNROLL-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i64 0 2190; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE]] 2191; UNROLL: pred.udiv.continue: 2192; UNROLL-NEXT: 
[[TMP9:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ] 2193; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]] 2194; UNROLL: pred.udiv.if4: 2195; UNROLL-NEXT: [[TMP10:%.*]] = or i32 [[INDEX]], 1 2196; UNROLL-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 1 2197; UNROLL-NEXT: [[TMP12:%.*]] = udiv i32 [[TMP11]], [[TMP10]] 2198; UNROLL-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP12]], i64 1 2199; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE5]] 2200; UNROLL: pred.udiv.continue5: 2201; UNROLL-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF4]] ] 2202; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]] 2203; UNROLL: pred.udiv.if8: 2204; UNROLL-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i64 0 2205; UNROLL-NEXT: [[TMP16:%.*]] = udiv i32 [[TMP15]], [[TMP0]] 2206; UNROLL-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i64 0 2207; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE9]] 2208; UNROLL: pred.udiv.continue9: 2209; UNROLL-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ poison, [[PRED_UDIV_CONTINUE5]] ], [ [[TMP17]], [[PRED_UDIV_IF8]] ] 2210; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11]] 2211; UNROLL: pred.udiv.if10: 2212; UNROLL-NEXT: [[TMP19:%.*]] = or i32 [[INDEX]], 3 2213; UNROLL-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i64 1 2214; UNROLL-NEXT: [[TMP21:%.*]] = udiv i32 [[TMP20]], [[TMP19]] 2215; UNROLL-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[TMP21]], i64 1 2216; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE11]] 2217; UNROLL: pred.udiv.continue11: 2218; UNROLL-NEXT: [[TMP23:%.*]] = phi <2 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP22]], [[PRED_UDIV_IF10]] ] 2219; UNROLL-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison> 
2220; UNROLL-NEXT: [[TMP25:%.*]] = shufflevector <2 x i1> [[TMP24]], <2 x i1> poison, <2 x i32> zeroinitializer 2221; UNROLL-NEXT: [[TMP26:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT6]], <i1 true, i1 poison> 2222; UNROLL-NEXT: [[TMP27:%.*]] = shufflevector <2 x i1> [[TMP26]], <2 x i1> poison, <2 x i32> zeroinitializer 2223; UNROLL-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP14]] 2224; UNROLL-NEXT: [[PREDPHI12:%.*]] = select <2 x i1> [[TMP27]], <2 x i32> [[WIDE_LOAD3]], <2 x i32> [[TMP23]] 2225; UNROLL-NEXT: [[TMP28]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] 2226; UNROLL-NEXT: [[TMP29]] = add <2 x i32> [[PREDPHI12]], [[VEC_PHI2]] 2227; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 2228; UNROLL-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 2229; UNROLL-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 2230; UNROLL: middle.block: 2231; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[TMP29]], [[TMP28]] 2232; UNROLL-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]]) 2233; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] 2234; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 2235; UNROLL: scalar.ph: 2236; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2237; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 2238; UNROLL-NEXT: br label [[FOR_BODY:%.*]] 2239; UNROLL: for.body: 2240; UNROLL-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ] 2241; UNROLL-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ] 2242; UNROLL-NEXT: [[TMP32:%.*]] = zext i32 [[I]] to i64 2243; UNROLL-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP32]] 2244; UNROLL-NEXT: [[VAR1:%.*]] = load i32, i32* 
[[VAR0]], align 4 2245; UNROLL-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]] 2246; UNROLL: if.then: 2247; UNROLL-NEXT: [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]] 2248; UNROLL-NEXT: br label [[IF_END]] 2249; UNROLL: if.end: 2250; UNROLL-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ] 2251; UNROLL-NEXT: [[VAR4]] = add i32 [[VAR3]], [[SUM]] 2252; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 2253; UNROLL-NEXT: [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]] 2254; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]] 2255; UNROLL: for.end: 2256; UNROLL-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] 2257; UNROLL-NEXT: ret i32 [[VAR5]] 2258; 2259; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_05( 2260; UNROLL-NO-IC-NEXT: entry: 2261; UNROLL-NO-IC-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1) 2262; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 4 2263; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2264; UNROLL-NO-IC: vector.ph: 2265; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[SMAX]], 4 2266; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]] 2267; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0 2268; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer 2269; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i32 0 2270; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT6]], <2 x i1> poison, <2 x i32> zeroinitializer 2271; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 2272; UNROLL-NO-IC: vector.body: 2273; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
[[PRED_UDIV_CONTINUE11:%.*]] ] 2274; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE11]] ] 2275; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[PRED_UDIV_CONTINUE11]] ] 2276; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP33:%.*]], [[PRED_UDIV_CONTINUE11]] ] 2277; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 2278; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 2279; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 2 2280; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] 2281; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP1]] 2282; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0 2283; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>* 2284; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP5]], align 4 2285; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 2 2286; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>* 2287; UNROLL-NO-IC-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP7]], align 4 2288; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0 2289; UNROLL-NO-IC-NEXT: br i1 [[TMP8]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] 2290; UNROLL-NO-IC: pred.udiv.if: 2291; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 2292; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], [[TMP0]] 2293; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 2294; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE]] 2295; UNROLL-NO-IC: pred.udiv.continue: 2296; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, 
[[VECTOR_BODY]] ], [ [[TMP11]], [[PRED_UDIV_IF]] ] 2297; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1 2298; UNROLL-NO-IC-NEXT: br i1 [[TMP13]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]] 2299; UNROLL-NO-IC: pred.udiv.if4: 2300; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = add i32 [[INDEX]], 1 2301; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 2302; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = udiv i32 [[TMP15]], [[TMP14]] 2303; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP16]], i32 1 2304; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE5]] 2305; UNROLL-NO-IC: pred.udiv.continue5: 2306; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP17]], [[PRED_UDIV_IF4]] ] 2307; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT7]], i32 0 2308; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]] 2309; UNROLL-NO-IC: pred.udiv.if8: 2310; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 0 2311; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = udiv i32 [[TMP20]], [[TMP1]] 2312; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> poison, i32 [[TMP21]], i32 0 2313; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE9]] 2314; UNROLL-NO-IC: pred.udiv.continue9: 2315; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = phi <2 x i32> [ poison, [[PRED_UDIV_CONTINUE5]] ], [ [[TMP22]], [[PRED_UDIV_IF8]] ] 2316; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT7]], i32 1 2317; UNROLL-NO-IC-NEXT: br i1 [[TMP24]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11]] 2318; UNROLL-NO-IC: pred.udiv.if10: 2319; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = add i32 [[INDEX]], 3 2320; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 1 2321; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = udiv i32 [[TMP26]], [[TMP25]] 2322; 
UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP27]], i32 1 2323; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE11]] 2324; UNROLL-NO-IC: pred.udiv.continue11: 2325; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP23]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP28]], [[PRED_UDIV_IF10]] ] 2326; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true> 2327; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT7]], <i1 true, i1 true> 2328; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP30]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP18]] 2329; UNROLL-NO-IC-NEXT: [[PREDPHI12:%.*]] = select <2 x i1> [[TMP31]], <2 x i32> [[WIDE_LOAD3]], <2 x i32> [[TMP29]] 2330; UNROLL-NO-IC-NEXT: [[TMP32]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] 2331; UNROLL-NO-IC-NEXT: [[TMP33]] = add <2 x i32> [[PREDPHI12]], [[VEC_PHI2]] 2332; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 2333; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2> 2334; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 2335; UNROLL-NO-IC-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 2336; UNROLL-NO-IC: middle.block: 2337; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[TMP33]], [[TMP32]] 2338; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]]) 2339; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] 2340; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 2341; UNROLL-NO-IC: scalar.ph: 2342; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2343; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ] 2344; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] 2345; UNROLL-NO-IC: for.body: 2346; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi 
i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ] 2347; UNROLL-NO-IC-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ] 2348; UNROLL-NO-IC-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I]] 2349; UNROLL-NO-IC-NEXT: [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4 2350; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]] 2351; UNROLL-NO-IC: if.then: 2352; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]] 2353; UNROLL-NO-IC-NEXT: br label [[IF_END]] 2354; UNROLL-NO-IC: if.end: 2355; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ] 2356; UNROLL-NO-IC-NEXT: [[VAR4]] = add i32 [[VAR3]], [[SUM]] 2357; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 2358; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]] 2359; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]] 2360; UNROLL-NO-IC: for.end: 2361; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ] 2362; UNROLL-NO-IC-NEXT: ret i32 [[VAR5]] 2363; 2364; INTERLEAVE-LABEL: @scalarize_induction_variable_05( 2365; INTERLEAVE-NEXT: entry: 2366; INTERLEAVE-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1) 2367; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 8 2368; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2369; INTERLEAVE: vector.ph: 2370; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483640 2371; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0 2372; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 2373; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 2374; INTERLEAVE: vector.body: 2375; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE19:%.*]] ] 2376; INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_UDIV_CONTINUE19]] ] 2377; INTERLEAVE-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_UDIV_CONTINUE19]] ] 2378; INTERLEAVE-NEXT: [[TMP0:%.*]] = or i32 [[INDEX]], 4 2379; INTERLEAVE-NEXT: [[TMP1:%.*]] = sext i32 [[INDEX]] to i64 2380; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]] 2381; INTERLEAVE-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* 2382; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 2383; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 4 2384; INTERLEAVE-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>* 2385; INTERLEAVE-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4 2386; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] 2387; INTERLEAVE: pred.udiv.if: 2388; INTERLEAVE-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 0 2389; INTERLEAVE-NEXT: [[TMP7:%.*]] = udiv i32 [[TMP6]], [[INDEX]] 2390; INTERLEAVE-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 2391; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE]] 2392; INTERLEAVE: pred.udiv.continue: 2393; INTERLEAVE-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ] 2394; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]] 2395; INTERLEAVE: pred.udiv.if4: 2396; INTERLEAVE-NEXT: [[TMP10:%.*]] = or i32 [[INDEX]], 1 2397; INTERLEAVE-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 1 2398; INTERLEAVE-NEXT: [[TMP12:%.*]] = udiv i32 [[TMP11]], [[TMP10]] 2399; INTERLEAVE-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP12]], i64 1 2400; INTERLEAVE-NEXT: br label 
[[PRED_UDIV_CONTINUE5]] 2401; INTERLEAVE: pred.udiv.continue5: 2402; INTERLEAVE-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF4]] ] 2403; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]] 2404; INTERLEAVE: pred.udiv.if6: 2405; INTERLEAVE-NEXT: [[TMP15:%.*]] = or i32 [[INDEX]], 2 2406; INTERLEAVE-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 2 2407; INTERLEAVE-NEXT: [[TMP17:%.*]] = udiv i32 [[TMP16]], [[TMP15]] 2408; INTERLEAVE-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP17]], i64 2 2409; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE7]] 2410; INTERLEAVE: pred.udiv.continue7: 2411; INTERLEAVE-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP18]], [[PRED_UDIV_IF6]] ] 2412; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]] 2413; INTERLEAVE: pred.udiv.if8: 2414; INTERLEAVE-NEXT: [[TMP20:%.*]] = or i32 [[INDEX]], 3 2415; INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3 2416; INTERLEAVE-NEXT: [[TMP22:%.*]] = udiv i32 [[TMP21]], [[TMP20]] 2417; INTERLEAVE-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP22]], i64 3 2418; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE9]] 2419; INTERLEAVE: pred.udiv.continue9: 2420; INTERLEAVE-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP23]], [[PRED_UDIV_IF8]] ] 2421; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]] 2422; INTERLEAVE: pred.udiv.if12: 2423; INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 0 2424; INTERLEAVE-NEXT: [[TMP26:%.*]] = udiv i32 [[TMP25]], [[TMP0]] 2425; INTERLEAVE-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i64 0 2426; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE13]] 2427; INTERLEAVE: pred.udiv.continue13: 2428; 
INTERLEAVE-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE9]] ], [ [[TMP27]], [[PRED_UDIV_IF12]] ] 2429; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]] 2430; INTERLEAVE: pred.udiv.if14: 2431; INTERLEAVE-NEXT: [[TMP29:%.*]] = or i32 [[INDEX]], 5 2432; INTERLEAVE-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 1 2433; INTERLEAVE-NEXT: [[TMP31:%.*]] = udiv i32 [[TMP30]], [[TMP29]] 2434; INTERLEAVE-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i64 1 2435; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE15]] 2436; INTERLEAVE: pred.udiv.continue15: 2437; INTERLEAVE-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_UDIV_CONTINUE13]] ], [ [[TMP32]], [[PRED_UDIV_IF14]] ] 2438; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]] 2439; INTERLEAVE: pred.udiv.if16: 2440; INTERLEAVE-NEXT: [[TMP34:%.*]] = or i32 [[INDEX]], 6 2441; INTERLEAVE-NEXT: [[TMP35:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 2 2442; INTERLEAVE-NEXT: [[TMP36:%.*]] = udiv i32 [[TMP35]], [[TMP34]] 2443; INTERLEAVE-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i64 2 2444; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE17]] 2445; INTERLEAVE: pred.udiv.continue17: 2446; INTERLEAVE-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP37]], [[PRED_UDIV_IF16]] ] 2447; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19]] 2448; INTERLEAVE: pred.udiv.if18: 2449; INTERLEAVE-NEXT: [[TMP39:%.*]] = or i32 [[INDEX]], 7 2450; INTERLEAVE-NEXT: [[TMP40:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 3 2451; INTERLEAVE-NEXT: [[TMP41:%.*]] = udiv i32 [[TMP40]], [[TMP39]] 2452; INTERLEAVE-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i64 3 2453; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE19]] 2454; INTERLEAVE: pred.udiv.continue19: 2455; 
INTERLEAVE-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP42]], [[PRED_UDIV_IF18]] ] 2456; INTERLEAVE-NEXT: [[TMP44:%.*]] = xor <4 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison, i1 poison, i1 poison> 2457; INTERLEAVE-NEXT: [[TMP45:%.*]] = shufflevector <4 x i1> [[TMP44]], <4 x i1> poison, <4 x i32> zeroinitializer 2458; INTERLEAVE-NEXT: [[TMP46:%.*]] = xor <4 x i1> [[BROADCAST_SPLATINSERT10]], <i1 true, i1 poison, i1 poison, i1 poison> 2459; INTERLEAVE-NEXT: [[TMP47:%.*]] = shufflevector <4 x i1> [[TMP46]], <4 x i1> poison, <4 x i32> zeroinitializer 2460; INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP45]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[TMP24]] 2461; INTERLEAVE-NEXT: [[PREDPHI20:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_LOAD3]], <4 x i32> [[TMP43]] 2462; INTERLEAVE-NEXT: [[TMP48]] = add <4 x i32> [[PREDPHI]], [[VEC_PHI]] 2463; INTERLEAVE-NEXT: [[TMP49]] = add <4 x i32> [[PREDPHI20]], [[VEC_PHI2]] 2464; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 2465; INTERLEAVE-NEXT: [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 2466; INTERLEAVE-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 2467; INTERLEAVE: middle.block: 2468; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]] 2469; INTERLEAVE-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) 2470; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]] 2471; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 2472; INTERLEAVE: scalar.ph: 2473; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2474; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP51]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 2475; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] 2476; INTERLEAVE: for.body: 2477; INTERLEAVE-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], 
[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ] 2478; INTERLEAVE-NEXT: [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ] 2479; INTERLEAVE-NEXT: [[TMP52:%.*]] = zext i32 [[I]] to i64 2480; INTERLEAVE-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP52]] 2481; INTERLEAVE-NEXT: [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4 2482; INTERLEAVE-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]] 2483; INTERLEAVE: if.then: 2484; INTERLEAVE-NEXT: [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]] 2485; INTERLEAVE-NEXT: br label [[IF_END]] 2486; INTERLEAVE: if.end: 2487; INTERLEAVE-NEXT: [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ] 2488; INTERLEAVE-NEXT: [[VAR4]] = add i32 [[VAR3]], [[SUM]] 2489; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 2490; INTERLEAVE-NEXT: [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]] 2491; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]] 2492; INTERLEAVE: for.end: 2493; INTERLEAVE-NEXT: [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ] 2494; INTERLEAVE-NEXT: ret i32 [[VAR5]] 2495; 2496entry: 2497 br label %for.body 2498 2499for.body: 2500 %i = phi i32 [ 0, %entry ], [ %i.next, %if.end ] 2501 %sum = phi i32 [ 0, %entry ], [ %var4, %if.end ] 2502 %var0 = getelementptr inbounds i32, i32* %a, i32 %i 2503 %var1 = load i32, i32* %var0, align 4 2504 br i1 %c, label %if.then, label %if.end 2505 2506if.then: 2507 %var2 = udiv i32 %var1, %i 2508 br label %if.end 2509 2510if.end: 2511 %var3 = phi i32 [ %var2, %if.then ], [ %var1, %for.body ] 2512 %var4 = add i32 %var3, %sum 2513 %i.next = add nuw nsw i32 %i, 1 2514 %cond = icmp slt i32 %i.next, %n 2515 br i1 %cond, label %for.body, label %for.end 2516 2517for.end: 2518 %var5 = phi i32 [ %var4, %if.end ] 2519 ret i32 %var5 2520} 2521 2522; Ensure we generate both a vector and a scalar induction variable. 
In this 2523; test, the induction variable is used by an instruction that will be 2524; vectorized (trunc) as well as an instruction that will remain in scalar form 2525; (getelementptr). 2526; Each iteration of the scalar loop stores the i16 truncation of %a + trunc(%i) 2527; into field 1 of element %i of %p; the loop exits once trunc(%i.next) == %n. 2528; 2529 2530%pair.i16 = type { i16, i16 } 2531define void @iv_vector_and_scalar_users(%pair.i16* %p, i32 %a, i32 %n) { 2532; CHECK-LABEL: @iv_vector_and_scalar_users( 2533; CHECK-NEXT: entry: 2534; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 2535; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 2536; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 2537; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2 2538; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2539; CHECK: vector.ph: 2540; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2 2541; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] 2542; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0 2543; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 2544; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 2545; CHECK: vector.body: 2546; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2547; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 2548; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] 2549; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 2550; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 2551; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND1]] 2552; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i32> [[TMP5]] to <2 x i16> 2553; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[TMP3]], i32 1 2554; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I16]], 
%pair.i16* [[P]], i64 [[TMP4]], i32 1 2555; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP6]], i32 0 2556; CHECK-NEXT: store i16 [[TMP9]], i16* [[TMP7]], align 2 2557; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP6]], i32 1 2558; CHECK-NEXT: store i16 [[TMP10]], i16* [[TMP8]], align 2 2559; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 2560; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 2561; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i32> [[VEC_IND1]], <i32 2, i32 2> 2562; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2563; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] 2564; CHECK: middle.block: 2565; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 2566; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 2567; CHECK: scalar.ph: 2568; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2569; CHECK-NEXT: br label [[FOR_BODY:%.*]] 2570; CHECK: for.body: 2571; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 2572; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[I]] to i32 2573; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[A]], [[TMP12]] 2574; CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 2575; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1 2576; CHECK-NEXT: store i16 [[TMP14]], i16* [[TMP15]], align 2 2577; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 2578; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[I_NEXT]] to i32 2579; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP16]], [[N]] 2580; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] 2581; CHECK: for.end: 2582; CHECK-NEXT: ret void 2583; 2584; IND-LABEL: @iv_vector_and_scalar_users( 2585; IND-NEXT: entry: 2586; IND-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 
2587; IND-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 2588; IND-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 2589; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 2590; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2591; IND: vector.ph: 2592; IND-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 2593; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0 2594; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 2595; IND-NEXT: br label [[VECTOR_BODY:%.*]] 2596; IND: vector.body: 2597; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2598; IND-NEXT: [[VEC_IND1:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] 2599; IND-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 2600; IND-NEXT: [[TMP4:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND1]] 2601; IND-NEXT: [[TMP5:%.*]] = trunc <2 x i32> [[TMP4]] to <2 x i16> 2602; IND-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[INDEX]], i32 1 2603; IND-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP3]], i32 1 2604; IND-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[TMP5]], i64 0 2605; IND-NEXT: store i16 [[TMP8]], i16* [[TMP6]], align 2 2606; IND-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP5]], i64 1 2607; IND-NEXT: store i16 [[TMP9]], i16* [[TMP7]], align 2 2608; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 2609; IND-NEXT: [[VEC_IND_NEXT2]] = add <2 x i32> [[VEC_IND1]], <i32 2, i32 2> 2610; IND-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2611; IND-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] 2612; IND: middle.block: 2613; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 2614; IND-NEXT: 
br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 2615; IND: scalar.ph: 2616; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2617; IND-NEXT: br label [[FOR_BODY:%.*]] 2618; IND: for.body: 2619; IND-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 2620; IND-NEXT: [[TMP11:%.*]] = trunc i64 [[I]] to i32 2621; IND-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], [[A]] 2622; IND-NEXT: [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16 2623; IND-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1 2624; IND-NEXT: store i16 [[TMP13]], i16* [[TMP14]], align 2 2625; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 2626; IND-NEXT: [[TMP15:%.*]] = trunc i64 [[I_NEXT]] to i32 2627; IND-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP15]], [[N]] 2628; IND-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] 2629; IND: for.end: 2630; IND-NEXT: ret void 2631; 2632; UNROLL-LABEL: @iv_vector_and_scalar_users( 2633; UNROLL-NEXT: entry: 2634; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 2635; UNROLL-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 2636; UNROLL-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 2637; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 2638; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2639; UNROLL: vector.ph: 2640; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 2641; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0 2642; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 2643; UNROLL-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i64 0 2644; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 2645; UNROLL: vector.body: 2646; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, 
[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2647; UNROLL-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] 2648; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 2649; UNROLL-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 2 2650; UNROLL-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 3 2651; UNROLL-NEXT: [[TMP6:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND2]] 2652; UNROLL-NEXT: [[TMP7:%.*]] = add <2 x i32> [[BROADCAST_SPLATINSERT6]], <i32 2, i32 poison> 2653; UNROLL-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> zeroinitializer 2654; UNROLL-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP8]], [[VEC_IND2]] 2655; UNROLL-NEXT: [[TMP10:%.*]] = trunc <2 x i32> [[TMP6]] to <2 x i16> 2656; UNROLL-NEXT: [[TMP11:%.*]] = trunc <2 x i32> [[TMP9]] to <2 x i16> 2657; UNROLL-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[INDEX]], i32 1 2658; UNROLL-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP3]], i32 1 2659; UNROLL-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP4]], i32 1 2660; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP5]], i32 1 2661; UNROLL-NEXT: [[TMP16:%.*]] = extractelement <2 x i16> [[TMP10]], i64 0 2662; UNROLL-NEXT: store i16 [[TMP16]], i16* [[TMP12]], align 2 2663; UNROLL-NEXT: [[TMP17:%.*]] = extractelement <2 x i16> [[TMP10]], i64 1 2664; UNROLL-NEXT: store i16 [[TMP17]], i16* [[TMP13]], align 2 2665; UNROLL-NEXT: [[TMP18:%.*]] = extractelement <2 x i16> [[TMP11]], i64 0 2666; UNROLL-NEXT: store i16 [[TMP18]], i16* [[TMP14]], align 2 2667; UNROLL-NEXT: [[TMP19:%.*]] = extractelement <2 x i16> [[TMP11]], i64 1 2668; UNROLL-NEXT: store i16 [[TMP19]], i16* [[TMP15]], align 2 2669; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 2670; UNROLL-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND2]], 
<i32 4, i32 4> 2671; UNROLL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2672; UNROLL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] 2673; UNROLL: middle.block: 2674; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 2675; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 2676; UNROLL: scalar.ph: 2677; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2678; UNROLL-NEXT: br label [[FOR_BODY:%.*]] 2679; UNROLL: for.body: 2680; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 2681; UNROLL-NEXT: [[TMP21:%.*]] = trunc i64 [[I]] to i32 2682; UNROLL-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], [[A]] 2683; UNROLL-NEXT: [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16 2684; UNROLL-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1 2685; UNROLL-NEXT: store i16 [[TMP23]], i16* [[TMP24]], align 2 2686; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 2687; UNROLL-NEXT: [[TMP25:%.*]] = trunc i64 [[I_NEXT]] to i32 2688; UNROLL-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP25]], [[N]] 2689; UNROLL-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] 2690; UNROLL: for.end: 2691; UNROLL-NEXT: ret void 2692; 2693; UNROLL-NO-IC-LABEL: @iv_vector_and_scalar_users( 2694; UNROLL-NO-IC-NEXT: entry: 2695; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 2696; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 2697; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 2698; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 2699; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2700; UNROLL-NO-IC: vector.ph: 2701; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 2702; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], 
[[N_MOD_VF]] 2703; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0 2704; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 2705; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0 2706; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT6]], <2 x i32> poison, <2 x i32> zeroinitializer 2707; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 2708; UNROLL-NO-IC: vector.body: 2709; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2710; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 2711; UNROLL-NO-IC-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] 2712; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 2713; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 2714; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1 2715; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 2716; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 3 2717; UNROLL-NO-IC-NEXT: [[STEP_ADD3:%.*]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2> 2718; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND2]] 2719; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[BROADCAST_SPLAT7]], [[STEP_ADD3]] 2720; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = trunc <2 x i32> [[TMP7]] to <2 x i16> 2721; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc <2 x i32> [[TMP8]] to <2 x i16> 2722; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[TMP3]], i32 1 2723; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP4]], i32 1 2724; 
UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP5]], i32 1 2725; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP6]], i32 1 2726; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = extractelement <2 x i16> [[TMP9]], i32 0 2727; UNROLL-NO-IC-NEXT: store i16 [[TMP15]], i16* [[TMP11]], align 2 2728; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = extractelement <2 x i16> [[TMP9]], i32 1 2729; UNROLL-NO-IC-NEXT: store i16 [[TMP16]], i16* [[TMP12]], align 2 2730; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = extractelement <2 x i16> [[TMP10]], i32 0 2731; UNROLL-NO-IC-NEXT: store i16 [[TMP17]], i16* [[TMP13]], align 2 2732; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = extractelement <2 x i16> [[TMP10]], i32 1 2733; UNROLL-NO-IC-NEXT: store i16 [[TMP18]], i16* [[TMP14]], align 2 2734; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 2735; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2> 2736; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[STEP_ADD3]], <i32 2, i32 2> 2737; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2738; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] 2739; UNROLL-NO-IC: middle.block: 2740; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 2741; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 2742; UNROLL-NO-IC: scalar.ph: 2743; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2744; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] 2745; UNROLL-NO-IC: for.body: 2746; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 2747; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = trunc i64 [[I]] to i32 2748; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = add i32 [[A]], [[TMP20]] 2749; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = trunc i32 
[[TMP21]] to i16 2750; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1 2751; UNROLL-NO-IC-NEXT: store i16 [[TMP22]], i16* [[TMP23]], align 2 2752; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 2753; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = trunc i64 [[I_NEXT]] to i32 2754; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP24]], [[N]] 2755; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] 2756; UNROLL-NO-IC: for.end: 2757; UNROLL-NO-IC-NEXT: ret void 2758; 2759; INTERLEAVE-LABEL: @iv_vector_and_scalar_users( 2760; INTERLEAVE-NEXT: entry: 2761; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 2762; INTERLEAVE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 2763; INTERLEAVE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 2764; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 2765; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2766; INTERLEAVE: vector.ph: 2767; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 2768; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0 2769; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 2770; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 2771; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 2772; INTERLEAVE: vector.body: 2773; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2774; INTERLEAVE-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] 2775; INTERLEAVE-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 2776; INTERLEAVE-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 2 2777; INTERLEAVE-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 3 2778; 
INTERLEAVE-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 4 2779; INTERLEAVE-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 5 2780; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 6 2781; INTERLEAVE-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 7 2782; INTERLEAVE-NEXT: [[TMP10:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND2]] 2783; INTERLEAVE-NEXT: [[TMP11:%.*]] = add <4 x i32> [[BROADCAST_SPLATINSERT6]], <i32 4, i32 poison, i32 poison, i32 poison> 2784; INTERLEAVE-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> zeroinitializer 2785; INTERLEAVE-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[VEC_IND2]] 2786; INTERLEAVE-NEXT: [[TMP14:%.*]] = trunc <4 x i32> [[TMP10]] to <4 x i16> 2787; INTERLEAVE-NEXT: [[TMP15:%.*]] = trunc <4 x i32> [[TMP13]] to <4 x i16> 2788; INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[INDEX]], i32 1 2789; INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP3]], i32 1 2790; INTERLEAVE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP4]], i32 1 2791; INTERLEAVE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP5]], i32 1 2792; INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP6]], i32 1 2793; INTERLEAVE-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP7]], i32 1 2794; INTERLEAVE-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP8]], i32 1 2795; INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP9]], i32 1 2796; INTERLEAVE-NEXT: [[TMP24:%.*]] = extractelement <4 x i16> [[TMP14]], i64 0 2797; INTERLEAVE-NEXT: store i16 [[TMP24]], i16* [[TMP16]], align 2 2798; INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <4 x i16> [[TMP14]], i64 1 2799; INTERLEAVE-NEXT: store i16 [[TMP25]], i16* [[TMP17]], 
align 2 2800; INTERLEAVE-NEXT: [[TMP26:%.*]] = extractelement <4 x i16> [[TMP14]], i64 2 2801; INTERLEAVE-NEXT: store i16 [[TMP26]], i16* [[TMP18]], align 2 2802; INTERLEAVE-NEXT: [[TMP27:%.*]] = extractelement <4 x i16> [[TMP14]], i64 3 2803; INTERLEAVE-NEXT: store i16 [[TMP27]], i16* [[TMP19]], align 2 2804; INTERLEAVE-NEXT: [[TMP28:%.*]] = extractelement <4 x i16> [[TMP15]], i64 0 2805; INTERLEAVE-NEXT: store i16 [[TMP28]], i16* [[TMP20]], align 2 2806; INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <4 x i16> [[TMP15]], i64 1 2807; INTERLEAVE-NEXT: store i16 [[TMP29]], i16* [[TMP21]], align 2 2808; INTERLEAVE-NEXT: [[TMP30:%.*]] = extractelement <4 x i16> [[TMP15]], i64 2 2809; INTERLEAVE-NEXT: store i16 [[TMP30]], i16* [[TMP22]], align 2 2810; INTERLEAVE-NEXT: [[TMP31:%.*]] = extractelement <4 x i16> [[TMP15]], i64 3 2811; INTERLEAVE-NEXT: store i16 [[TMP31]], i16* [[TMP23]], align 2 2812; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 2813; INTERLEAVE-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[VEC_IND2]], <i32 8, i32 8, i32 8, i32 8> 2814; INTERLEAVE-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 2815; INTERLEAVE-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] 2816; INTERLEAVE: middle.block: 2817; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 2818; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 2819; INTERLEAVE: scalar.ph: 2820; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 2821; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] 2822; INTERLEAVE: for.body: 2823; INTERLEAVE-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 2824; INTERLEAVE-NEXT: [[TMP33:%.*]] = trunc i64 [[I]] to i32 2825; INTERLEAVE-NEXT: [[TMP34:%.*]] = add i32 [[TMP33]], [[A]] 2826; INTERLEAVE-NEXT: [[TMP35:%.*]] = trunc i32 [[TMP34]] to i16 2827; INTERLEAVE-NEXT: 
[[TMP36:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1 2828; INTERLEAVE-NEXT: store i16 [[TMP35]], i16* [[TMP36]], align 2 2829; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 2830; INTERLEAVE-NEXT: [[TMP37:%.*]] = trunc i64 [[I_NEXT]] to i32 2831; INTERLEAVE-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP37]], [[N]] 2832; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] 2833; INTERLEAVE: for.end: 2834; INTERLEAVE-NEXT: ret void 2835; 2836entry: 2837 br label %for.body 2838 2839for.body: 2840 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] 2841 %0 = trunc i64 %i to i32 2842 %1 = add i32 %a, %0 2843 %2 = trunc i32 %1 to i16 2844 %3 = getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %i, i32 1 2845 store i16 %2, i16* %3, align 2 2846 %i.next = add nuw nsw i64 %i, 1 2847 %4 = trunc i64 %i.next to i32 2848 %cond = icmp eq i32 %4, %n 2849 br i1 %cond, label %for.end, label %for.body 2850 2851for.end: 2852 ret void 2853} 2854 2855; Make sure that the loop exit count computation does not overflow for i8 and 2856; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the 2857; induction variable to a bigger type the exit count computation will overflow 2858; to 0. 
; @i8_loop / @i16_loop: the counter %b.0 starts at 0, is decremented by 1 each
; iteration, and the loop exits once it is 0 again, i.e. after 256 (i8) or
; 65536 (i16) iterations. Every run configuration is expected to compare the
; i32 vector index against that constant (256 / 65536), which is only possible
; because the trip count is computed in a type wider than the counter itself.
; %a.0 is an and-reduction of the constant 4, seeded with 1.
2859; PR17532 2860 2861define i32 @i8_loop() nounwind readnone ssp uwtable { 2862; CHECK-LABEL: @i8_loop( 2863; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2864; CHECK: vector.ph: 2865; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 2866; CHECK: vector.body: 2867; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2868; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] 2869; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i8 2870; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 0, [[TMP1]] 2871; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0 2872; CHECK-NEXT: [[TMP3]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4> 2873; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 2874; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 2875; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] 2876; CHECK: middle.block: 2877; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP3]]) 2878; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 256, 256 2879; CHECK-NEXT: br i1 [[CMP_N]], label [[TMP10:%.*]], label [[SCALAR_PH]] 2880; CHECK: scalar.ph: 2881; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ] 2882; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] 2883; CHECK-NEXT: br label [[TMP6:%.*]] 2884; CHECK: 6: 2885; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP7:%.*]], [[TMP6]] ] 2886; CHECK-NEXT: [[B_0:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP8:%.*]], [[TMP6]] ] 2887; CHECK-NEXT: [[TMP7]] = and i32 [[A_0]], 4 2888; CHECK-NEXT: [[TMP8]] = add i8 [[B_0]], -1 2889; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0 2890; CHECK-NEXT: br i1 [[TMP9]], label [[TMP10]], label [[TMP6]], !llvm.loop [[LOOP29:![0-9]+]] 2891; CHECK: 10: 2892; 
CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP7]], [[TMP6]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] 2893; CHECK-NEXT: ret i32 [[DOTLCSSA]] 2894; 2895; IND-LABEL: @i8_loop( 2896; IND-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2897; IND: vector.ph: 2898; IND-NEXT: br label [[VECTOR_BODY:%.*]] 2899; IND: vector.body: 2900; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2901; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 2902; IND-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 2903; IND-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] 2904; IND: middle.block: 2905; IND-NEXT: br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]] 2906; IND: scalar.ph: 2907; IND-NEXT: br label [[TMP2:%.*]] 2908; IND: 2: 2909; IND-NEXT: br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP29:![0-9]+]] 2910; IND: 3: 2911; IND-NEXT: ret i32 0 2912; 2913; UNROLL-LABEL: @i8_loop( 2914; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2915; UNROLL: vector.ph: 2916; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 2917; UNROLL: vector.body: 2918; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2919; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 2920; UNROLL-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 2921; UNROLL-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] 2922; UNROLL: middle.block: 2923; UNROLL-NEXT: br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]] 2924; UNROLL: scalar.ph: 2925; UNROLL-NEXT: br label [[TMP2:%.*]] 2926; UNROLL: 2: 2927; UNROLL-NEXT: br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP29:![0-9]+]] 2928; UNROLL: 3: 2929; UNROLL-NEXT: ret i32 0 2930; 2931; UNROLL-NO-IC-LABEL: @i8_loop( 2932; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2933; 
UNROLL-NO-IC: vector.ph: 2934; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 2935; UNROLL-NO-IC: vector.body: 2936; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2937; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] 2938; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 2939; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i8 2940; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i8 0, [[TMP1]] 2941; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0 2942; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i8 [[OFFSET_IDX]], -2 2943; UNROLL-NO-IC-NEXT: [[TMP4]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4> 2944; UNROLL-NO-IC-NEXT: [[TMP5]] = and <2 x i32> [[VEC_PHI1]], <i32 4, i32 4> 2945; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 2946; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 2947; UNROLL-NO-IC-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] 2948; UNROLL-NO-IC: middle.block: 2949; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP5]], [[TMP4]] 2950; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) 2951; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 256, 256 2952; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[TMP12:%.*]], label [[SCALAR_PH]] 2953; UNROLL-NO-IC: scalar.ph: 2954; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ] 2955; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 2956; UNROLL-NO-IC-NEXT: br label [[TMP8:%.*]] 2957; UNROLL-NO-IC: 8: 2958; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP9:%.*]], [[TMP8]] ] 2959; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i8 [ [[BC_RESUME_VAL]], 
[[SCALAR_PH]] ], [ [[TMP10:%.*]], [[TMP8]] ] 2960; UNROLL-NO-IC-NEXT: [[TMP9]] = and i32 [[A_0]], 4 2961; UNROLL-NO-IC-NEXT: [[TMP10]] = add i8 [[B_0]], -1 2962; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0 2963; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[TMP12]], label [[TMP8]], !llvm.loop [[LOOP29:![0-9]+]] 2964; UNROLL-NO-IC: 12: 2965; UNROLL-NO-IC-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP9]], [[TMP8]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 2966; UNROLL-NO-IC-NEXT: ret i32 [[DOTLCSSA]] 2967; 2968; INTERLEAVE-LABEL: @i8_loop( 2969; INTERLEAVE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 2970; INTERLEAVE: vector.ph: 2971; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 2972; INTERLEAVE: vector.body: 2973; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 2974; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 2975; INTERLEAVE-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 2976; INTERLEAVE-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] 2977; INTERLEAVE: middle.block: 2978; INTERLEAVE-NEXT: br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]] 2979; INTERLEAVE: scalar.ph: 2980; INTERLEAVE-NEXT: br label [[TMP2:%.*]] 2981; INTERLEAVE: 2: 2982; INTERLEAVE-NEXT: br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP29:![0-9]+]] 2983; INTERLEAVE: 3: 2984; INTERLEAVE-NEXT: ret i32 0 2985; 2986 br label %1 2987 2988; <label>:1 ; preds = %1, %0 2989 %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ] 2990 %b.0 = phi i8 [ 0, %0 ], [ %3, %1 ] 2991 %2 = and i32 %a.0, 4 2992 %3 = add i8 %b.0, -1 2993 %4 = icmp eq i8 %3, 0 2994 br i1 %4, label %5, label %1 2995 2996; <label>:5 ; preds = %1 2997 ret i32 %2 2998} 2999 3000 3001define i32 @i16_loop() nounwind readnone ssp uwtable { 3002; CHECK-LABEL: @i16_loop( 3003; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3004; CHECK: vector.ph: 3005; CHECK-NEXT: br 
label [[VECTOR_BODY:%.*]] 3006; CHECK: vector.body: 3007; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3008; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] 3009; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16 3010; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 0, [[TMP1]] 3011; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0 3012; CHECK-NEXT: [[TMP3]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4> 3013; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 3014; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 3015; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] 3016; CHECK: middle.block: 3017; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP3]]) 3018; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 65536, 65536 3019; CHECK-NEXT: br i1 [[CMP_N]], label [[TMP10:%.*]], label [[SCALAR_PH]] 3020; CHECK: scalar.ph: 3021; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ] 3022; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] 3023; CHECK-NEXT: br label [[TMP6:%.*]] 3024; CHECK: 6: 3025; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP7:%.*]], [[TMP6]] ] 3026; CHECK-NEXT: [[B_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP8:%.*]], [[TMP6]] ] 3027; CHECK-NEXT: [[TMP7]] = and i32 [[A_0]], 4 3028; CHECK-NEXT: [[TMP8]] = add i16 [[B_0]], -1 3029; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP8]], 0 3030; CHECK-NEXT: br i1 [[TMP9]], label [[TMP10]], label [[TMP6]], !llvm.loop [[LOOP31:![0-9]+]] 3031; CHECK: 10: 3032; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP7]], [[TMP6]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] 3033; CHECK-NEXT: ret i32 [[DOTLCSSA]] 3034; 3035; IND-LABEL: @i16_loop( 3036; IND-NEXT: br i1 false, label [[SCALAR_PH:%.*]], 
label [[VECTOR_PH:%.*]] 3037; IND: vector.ph: 3038; IND-NEXT: br label [[VECTOR_BODY:%.*]] 3039; IND: vector.body: 3040; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3041; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 3042; IND-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 3043; IND-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] 3044; IND: middle.block: 3045; IND-NEXT: br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]] 3046; IND: scalar.ph: 3047; IND-NEXT: br label [[TMP2:%.*]] 3048; IND: 2: 3049; IND-NEXT: br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP31:![0-9]+]] 3050; IND: 3: 3051; IND-NEXT: ret i32 0 3052; 3053; UNROLL-LABEL: @i16_loop( 3054; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3055; UNROLL: vector.ph: 3056; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 3057; UNROLL: vector.body: 3058; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3059; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 3060; UNROLL-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 3061; UNROLL-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] 3062; UNROLL: middle.block: 3063; UNROLL-NEXT: br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]] 3064; UNROLL: scalar.ph: 3065; UNROLL-NEXT: br label [[TMP2:%.*]] 3066; UNROLL: 2: 3067; UNROLL-NEXT: br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP31:![0-9]+]] 3068; UNROLL: 3: 3069; UNROLL-NEXT: ret i32 0 3070; 3071; UNROLL-NO-IC-LABEL: @i16_loop( 3072; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3073; UNROLL-NO-IC: vector.ph: 3074; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 3075; UNROLL-NO-IC: vector.body: 3076; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 
3077; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] 3078; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 3079; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16 3080; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i16 0, [[TMP1]] 3081; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0 3082; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], -2 3083; UNROLL-NO-IC-NEXT: [[TMP4]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4> 3084; UNROLL-NO-IC-NEXT: [[TMP5]] = and <2 x i32> [[VEC_PHI1]], <i32 4, i32 4> 3085; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 3086; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 3087; UNROLL-NO-IC-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] 3088; UNROLL-NO-IC: middle.block: 3089; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP5]], [[TMP4]] 3090; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) 3091; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 65536, 65536 3092; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[TMP12:%.*]], label [[SCALAR_PH]] 3093; UNROLL-NO-IC: scalar.ph: 3094; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ] 3095; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 3096; UNROLL-NO-IC-NEXT: br label [[TMP8:%.*]] 3097; UNROLL-NO-IC: 8: 3098; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP9:%.*]], [[TMP8]] ] 3099; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP10:%.*]], [[TMP8]] ] 3100; UNROLL-NO-IC-NEXT: [[TMP9]] = and i32 [[A_0]], 4 3101; UNROLL-NO-IC-NEXT: [[TMP10]] = add i16 [[B_0]], -1 3102; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp eq i16 
[[TMP10]], 0 3103; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[TMP12]], label [[TMP8]], !llvm.loop [[LOOP31:![0-9]+]] 3104; UNROLL-NO-IC: 12: 3105; UNROLL-NO-IC-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP9]], [[TMP8]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 3106; UNROLL-NO-IC-NEXT: ret i32 [[DOTLCSSA]] 3107; 3108; INTERLEAVE-LABEL: @i16_loop( 3109; INTERLEAVE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3110; INTERLEAVE: vector.ph: 3111; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 3112; INTERLEAVE: vector.body: 3113; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3114; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 3115; INTERLEAVE-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536 3116; INTERLEAVE-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] 3117; INTERLEAVE: middle.block: 3118; INTERLEAVE-NEXT: br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]] 3119; INTERLEAVE: scalar.ph: 3120; INTERLEAVE-NEXT: br label [[TMP2:%.*]] 3121; INTERLEAVE: 2: 3122; INTERLEAVE-NEXT: br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP31:![0-9]+]] 3123; INTERLEAVE: 3: 3124; INTERLEAVE-NEXT: ret i32 0 3125; 3126 br label %1 3127 3128; <label>:1 ; preds = %1, %0 3129 %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ] 3130 %b.0 = phi i16 [ 0, %0 ], [ %3, %1 ] 3131 %2 = and i32 %a.0, 4 3132 %3 = add i16 %b.0, -1 3133 %4 = icmp eq i16 %3, 0 3134 br i1 %4, label %5, label %1 3135 3136; <label>:5 ; preds = %1 3137 ret i32 %2 3138} 3139 3140; This loop has a backedge taken count of i32_max. We need to check for this 3141; condition and branch directly to the scalar loop. 
; @max_i32_backedgetaken: the i32 counter %b.0 starts at 0 and is decremented
; by 1 until it is 0 again, so the backedge is taken 2^32 - 1 times and the
; trip count is not representable in i32. All five run configurations are
; expected to start with `br i1 true` into the scalar preheader, i.e. the
; vector loop is never entered.
3142 3143 3144 3145define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable { 3146; CHECK-LABEL: @max_i32_backedgetaken( 3147; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3148; CHECK: vector.ph: 3149; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 3150; CHECK: vector.body: 3151; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3152; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] 3153; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 0, [[INDEX]] 3154; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 3155; CHECK-NEXT: [[TMP2]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4> 3156; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 3157; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 3158; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] 3159; CHECK: middle.block: 3160; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP2]]) 3161; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0 3162; CHECK-NEXT: br i1 [[CMP_N]], label [[TMP9:%.*]], label [[SCALAR_PH]] 3163; CHECK: scalar.ph: 3164; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ] 3165; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] 3166; CHECK-NEXT: br label [[TMP5:%.*]] 3167; CHECK: 5: 3168; CHECK-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP6:%.*]], [[TMP5]] ] 3169; CHECK-NEXT: [[B_0:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP7:%.*]], [[TMP5]] ] 3170; CHECK-NEXT: [[TMP6]] = and i32 [[A_0]], 4 3171; CHECK-NEXT: [[TMP7]] = add i32 [[B_0]], -1 3172; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 3173; CHECK-NEXT: br i1 [[TMP8]], label [[TMP9]], label [[TMP5]], !llvm.loop [[LOOP33:![0-9]+]] 3174; CHECK: 9: 3175; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ 
[[TMP6]], [[TMP5]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] 3176; CHECK-NEXT: ret i32 [[DOTLCSSA]] 3177; 3178; IND-LABEL: @max_i32_backedgetaken( 3179; IND-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3180; IND: vector.ph: 3181; IND-NEXT: br label [[VECTOR_BODY:%.*]] 3182; IND: vector.body: 3183; IND-NEXT: br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] 3184; IND: middle.block: 3185; IND-NEXT: br i1 undef, label [[TMP4:%.*]], label [[SCALAR_PH]] 3186; IND: scalar.ph: 3187; IND-NEXT: br label [[TMP1:%.*]] 3188; IND: 1: 3189; IND-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TMP2:%.*]], [[TMP1]] ] 3190; IND-NEXT: [[TMP2]] = add i32 [[B_0]], -1 3191; IND-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 3192; IND-NEXT: br i1 [[TMP3]], label [[TMP4]], label [[TMP1]], !llvm.loop [[LOOP33:![0-9]+]] 3193; IND: 4: 3194; IND-NEXT: ret i32 0 3195; 3196; UNROLL-LABEL: @max_i32_backedgetaken( 3197; UNROLL-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3198; UNROLL: vector.ph: 3199; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 3200; UNROLL: vector.body: 3201; UNROLL-NEXT: br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] 3202; UNROLL: middle.block: 3203; UNROLL-NEXT: br i1 undef, label [[TMP4:%.*]], label [[SCALAR_PH]] 3204; UNROLL: scalar.ph: 3205; UNROLL-NEXT: br label [[TMP1:%.*]] 3206; UNROLL: 1: 3207; UNROLL-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TMP2:%.*]], [[TMP1]] ] 3208; UNROLL-NEXT: [[TMP2]] = add i32 [[B_0]], -1 3209; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 3210; UNROLL-NEXT: br i1 [[TMP3]], label [[TMP4]], label [[TMP1]], !llvm.loop [[LOOP33:![0-9]+]] 3211; UNROLL: 4: 3212; UNROLL-NEXT: ret i32 0 3213; 3214; UNROLL-NO-IC-LABEL: @max_i32_backedgetaken( 3215; UNROLL-NO-IC-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3216; UNROLL-NO-IC: vector.ph: 3217; UNROLL-NO-IC-NEXT: br label 
[[VECTOR_BODY:%.*]] 3218; UNROLL-NO-IC: vector.body: 3219; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3220; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] 3221; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] 3222; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 0, [[INDEX]] 3223; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 3224; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], -2 3225; UNROLL-NO-IC-NEXT: [[TMP3]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4> 3226; UNROLL-NO-IC-NEXT: [[TMP4]] = and <2 x i32> [[VEC_PHI1]], <i32 4, i32 4> 3227; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 3228; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 3229; UNROLL-NO-IC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] 3230; UNROLL-NO-IC: middle.block: 3231; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP4]], [[TMP3]] 3232; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) 3233; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 0, 0 3234; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[TMP11:%.*]], label [[SCALAR_PH]] 3235; UNROLL-NO-IC: scalar.ph: 3236; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ] 3237; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] 3238; UNROLL-NO-IC-NEXT: br label [[TMP7:%.*]] 3239; UNROLL-NO-IC: 7: 3240; UNROLL-NO-IC-NEXT: [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP8:%.*]], [[TMP7]] ] 3241; UNROLL-NO-IC-NEXT: [[B_0:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP9:%.*]], [[TMP7]] ] 3242; UNROLL-NO-IC-NEXT: [[TMP8]] = and i32 [[A_0]], 4 3243; UNROLL-NO-IC-NEXT: 
[[TMP9]] = add i32 [[B_0]], -1 3244; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 3245; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[TMP11]], label [[TMP7]], !llvm.loop [[LOOP33:![0-9]+]] 3246; UNROLL-NO-IC: 11: 3247; UNROLL-NO-IC-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] 3248; UNROLL-NO-IC-NEXT: ret i32 [[DOTLCSSA]] 3249; 3250; INTERLEAVE-LABEL: @max_i32_backedgetaken( 3251; INTERLEAVE-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3252; INTERLEAVE: vector.ph: 3253; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 3254; INTERLEAVE: vector.body: 3255; INTERLEAVE-NEXT: br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] 3256; INTERLEAVE: middle.block: 3257; INTERLEAVE-NEXT: br i1 undef, label [[TMP4:%.*]], label [[SCALAR_PH]] 3258; INTERLEAVE: scalar.ph: 3259; INTERLEAVE-NEXT: br label [[TMP1:%.*]] 3260; INTERLEAVE: 1: 3261; INTERLEAVE-NEXT: [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TMP2:%.*]], [[TMP1]] ] 3262; INTERLEAVE-NEXT: [[TMP2]] = add i32 [[B_0]], -1 3263; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 3264; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[TMP4]], label [[TMP1]], !llvm.loop [[LOOP33:![0-9]+]] 3265; INTERLEAVE: 4: 3266; INTERLEAVE-NEXT: ret i32 0 3267; 3268 br label %1 3269 3270; <label>:1 ; preds = %1, %0 3271 %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ] 3272 %b.0 = phi i32 [ 0, %0 ], [ %3, %1 ] 3273 %2 = and i32 %a.0, 4 3274 %3 = add i32 %b.0, -1 3275 %4 = icmp eq i32 %3, 0 3276 br i1 %4, label %5, label %1 3277 3278; <label>:5 ; preds = %1 3279 ret i32 %2 3280} 3281 3282; When generating the overflow check we must make sure that the induction start value 3283; is defined before the branch to the scalar preheader. 
3284 3285 3286 3287@e = global i8 1, align 1 3288@d = common global i32 0, align 4 3289@c = common global i32 0, align 4 3290define i32 @testoverflowcheck() { 3291; CHECK-LABEL: @testoverflowcheck( 3292; CHECK-NEXT: entry: 3293; CHECK-NEXT: [[DOTPR_I:%.*]] = load i8, i8* @e, align 1 3294; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @d, align 4 3295; CHECK-NEXT: [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4 3296; CHECK-NEXT: [[TMP1:%.*]] = sub i8 -1, [[DOTPR_I]] 3297; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 3298; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 3299; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 2 3300; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3301; CHECK: vector.ph: 3302; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP3]], 2 3303; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]] 3304; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 3305; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_CRD]] 3306; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i32 0 3307; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0 3308; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 3309; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 3310; CHECK: vector.body: 3311; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3312; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] 3313; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[INDEX]] to i8 3314; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[DOTPR_I]], [[TMP5]] 3315; CHECK-NEXT: [[TMP6:%.*]] = add i8 [[OFFSET_IDX]], 0 3316; CHECK-NEXT: [[TMP7]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI]] 3317; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 3318; 
CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 3319; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] 3320; CHECK: middle.block: 3321; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP7]]) 3322; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] 3323; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] 3324; CHECK: scalar.ph: 3325; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ] 3326; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[C_PROMOTED_I]], [[ENTRY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] 3327; CHECK-NEXT: br label [[COND_END_I:%.*]] 3328; CHECK: cond.end.i: 3329; CHECK-NEXT: [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ] 3330; CHECK-NEXT: [[AND3_I:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[AND_I:%.*]], [[COND_END_I]] ] 3331; CHECK-NEXT: [[AND_I]] = and i32 [[TMP0]], [[AND3_I]] 3332; CHECK-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1 3333; CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0 3334; CHECK-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]] 3335; CHECK: loopexit: 3336; CHECK-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] 3337; CHECK-NEXT: ret i32 [[AND_I_LCSSA]] 3338; 3339; IND-LABEL: @testoverflowcheck( 3340; IND-NEXT: entry: 3341; IND-NEXT: [[DOTPR_I:%.*]] = load i8, i8* @e, align 1 3342; IND-NEXT: [[TMP0:%.*]] = load i32, i32* @d, align 4 3343; IND-NEXT: [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4 3344; IND-NEXT: [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1 3345; IND-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 3346; IND-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 3347; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i8 [[DOTPR_I]], -1 3348; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label 
[[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3349; IND: vector.ph: 3350; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 510 3351; IND-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 3352; IND-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_CRD]] 3353; IND-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0 3354; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 3355; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 3356; IND-NEXT: br label [[VECTOR_BODY:%.*]] 3357; IND: vector.body: 3358; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3359; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 3360; IND-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 3361; IND-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] 3362; IND: middle.block: 3363; IND-NEXT: [[TMP6:%.*]] = and <2 x i32> [[TMP4]], [[BROADCAST_SPLAT]] 3364; IND-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP6]]) 3365; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] 3366; IND-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] 3367; IND: scalar.ph: 3368; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ] 3369; IND-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[C_PROMOTED_I]], [[ENTRY]] ] 3370; IND-NEXT: br label [[COND_END_I:%.*]] 3371; IND: cond.end.i: 3372; IND-NEXT: [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ] 3373; IND-NEXT: [[TMP8:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]] 3374; IND-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1 3375; IND-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0 3376; IND-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label 
[[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]] 3377; IND: loopexit: 3378; IND-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[TMP8]], [[COND_END_I]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 3379; IND-NEXT: ret i32 [[AND_I_LCSSA]] 3380; 3381; UNROLL-LABEL: @testoverflowcheck( 3382; UNROLL-NEXT: entry: 3383; UNROLL-NEXT: [[DOTPR_I:%.*]] = load i8, i8* @e, align 1 3384; UNROLL-NEXT: [[TMP0:%.*]] = load i32, i32* @d, align 4 3385; UNROLL-NEXT: [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4 3386; UNROLL-NEXT: [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1 3387; UNROLL-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 3388; UNROLL-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 3389; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i8 [[DOTPR_I]], -4 3390; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3391; UNROLL: vector.ph: 3392; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 508 3393; UNROLL-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 3394; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_CRD]] 3395; UNROLL-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0 3396; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 3397; UNROLL-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 3398; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 3399; UNROLL: vector.body: 3400; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3401; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 3402; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 3403; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] 3404; UNROLL: middle.block: 3405; UNROLL-NEXT: [[TMP6:%.*]] = and <2 x i32> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLATINSERT]] 3406; UNROLL-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, 
<2 x i32> zeroinitializer 3407; UNROLL-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP7]], [[TMP4]] 3408; UNROLL-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) 3409; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] 3410; UNROLL-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] 3411; UNROLL: scalar.ph: 3412; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ] 3413; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[C_PROMOTED_I]], [[ENTRY]] ] 3414; UNROLL-NEXT: br label [[COND_END_I:%.*]] 3415; UNROLL: cond.end.i: 3416; UNROLL-NEXT: [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ] 3417; UNROLL-NEXT: [[TMP9:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]] 3418; UNROLL-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1 3419; UNROLL-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0 3420; UNROLL-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]] 3421; UNROLL: loopexit: 3422; UNROLL-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[TMP9]], [[COND_END_I]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] 3423; UNROLL-NEXT: ret i32 [[AND_I_LCSSA]] 3424; 3425; UNROLL-NO-IC-LABEL: @testoverflowcheck( 3426; UNROLL-NO-IC-NEXT: entry: 3427; UNROLL-NO-IC-NEXT: [[DOTPR_I:%.*]] = load i8, i8* @e, align 1 3428; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i32, i32* @d, align 4 3429; UNROLL-NO-IC-NEXT: [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4 3430; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = sub i8 -1, [[DOTPR_I]] 3431; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 3432; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 3433; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4 3434; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3435; UNROLL-NO-IC: vector.ph: 3436; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem 
i32 [[TMP3]], 4 3437; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]] 3438; UNROLL-NO-IC-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 3439; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_CRD]] 3440; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i32 0 3441; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0 3442; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 3443; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0 3444; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer 3445; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 3446; UNROLL-NO-IC: vector.body: 3447; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3448; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] 3449; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] 3450; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = trunc i32 [[INDEX]] to i8 3451; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[DOTPR_I]], [[TMP5]] 3452; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i8 [[OFFSET_IDX]], 0 3453; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i8 [[OFFSET_IDX]], 2 3454; UNROLL-NO-IC-NEXT: [[TMP8]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI]] 3455; UNROLL-NO-IC-NEXT: [[TMP9]] = and <2 x i32> [[BROADCAST_SPLAT3]], [[VEC_PHI1]] 3456; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 3457; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 3458; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP34:![0-9]+]] 3459; UNROLL-NO-IC: middle.block: 3460; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = and <2 x i32> [[TMP9]], [[TMP8]] 3461; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]]) 3462; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] 3463; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] 3464; UNROLL-NO-IC: scalar.ph: 3465; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ] 3466; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[C_PROMOTED_I]], [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] 3467; UNROLL-NO-IC-NEXT: br label [[COND_END_I:%.*]] 3468; UNROLL-NO-IC: cond.end.i: 3469; UNROLL-NO-IC-NEXT: [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ] 3470; UNROLL-NO-IC-NEXT: [[AND3_I:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[AND_I:%.*]], [[COND_END_I]] ] 3471; UNROLL-NO-IC-NEXT: [[AND_I]] = and i32 [[TMP0]], [[AND3_I]] 3472; UNROLL-NO-IC-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1 3473; UNROLL-NO-IC-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0 3474; UNROLL-NO-IC-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]] 3475; UNROLL-NO-IC: loopexit: 3476; UNROLL-NO-IC-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] 3477; UNROLL-NO-IC-NEXT: ret i32 [[AND_I_LCSSA]] 3478; 3479; INTERLEAVE-LABEL: @testoverflowcheck( 3480; INTERLEAVE-NEXT: entry: 3481; INTERLEAVE-NEXT: [[DOTPR_I:%.*]] = load i8, i8* @e, align 1 3482; INTERLEAVE-NEXT: [[TMP0:%.*]] = load i32, i32* @d, align 4 3483; INTERLEAVE-NEXT: [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4 3484; INTERLEAVE-NEXT: [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1 3485; INTERLEAVE-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 3486; INTERLEAVE-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 3487; INTERLEAVE-NEXT: 
[[MIN_ITERS_CHECK:%.*]] = icmp ugt i8 [[DOTPR_I]], -8 3488; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 3489; INTERLEAVE: vector.ph: 3490; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP3]], 504 3491; INTERLEAVE-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 3492; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_CRD]] 3493; INTERLEAVE-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 poison, i32 -1, i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i64 0 3494; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 3495; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 3496; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 3497; INTERLEAVE: vector.body: 3498; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3499; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 3500; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 3501; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] 3502; INTERLEAVE: middle.block: 3503; INTERLEAVE-NEXT: [[TMP6:%.*]] = and <4 x i32> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLATINSERT]] 3504; INTERLEAVE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer 3505; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP7]], [[TMP4]] 3506; INTERLEAVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[BIN_RDX]]) 3507; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]] 3508; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]] 3509; INTERLEAVE: scalar.ph: 3510; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ] 3511; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], 
[ [[C_PROMOTED_I]], [[ENTRY]] ] 3512; INTERLEAVE-NEXT: br label [[COND_END_I:%.*]] 3513; INTERLEAVE: cond.end.i: 3514; INTERLEAVE-NEXT: [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ] 3515; INTERLEAVE-NEXT: [[TMP9:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]] 3516; INTERLEAVE-NEXT: [[INC_I]] = add i8 [[INC4_I]], 1 3517; INTERLEAVE-NEXT: [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0 3518; INTERLEAVE-NEXT: br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]] 3519; INTERLEAVE: loopexit: 3520; INTERLEAVE-NEXT: [[AND_I_LCSSA:%.*]] = phi i32 [ [[TMP9]], [[COND_END_I]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] 3521; INTERLEAVE-NEXT: ret i32 [[AND_I_LCSSA]] 3522; 3523entry: 3524 %.pr.i = load i8, i8* @e, align 1 3525 %0 = load i32, i32* @d, align 4 3526 %c.promoted.i = load i32, i32* @c, align 4 3527 br label %cond.end.i 3528 3529cond.end.i: 3530 %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ] 3531 %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ] 3532 %and.i = and i32 %0, %and3.i 3533 %inc.i = add i8 %inc4.i, 1 3534 %tobool.i = icmp eq i8 %inc.i, 0 3535 br i1 %tobool.i, label %loopexit, label %cond.end.i 3536 3537loopexit: 3538 ret i32 %and.i 3539} 3540 3541; The SCEV expression of %sphi is (zext i8 {%t,+,1}<%loop> to i32) 3542; In order to recognize %sphi as an induction PHI and vectorize this loop, 3543; we need to convert the SCEV expression into an AddRecExpr. 3544; The expression gets converted to {zext i8 %t to i32,+,1}. 
3545 3546define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) { 3547; CHECK-LABEL: @wrappingindvars1( 3548; CHECK-NEXT: entry: 3549; CHECK-NEXT: [[ST:%.*]] = zext i8 [[T:%.*]] to i16 3550; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i32 3551; CHECK-NEXT: [[ECMP:%.*]] = icmp ult i16 [[ST]], 42 3552; CHECK-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] 3553; CHECK: loop.preheader: 3554; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 3555; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 3556; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 3557; CHECK: vector.scevcheck: 3558; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 3559; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]] 3560; CHECK-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]] 3561; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]] 3562; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]] 3563; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] 3564; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 3565; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] 3566; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8 3567; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]] 3568; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]] 3569; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]] 3570; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]] 3571; CHECK-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]] 3572; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255 3573; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] 3574; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]] 3575; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 3576; CHECK: vector.ph: 3577; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 3578; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] 3579; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 
3580; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[CAST_CRD]] 3581; CHECK-NEXT: [[IND_END3:%.*]] = add i32 [[EXT]], [[N_VEC]] 3582; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[T]], i32 0 3583; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer 3584; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], <i8 0, i8 1> 3585; CHECK-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i32 0 3586; CHECK-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer 3587; CHECK-NEXT: [[INDUCTION6:%.*]] = add <2 x i32> [[DOTSPLAT5]], <i32 0, i32 1> 3588; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 3589; CHECK: vector.body: 3590; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3591; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 3592; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VECTOR_BODY]] ] 3593; CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[INDEX]] to i8 3594; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP19]] 3595; CHECK-NEXT: [[TMP20:%.*]] = add i8 [[OFFSET_IDX]], 0 3596; CHECK-NEXT: [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 1 3597; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[INDEX]], 0 3598; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP20]] 3599; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0 3600; CHECK-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <2 x i32>* 3601; CHECK-NEXT: store <2 x i32> [[VEC_IND7]], <2 x i32>* [[TMP25]], align 4 3602; CHECK-NEXT: [[TMP26:%.*]] = add <2 x i8> [[VEC_IND]], <i8 1, i8 1> 3603; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 3604; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], <i8 2, i8 2> 3605; 
CHECK-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], <i32 2, i32 2> 3606; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 3607; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] 3608; CHECK: middle.block: 3609; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 3610; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 3611; CHECK: scalar.ph: 3612; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] 3613; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 3614; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] 3615; CHECK-NEXT: br label [[LOOP:%.*]] 3616; CHECK: loop: 3617; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 3618; CHECK-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 3619; CHECK-NEXT: [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] 3620; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[IDX]] 3621; CHECK-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4 3622; CHECK-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1 3623; CHECK-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32 3624; CHECK-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 3625; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] 3626; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] 3627; CHECK: exit.loopexit: 3628; CHECK-NEXT: br label [[EXIT]] 3629; CHECK: exit: 3630; CHECK-NEXT: ret void 3631; 3632; IND-LABEL: @wrappingindvars1( 3633; IND-NEXT: entry: 3634; IND-NEXT: [[EXT:%.*]] = zext i8 
[[T:%.*]] to i32 3635; IND-NEXT: [[ECMP:%.*]] = icmp ult i8 [[T]], 42 3636; IND-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] 3637; IND: loop.preheader: 3638; IND-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 3639; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 3640; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 3641; IND: vector.scevcheck: 3642; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 3643; IND-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1 3644; IND-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]] 3645; IND-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255 3646; IND-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] 3647; IND-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8 3648; IND-NEXT: [[TMP7:%.*]] = add i8 [[TMP6]], [[T]] 3649; IND-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]] 3650; IND-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255 3651; IND-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] 3652; IND-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]] 3653; IND-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 3654; IND: vector.ph: 3655; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -2 3656; IND-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 3657; IND-NEXT: [[IND_END:%.*]] = add i8 [[CAST_CRD]], [[T]] 3658; IND-NEXT: [[IND_END3:%.*]] = add i32 [[N_VEC]], [[EXT]] 3659; IND-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i64 0 3660; IND-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer 3661; IND-NEXT: [[INDUCTION6:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT5]], <i32 0, i32 1> 3662; IND-NEXT: br label [[VECTOR_BODY:%.*]] 3663; IND: vector.body: 3664; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3665; IND-NEXT: [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VECTOR_BODY]] ] 3666; 
IND-NEXT: [[TMP12:%.*]] = trunc i32 [[INDEX]] to i8 3667; IND-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP12]], [[T]] 3668; IND-NEXT: [[TMP13:%.*]] = sext i8 [[OFFSET_IDX]] to i64 3669; IND-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP13]] 3670; IND-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <2 x i32>* 3671; IND-NEXT: store <2 x i32> [[VEC_IND7]], <2 x i32>* [[TMP15]], align 4 3672; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 3673; IND-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], <i32 2, i32 2> 3674; IND-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 3675; IND-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] 3676; IND: middle.block: 3677; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 3678; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 3679; IND: scalar.ph: 3680; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] 3681; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 3682; IND-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] 3683; IND-NEXT: br label [[LOOP:%.*]] 3684; IND: loop: 3685; IND-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 3686; IND-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 3687; IND-NEXT: [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] 3688; IND-NEXT: [[TMP17:%.*]] = sext i8 [[IDX]] to i64 3689; IND-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]] 3690; IND-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4 3691; IND-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1 3692; 
IND-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32 3693; IND-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 3694; IND-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] 3695; IND-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] 3696; IND: exit.loopexit: 3697; IND-NEXT: br label [[EXIT]] 3698; IND: exit: 3699; IND-NEXT: ret void 3700; 3701; UNROLL-LABEL: @wrappingindvars1( 3702; UNROLL-NEXT: entry: 3703; UNROLL-NEXT: [[EXT:%.*]] = zext i8 [[T:%.*]] to i32 3704; UNROLL-NEXT: [[ECMP:%.*]] = icmp ult i8 [[T]], 42 3705; UNROLL-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] 3706; UNROLL: loop.preheader: 3707; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 3708; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 3709; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 3710; UNROLL: vector.scevcheck: 3711; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 3712; UNROLL-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1 3713; UNROLL-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]] 3714; UNROLL-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255 3715; UNROLL-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] 3716; UNROLL-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8 3717; UNROLL-NEXT: [[TMP7:%.*]] = add i8 [[TMP6]], [[T]] 3718; UNROLL-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]] 3719; UNROLL-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255 3720; UNROLL-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] 3721; UNROLL-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]] 3722; UNROLL-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 3723; UNROLL: vector.ph: 3724; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -4 3725; UNROLL-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 3726; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[CAST_CRD]], [[T]] 3727; UNROLL-NEXT: [[IND_END3:%.*]] = add i32 [[N_VEC]], [[EXT]] 3728; UNROLL-NEXT: [[DOTSPLATINSERT5:%.*]] = 
insertelement <2 x i32> poison, i32 [[EXT]], i64 0 3729; UNROLL-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT5]], <2 x i32> poison, <2 x i32> zeroinitializer 3730; UNROLL-NEXT: [[INDUCTION7:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT6]], <i32 0, i32 1> 3731; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 3732; UNROLL: vector.body: 3733; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3734; UNROLL-NEXT: [[VEC_IND8:%.*]] = phi <2 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] 3735; UNROLL-NEXT: [[TMP12:%.*]] = trunc i32 [[INDEX]] to i8 3736; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP12]], [[T]] 3737; UNROLL-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], <i32 2, i32 2> 3738; UNROLL-NEXT: [[TMP13:%.*]] = sext i8 [[OFFSET_IDX]] to i64 3739; UNROLL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP13]] 3740; UNROLL-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <2 x i32>* 3741; UNROLL-NEXT: store <2 x i32> [[VEC_IND8]], <2 x i32>* [[TMP15]], align 4 3742; UNROLL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i64 2 3743; UNROLL-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <2 x i32>* 3744; UNROLL-NEXT: store <2 x i32> [[STEP_ADD9]], <2 x i32>* [[TMP17]], align 4 3745; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 3746; UNROLL-NEXT: [[VEC_IND_NEXT11]] = add <2 x i32> [[VEC_IND8]], <i32 4, i32 4> 3747; UNROLL-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 3748; UNROLL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] 3749; UNROLL: middle.block: 3750; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 3751; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 3752; UNROLL: scalar.ph: 3753; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], 
[[VECTOR_SCEVCHECK]] ] 3754; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 3755; UNROLL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] 3756; UNROLL-NEXT: br label [[LOOP:%.*]] 3757; UNROLL: loop: 3758; UNROLL-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 3759; UNROLL-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 3760; UNROLL-NEXT: [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] 3761; UNROLL-NEXT: [[TMP19:%.*]] = sext i8 [[IDX]] to i64 3762; UNROLL-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]] 3763; UNROLL-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4 3764; UNROLL-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1 3765; UNROLL-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32 3766; UNROLL-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 3767; UNROLL-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] 3768; UNROLL-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] 3769; UNROLL: exit.loopexit: 3770; UNROLL-NEXT: br label [[EXIT]] 3771; UNROLL: exit: 3772; UNROLL-NEXT: ret void 3773; 3774; UNROLL-NO-IC-LABEL: @wrappingindvars1( 3775; UNROLL-NO-IC-NEXT: entry: 3776; UNROLL-NO-IC-NEXT: [[ST:%.*]] = zext i8 [[T:%.*]] to i16 3777; UNROLL-NO-IC-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i32 3778; UNROLL-NO-IC-NEXT: [[ECMP:%.*]] = icmp ult i16 [[ST]], 42 3779; UNROLL-NO-IC-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] 3780; UNROLL-NO-IC: loop.preheader: 3781; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 3782; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 3783; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label 
[[VECTOR_SCEVCHECK:%.*]] 3784; UNROLL-NO-IC: vector.scevcheck: 3785; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 3786; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]] 3787; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]] 3788; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]] 3789; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]] 3790; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] 3791; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 3792; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] 3793; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8 3794; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]] 3795; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]] 3796; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]] 3797; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]] 3798; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]] 3799; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255 3800; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] 3801; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]] 3802; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 3803; UNROLL-NO-IC: vector.ph: 3804; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 3805; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] 3806; UNROLL-NO-IC-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 3807; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[CAST_CRD]] 3808; UNROLL-NO-IC-NEXT: [[IND_END3:%.*]] = add i32 [[EXT]], [[N_VEC]] 3809; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[T]], i32 0 3810; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer 3811; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], <i8 0, i8 
1> 3812; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i32 0 3813; UNROLL-NO-IC-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT5]], <2 x i32> poison, <2 x i32> zeroinitializer 3814; UNROLL-NO-IC-NEXT: [[INDUCTION7:%.*]] = add <2 x i32> [[DOTSPLAT6]], <i32 0, i32 1> 3815; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 3816; UNROLL-NO-IC: vector.body: 3817; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3818; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 3819; UNROLL-NO-IC-NEXT: [[VEC_IND8:%.*]] = phi <2 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] 3820; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], <i8 2, i8 2> 3821; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = trunc i32 [[INDEX]] to i8 3822; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP19]] 3823; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = add i8 [[OFFSET_IDX]], 0 3824; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 1 3825; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 2 3826; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], 3 3827; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 0 3828; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = add i32 [[INDEX]], 2 3829; UNROLL-NO-IC-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], <i32 2, i32 2> 3830; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP20]] 3831; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[TMP22]] 3832; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 0 3833; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <2 x i32>* 3834; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND8]], <2 x i32>* [[TMP29]], align 4 3835; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = getelementptr 
inbounds i32, i32* [[TMP26]], i32 2 3836; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <2 x i32>* 3837; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD9]], <2 x i32>* [[TMP31]], align 4 3838; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = add <2 x i8> [[VEC_IND]], <i8 1, i8 1> 3839; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = add <2 x i8> [[STEP_ADD]], <i8 1, i8 1> 3840; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 3841; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], <i8 2, i8 2> 3842; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT11]] = add <2 x i32> [[STEP_ADD9]], <i32 2, i32 2> 3843; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 3844; UNROLL-NO-IC-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] 3845; UNROLL-NO-IC: middle.block: 3846; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 3847; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 3848; UNROLL-NO-IC: scalar.ph: 3849; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] 3850; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 3851; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] 3852; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] 3853; UNROLL-NO-IC: loop: 3854; UNROLL-NO-IC-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 3855; UNROLL-NO-IC-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 3856; UNROLL-NO-IC-NEXT: [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] 3857; UNROLL-NO-IC-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* 
[[A]], i8 [[IDX]] 3858; UNROLL-NO-IC-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4 3859; UNROLL-NO-IC-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1 3860; UNROLL-NO-IC-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32 3861; UNROLL-NO-IC-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 3862; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] 3863; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] 3864; UNROLL-NO-IC: exit.loopexit: 3865; UNROLL-NO-IC-NEXT: br label [[EXIT]] 3866; UNROLL-NO-IC: exit: 3867; UNROLL-NO-IC-NEXT: ret void 3868; 3869; INTERLEAVE-LABEL: @wrappingindvars1( 3870; INTERLEAVE-NEXT: entry: 3871; INTERLEAVE-NEXT: [[EXT:%.*]] = zext i8 [[T:%.*]] to i32 3872; INTERLEAVE-NEXT: [[ECMP:%.*]] = icmp ult i8 [[T]], 42 3873; INTERLEAVE-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] 3874; INTERLEAVE: loop.preheader: 3875; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 3876; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8 3877; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 3878; INTERLEAVE: vector.scevcheck: 3879; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 3880; INTERLEAVE-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1 3881; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]] 3882; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255 3883; INTERLEAVE-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] 3884; INTERLEAVE-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8 3885; INTERLEAVE-NEXT: [[TMP7:%.*]] = add i8 [[TMP6]], [[T]] 3886; INTERLEAVE-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]] 3887; INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255 3888; INTERLEAVE-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] 3889; INTERLEAVE-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]] 3890; INTERLEAVE-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 3891; INTERLEAVE: 
vector.ph: 3892; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -8 3893; INTERLEAVE-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 3894; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[CAST_CRD]], [[T]] 3895; INTERLEAVE-NEXT: [[IND_END3:%.*]] = add i32 [[N_VEC]], [[EXT]] 3896; INTERLEAVE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[EXT]], i64 0 3897; INTERLEAVE-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer 3898; INTERLEAVE-NEXT: [[INDUCTION7:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT6]], <i32 0, i32 1, i32 2, i32 3> 3899; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 3900; INTERLEAVE: vector.body: 3901; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 3902; INTERLEAVE-NEXT: [[VEC_IND8:%.*]] = phi <4 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] 3903; INTERLEAVE-NEXT: [[TMP12:%.*]] = trunc i32 [[INDEX]] to i8 3904; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP12]], [[T]] 3905; INTERLEAVE-NEXT: [[STEP_ADD9:%.*]] = add <4 x i32> [[VEC_IND8]], <i32 4, i32 4, i32 4, i32 4> 3906; INTERLEAVE-NEXT: [[TMP13:%.*]] = sext i8 [[OFFSET_IDX]] to i64 3907; INTERLEAVE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP13]] 3908; INTERLEAVE-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>* 3909; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND8]], <4 x i32>* [[TMP15]], align 4 3910; INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i64 4 3911; INTERLEAVE-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 3912; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD9]], <4 x i32>* [[TMP17]], align 4 3913; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 3914; INTERLEAVE-NEXT: [[VEC_IND_NEXT11]] = add <4 x i32> [[VEC_IND8]], <i32 8, i32 8, i32 8, i32 8> 3915; INTERLEAVE-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 
3916; INTERLEAVE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] 3917; INTERLEAVE: middle.block: 3918; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 3919; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 3920; INTERLEAVE: scalar.ph: 3921; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] 3922; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 3923; INTERLEAVE-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ] 3924; INTERLEAVE-NEXT: br label [[LOOP:%.*]] 3925; INTERLEAVE: loop: 3926; INTERLEAVE-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 3927; INTERLEAVE-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 3928; INTERLEAVE-NEXT: [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] 3929; INTERLEAVE-NEXT: [[TMP19:%.*]] = sext i8 [[IDX]] to i64 3930; INTERLEAVE-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]] 3931; INTERLEAVE-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4 3932; INTERLEAVE-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1 3933; INTERLEAVE-NEXT: [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32 3934; INTERLEAVE-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 3935; INTERLEAVE-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] 3936; INTERLEAVE-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]] 3937; INTERLEAVE: exit.loopexit: 3938; INTERLEAVE-NEXT: br label [[EXIT]] 3939; INTERLEAVE: exit: 3940; INTERLEAVE-NEXT: ret void 3941; 3942 entry: 3943 %st = zext i8 %t to i16 3944 %ext = zext i8 %t 
to i32 3945 %ecmp = icmp ult i16 %st, 42 3946 br i1 %ecmp, label %loop, label %exit 3947 3948 loop: 3949 3950 %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ] 3951 %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ] 3952 %sphi = phi i32 [ %ext, %entry ], [%idx.inc.ext, %loop] 3953 3954 %ptr = getelementptr inbounds i32, i32* %A, i8 %idx 3955 store i32 %sphi, i32* %ptr 3956 3957 %idx.inc = add i8 %idx, 1 3958 %idx.inc.ext = zext i8 %idx.inc to i32 3959 %idx.b.inc = add nuw nsw i32 %idx.b, 1 3960 3961 %c = icmp ult i32 %idx.b, %len 3962 br i1 %c, label %loop, label %exit 3963 3964 exit: 3965 ret void 3966} 3967 3968; The SCEV expression of %sphi is (4 * (zext i8 {%t,+,1}<%loop> to i32)). 3969; In order to recognize %sphi as an induction PHI and vectorize this loop, 3970; we need to convert the SCEV expression into an AddRecExpr, which is only valid when the i8 IV does not wrap (guarded at run time by the vector.scevcheck block). 3971; The expression gets converted to ({4 * (zext i8 %t to i32),+,4}<%loop>). 3972define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) { 3973; CHECK-LABEL: @wrappingindvars2( 3974; CHECK-NEXT: entry: 3975; CHECK-NEXT: [[ST:%.*]] = zext i8 [[T:%.*]] to i16 3976; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i32 3977; CHECK-NEXT: [[EXT_MUL:%.*]] = mul i32 [[EXT]], 4 3978; CHECK-NEXT: [[ECMP:%.*]] = icmp ult i16 [[ST]], 42 3979; CHECK-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] 3980; CHECK: loop.preheader: 3981; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 3982; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 3983; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 3984; CHECK: vector.scevcheck: 3985; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 3986; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]] 3987; CHECK-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]] 3988; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]] 3989; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]] 3990; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] 3991;
CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 3992; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] 3993; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8 3994; CHECK-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]] 3995; CHECK-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]] 3996; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]] 3997; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]] 3998; CHECK-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]] 3999; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255 4000; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] 4001; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]] 4002; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 4003; CHECK: vector.ph: 4004; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 4005; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] 4006; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 4007; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[CAST_CRD]] 4008; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[N_VEC]], 4 4009; CHECK-NEXT: [[IND_END2:%.*]] = add i32 [[EXT_MUL]], [[TMP19]] 4010; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[T]], i32 0 4011; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer 4012; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], <i8 0, i8 1> 4013; CHECK-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i32 0 4014; CHECK-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer 4015; CHECK-NEXT: [[INDUCTION6:%.*]] = add <2 x i32> [[DOTSPLAT5]], <i32 0, i32 4> 4016; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 4017; CHECK: vector.body: 4018; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4019; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ [[INDUCTION]], 
[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 4020; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VECTOR_BODY]] ] 4021; CHECK-NEXT: [[TMP20:%.*]] = trunc i32 [[INDEX]] to i8 4022; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP20]] 4023; CHECK-NEXT: [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 0 4024; CHECK-NEXT: [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 1 4025; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[INDEX]], 0 4026; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP21]] 4027; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i32 0 4028; CHECK-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <2 x i32>* 4029; CHECK-NEXT: store <2 x i32> [[VEC_IND7]], <2 x i32>* [[TMP26]], align 4 4030; CHECK-NEXT: [[TMP27:%.*]] = add <2 x i8> [[VEC_IND]], <i8 1, i8 1> 4031; CHECK-NEXT: [[TMP28:%.*]] = zext <2 x i8> [[TMP27]] to <2 x i32> 4032; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 4033; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], <i8 2, i8 2> 4034; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], <i32 8, i32 8> 4035; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 4036; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] 4037; CHECK: middle.block: 4038; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 4039; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 4040; CHECK: scalar.ph: 4041; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] 4042; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] 4043; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, 
[[VECTOR_SCEVCHECK]] ] 4044; CHECK-NEXT: br label [[LOOP:%.*]] 4045; CHECK: loop: 4046; CHECK-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4047; CHECK-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 4048; CHECK-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] 4049; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[IDX]] 4050; CHECK-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4 4051; CHECK-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1 4052; CHECK-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32 4053; CHECK-NEXT: [[MUL]] = mul i32 [[IDX_INC_EXT]], 4 4054; CHECK-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 4055; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] 4056; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]] 4057; CHECK: exit.loopexit: 4058; CHECK-NEXT: br label [[EXIT]] 4059; CHECK: exit: 4060; CHECK-NEXT: ret void 4061; 4062; IND-LABEL: @wrappingindvars2( 4063; IND-NEXT: entry: 4064; IND-NEXT: [[EXT:%.*]] = zext i8 [[T:%.*]] to i32 4065; IND-NEXT: [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2 4066; IND-NEXT: [[ECMP:%.*]] = icmp ult i8 [[T]], 42 4067; IND-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] 4068; IND: loop.preheader: 4069; IND-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 4070; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 4071; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 4072; IND: vector.scevcheck: 4073; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 4074; IND-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1 4075; IND-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]] 4076; IND-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255 4077; IND-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] 4078; IND-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8 4079; IND-NEXT: 
[[TMP7:%.*]] = add i8 [[TMP6]], [[T]] 4080; IND-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]] 4081; IND-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255 4082; IND-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] 4083; IND-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]] 4084; IND-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 4085; IND: vector.ph: 4086; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -2 4087; IND-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 4088; IND-NEXT: [[IND_END:%.*]] = add i8 [[CAST_CRD]], [[T]] 4089; IND-NEXT: [[TMP12:%.*]] = add i32 [[N_VEC]], [[EXT]] 4090; IND-NEXT: [[IND_END2:%.*]] = shl i32 [[TMP12]], 2 4091; IND-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0 4092; IND-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer 4093; IND-NEXT: [[INDUCTION6:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT5]], <i32 0, i32 4> 4094; IND-NEXT: br label [[VECTOR_BODY:%.*]] 4095; IND: vector.body: 4096; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4097; IND-NEXT: [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VECTOR_BODY]] ] 4098; IND-NEXT: [[TMP13:%.*]] = trunc i32 [[INDEX]] to i8 4099; IND-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP13]], [[T]] 4100; IND-NEXT: [[TMP14:%.*]] = sext i8 [[OFFSET_IDX]] to i64 4101; IND-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP14]] 4102; IND-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <2 x i32>* 4103; IND-NEXT: store <2 x i32> [[VEC_IND7]], <2 x i32>* [[TMP16]], align 4 4104; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 4105; IND-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], <i32 8, i32 8> 4106; IND-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 4107; IND-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP38:![0-9]+]] 4108; IND: middle.block: 4109; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 4110; IND-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 4111; IND: scalar.ph: 4112; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] 4113; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] 4114; IND-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 4115; IND-NEXT: br label [[LOOP:%.*]] 4116; IND: loop: 4117; IND-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4118; IND-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 4119; IND-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] 4120; IND-NEXT: [[TMP18:%.*]] = sext i8 [[IDX]] to i64 4121; IND-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]] 4122; IND-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4 4123; IND-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1 4124; IND-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32 4125; IND-NEXT: [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2 4126; IND-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 4127; IND-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] 4128; IND-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]] 4129; IND: exit.loopexit: 4130; IND-NEXT: br label [[EXIT]] 4131; IND: exit: 4132; IND-NEXT: ret void 4133; 4134; UNROLL-LABEL: @wrappingindvars2( 4135; UNROLL-NEXT: entry: 4136; UNROLL-NEXT: [[EXT:%.*]] = zext i8 [[T:%.*]] to i32 4137; UNROLL-NEXT: [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2 4138; UNROLL-NEXT: [[ECMP:%.*]] = icmp ult i8 [[T]], 42 4139; UNROLL-NEXT: br i1 
[[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] 4140; UNROLL: loop.preheader: 4141; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 4142; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 4143; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 4144; UNROLL: vector.scevcheck: 4145; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 4146; UNROLL-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1 4147; UNROLL-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]] 4148; UNROLL-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255 4149; UNROLL-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] 4150; UNROLL-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8 4151; UNROLL-NEXT: [[TMP7:%.*]] = add i8 [[TMP6]], [[T]] 4152; UNROLL-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]] 4153; UNROLL-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255 4154; UNROLL-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] 4155; UNROLL-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]] 4156; UNROLL-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 4157; UNROLL: vector.ph: 4158; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -4 4159; UNROLL-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 4160; UNROLL-NEXT: [[IND_END:%.*]] = add i8 [[CAST_CRD]], [[T]] 4161; UNROLL-NEXT: [[TMP12:%.*]] = add i32 [[N_VEC]], [[EXT]] 4162; UNROLL-NEXT: [[IND_END2:%.*]] = shl i32 [[TMP12]], 2 4163; UNROLL-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0 4164; UNROLL-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT5]], <2 x i32> poison, <2 x i32> zeroinitializer 4165; UNROLL-NEXT: [[INDUCTION7:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT6]], <i32 0, i32 4> 4166; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 4167; UNROLL: vector.body: 4168; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4169; UNROLL-NEXT: [[VEC_IND8:%.*]] = phi <2 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], 
[ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] 4170; UNROLL-NEXT: [[TMP13:%.*]] = trunc i32 [[INDEX]] to i8 4171; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP13]], [[T]] 4172; UNROLL-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], <i32 8, i32 8> 4173; UNROLL-NEXT: [[TMP14:%.*]] = sext i8 [[OFFSET_IDX]] to i64 4174; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP14]] 4175; UNROLL-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <2 x i32>* 4176; UNROLL-NEXT: store <2 x i32> [[VEC_IND8]], <2 x i32>* [[TMP16]], align 4 4177; UNROLL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i64 2 4178; UNROLL-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <2 x i32>* 4179; UNROLL-NEXT: store <2 x i32> [[STEP_ADD9]], <2 x i32>* [[TMP18]], align 4 4180; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 4181; UNROLL-NEXT: [[VEC_IND_NEXT11]] = add <2 x i32> [[VEC_IND8]], <i32 16, i32 16> 4182; UNROLL-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 4183; UNROLL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] 4184; UNROLL: middle.block: 4185; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 4186; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 4187; UNROLL: scalar.ph: 4188; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] 4189; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] 4190; UNROLL-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 4191; UNROLL-NEXT: br label [[LOOP:%.*]] 4192; UNROLL: loop: 4193; UNROLL-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4194; UNROLL-NEXT: [[SPHI:%.*]] = phi i32 
[ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 4195; UNROLL-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] 4196; UNROLL-NEXT: [[TMP20:%.*]] = sext i8 [[IDX]] to i64 4197; UNROLL-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]] 4198; UNROLL-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4 4199; UNROLL-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1 4200; UNROLL-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32 4201; UNROLL-NEXT: [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2 4202; UNROLL-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 4203; UNROLL-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] 4204; UNROLL-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]] 4205; UNROLL: exit.loopexit: 4206; UNROLL-NEXT: br label [[EXIT]] 4207; UNROLL: exit: 4208; UNROLL-NEXT: ret void 4209; 4210; UNROLL-NO-IC-LABEL: @wrappingindvars2( 4211; UNROLL-NO-IC-NEXT: entry: 4212; UNROLL-NO-IC-NEXT: [[ST:%.*]] = zext i8 [[T:%.*]] to i16 4213; UNROLL-NO-IC-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i32 4214; UNROLL-NO-IC-NEXT: [[EXT_MUL:%.*]] = mul i32 [[EXT]], 4 4215; UNROLL-NO-IC-NEXT: [[ECMP:%.*]] = icmp ult i16 [[ST]], 42 4216; UNROLL-NO-IC-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] 4217; UNROLL-NO-IC: loop.preheader: 4218; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 4219; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 4220; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 4221; UNROLL-NO-IC: vector.scevcheck: 4222; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 4223; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i8 [[T]], [[TMP1]] 4224; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]] 4225; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]] 4226; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]] 4227; 
UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] 4228; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255 4229; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] 4230; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc i32 [[LEN]] to i8 4231; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i8 [[T]], [[TMP10]] 4232; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]] 4233; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]] 4234; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]] 4235; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]] 4236; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255 4237; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] 4238; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]] 4239; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 4240; UNROLL-NO-IC: vector.ph: 4241; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 4242; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] 4243; UNROLL-NO-IC-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 4244; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i8 [[T]], [[CAST_CRD]] 4245; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = mul i32 [[N_VEC]], 4 4246; UNROLL-NO-IC-NEXT: [[IND_END2:%.*]] = add i32 [[EXT_MUL]], [[TMP19]] 4247; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[T]], i32 0 4248; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer 4249; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], <i8 0, i8 1> 4250; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i32 0 4251; UNROLL-NO-IC-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT5]], <2 x i32> poison, <2 x i32> zeroinitializer 4252; UNROLL-NO-IC-NEXT: [[INDUCTION7:%.*]] = add <2 x i32> [[DOTSPLAT6]], <i32 0, i32 
4> 4253; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 4254; UNROLL-NO-IC: vector.body: 4255; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4256; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 4257; UNROLL-NO-IC-NEXT: [[VEC_IND8:%.*]] = phi <2 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] 4258; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], <i8 2, i8 2> 4259; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = trunc i32 [[INDEX]] to i8 4260; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP20]] 4261; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 0 4262; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 1 4263; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], 2 4264; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], 3 4265; UNROLL-NO-IC-NEXT: [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], <i32 8, i32 8> 4266; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = add i32 [[INDEX]], 0 4267; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = add i32 [[INDEX]], 2 4268; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP21]] 4269; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[TMP23]] 4270; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 0 4271; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP29]] to <2 x i32>* 4272; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND8]], <2 x i32>* [[TMP30]], align 4 4273; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 2 4274; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <2 x i32>* 4275; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD9]], <2 x i32>* [[TMP32]], align 4 4276; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = add <2 x i8> [[VEC_IND]], <i8 1, i8 1> 4277; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = add <2 x 
i8> [[STEP_ADD]], <i8 1, i8 1> 4278; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = zext <2 x i8> [[TMP33]] to <2 x i32> 4279; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = zext <2 x i8> [[TMP34]] to <2 x i32> 4280; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 4281; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], <i8 2, i8 2> 4282; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT11]] = add <2 x i32> [[STEP_ADD9]], <i32 8, i32 8> 4283; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 4284; UNROLL-NO-IC-NEXT: br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] 4285; UNROLL-NO-IC: middle.block: 4286; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 4287; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 4288; UNROLL-NO-IC: scalar.ph: 4289; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] 4290; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] 4291; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 4292; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] 4293; UNROLL-NO-IC: loop: 4294; UNROLL-NO-IC-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4295; UNROLL-NO-IC-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 4296; UNROLL-NO-IC-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] 4297; UNROLL-NO-IC-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[IDX]] 4298; UNROLL-NO-IC-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4 4299; UNROLL-NO-IC-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1 4300; UNROLL-NO-IC-NEXT: 
[[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32 4301; UNROLL-NO-IC-NEXT: [[MUL]] = mul i32 [[IDX_INC_EXT]], 4 4302; UNROLL-NO-IC-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 4303; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] 4304; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]] 4305; UNROLL-NO-IC: exit.loopexit: 4306; UNROLL-NO-IC-NEXT: br label [[EXIT]] 4307; UNROLL-NO-IC: exit: 4308; UNROLL-NO-IC-NEXT: ret void 4309; 4310; INTERLEAVE-LABEL: @wrappingindvars2( 4311; INTERLEAVE-NEXT: entry: 4312; INTERLEAVE-NEXT: [[EXT:%.*]] = zext i8 [[T:%.*]] to i32 4313; INTERLEAVE-NEXT: [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2 4314; INTERLEAVE-NEXT: [[ECMP:%.*]] = icmp ult i8 [[T]], 42 4315; INTERLEAVE-NEXT: br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] 4316; INTERLEAVE: loop.preheader: 4317; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1 4318; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8 4319; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 4320; INTERLEAVE: vector.scevcheck: 4321; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[LEN]] to i8 4322; INTERLEAVE-NEXT: [[TMP2:%.*]] = xor i8 [[T]], -1 4323; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]] 4324; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255 4325; INTERLEAVE-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] 4326; INTERLEAVE-NEXT: [[TMP6:%.*]] = trunc i32 [[LEN]] to i8 4327; INTERLEAVE-NEXT: [[TMP7:%.*]] = add i8 [[TMP6]], [[T]] 4328; INTERLEAVE-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]] 4329; INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255 4330; INTERLEAVE-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] 4331; INTERLEAVE-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]] 4332; INTERLEAVE-NEXT: br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 4333; INTERLEAVE: vector.ph: 4334; INTERLEAVE-NEXT: 
[[N_VEC:%.*]] = and i32 [[TMP0]], -8 4335; INTERLEAVE-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 4336; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i8 [[CAST_CRD]], [[T]] 4337; INTERLEAVE-NEXT: [[TMP12:%.*]] = add i32 [[N_VEC]], [[EXT]] 4338; INTERLEAVE-NEXT: [[IND_END2:%.*]] = shl i32 [[TMP12]], 2 4339; INTERLEAVE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[EXT_MUL]], i64 0 4340; INTERLEAVE-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer 4341; INTERLEAVE-NEXT: [[INDUCTION7:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT6]], <i32 0, i32 4, i32 8, i32 12> 4342; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 4343; INTERLEAVE: vector.body: 4344; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4345; INTERLEAVE-NEXT: [[VEC_IND8:%.*]] = phi <4 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] 4346; INTERLEAVE-NEXT: [[TMP13:%.*]] = trunc i32 [[INDEX]] to i8 4347; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[TMP13]], [[T]] 4348; INTERLEAVE-NEXT: [[STEP_ADD9:%.*]] = add <4 x i32> [[VEC_IND8]], <i32 16, i32 16, i32 16, i32 16> 4349; INTERLEAVE-NEXT: [[TMP14:%.*]] = sext i8 [[OFFSET_IDX]] to i64 4350; INTERLEAVE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP14]] 4351; INTERLEAVE-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>* 4352; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND8]], <4 x i32>* [[TMP16]], align 4 4353; INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i64 4 4354; INTERLEAVE-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>* 4355; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD9]], <4 x i32>* [[TMP18]], align 4 4356; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 4357; INTERLEAVE-NEXT: [[VEC_IND_NEXT11]] = add <4 x i32> [[VEC_IND8]], <i32 32, i32 32, i32 32, i32 32> 4358; INTERLEAVE-NEXT: [[TMP19:%.*]] = 
icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 4359; INTERLEAVE-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] 4360; INTERLEAVE: middle.block: 4361; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 4362; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] 4363; INTERLEAVE: scalar.ph: 4364; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ] 4365; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ] 4366; INTERLEAVE-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 4367; INTERLEAVE-NEXT: br label [[LOOP:%.*]] 4368; INTERLEAVE: loop: 4369; INTERLEAVE-NEXT: [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4370; INTERLEAVE-NEXT: [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 4371; INTERLEAVE-NEXT: [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] 4372; INTERLEAVE-NEXT: [[TMP20:%.*]] = sext i8 [[IDX]] to i64 4373; INTERLEAVE-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]] 4374; INTERLEAVE-NEXT: store i32 [[SPHI]], i32* [[PTR]], align 4 4375; INTERLEAVE-NEXT: [[IDX_INC]] = add i8 [[IDX]], 1 4376; INTERLEAVE-NEXT: [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32 4377; INTERLEAVE-NEXT: [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2 4378; INTERLEAVE-NEXT: [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1 4379; INTERLEAVE-NEXT: [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]] 4380; INTERLEAVE-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]] 4381; INTERLEAVE: exit.loopexit: 4382; INTERLEAVE-NEXT: br label [[EXIT]] 4383; INTERLEAVE: 
exit: 4384; INTERLEAVE-NEXT: ret void 4385; 4386entry: 4387 %st = zext i8 %t to i16 4388 %ext = zext i8 %t to i32 4389 %ext.mul = mul i32 %ext, 4 4390 4391 %ecmp = icmp ult i16 %st, 42 4392 br i1 %ecmp, label %loop, label %exit 4393 4394 loop: 4395 4396 %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ] 4397 %sphi = phi i32 [ %ext.mul, %entry ], [%mul, %loop] 4398 %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ] 4399 4400 %ptr = getelementptr inbounds i32, i32* %A, i8 %idx 4401 store i32 %sphi, i32* %ptr 4402 4403 %idx.inc = add i8 %idx, 1 4404 %idx.inc.ext = zext i8 %idx.inc to i32 4405 %mul = mul i32 %idx.inc.ext, 4 4406 %idx.b.inc = add nuw nsw i32 %idx.b, 1 4407 4408 %c = icmp ult i32 %idx.b, %len 4409 br i1 %c, label %loop, label %exit 4410 4411 exit: 4412 ret void 4413} 4414 4415; Check that we generate vectorized IVs in the pre-header 4416; instead of widening the scalar IV inside the loop, when 4417; we know how to do that. 4418define void @veciv(i32* nocapture %a, i32 %start, i32 %k) { 4419; CHECK-LABEL: @veciv( 4420; CHECK-NEXT: for.body.preheader: 4421; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 2 4422; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 4423; CHECK: vector.ph: 4424; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[K]], 2 4425; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[K]], [[N_MOD_VF]] 4426; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 4427; CHECK: vector.body: 4428; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4429; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 4430; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 4431; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 4432; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] 4433; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0 4434; CHECK-NEXT: [[TMP4:%.*]] 
= bitcast i32* [[TMP3]] to <2 x i32>* 4435; CHECK-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP4]], align 4 4436; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 4437; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 4438; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 4439; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] 4440; CHECK: middle.block: 4441; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]] 4442; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 4443; CHECK: scalar.ph: 4444; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] 4445; CHECK-NEXT: br label [[FOR_BODY:%.*]] 4446; CHECK: for.body: 4447; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4448; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] 4449; CHECK-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4 4450; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 4451; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] 4452; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] 4453; CHECK: exit: 4454; CHECK-NEXT: ret void 4455; 4456; IND-LABEL: @veciv( 4457; IND-NEXT: for.body.preheader: 4458; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 2 4459; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 4460; IND: vector.ph: 4461; IND-NEXT: [[N_VEC:%.*]] = and i32 [[K]], -2 4462; IND-NEXT: br label [[VECTOR_BODY:%.*]] 4463; IND: vector.body: 4464; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4465; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] 
] 4466; IND-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 4467; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] 4468; IND-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <2 x i32>* 4469; IND-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP2]], align 4 4470; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 4471; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 4472; IND-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 4473; IND-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] 4474; IND: middle.block: 4475; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[K]] 4476; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 4477; IND: scalar.ph: 4478; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] 4479; IND-NEXT: br label [[FOR_BODY:%.*]] 4480; IND: for.body: 4481; IND-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4482; IND-NEXT: [[TMP4:%.*]] = sext i32 [[INDVARS_IV]] to i64 4483; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]] 4484; IND-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4 4485; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 4486; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] 4487; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] 4488; IND: exit: 4489; IND-NEXT: ret void 4490; 4491; UNROLL-LABEL: @veciv( 4492; UNROLL-NEXT: for.body.preheader: 4493; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 4 4494; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 4495; UNROLL: vector.ph: 4496; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[K]], -4 4497; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 4498; UNROLL: vector.body: 4499; 
UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4500; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 4501; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 4502; UNROLL-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 4503; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] 4504; UNROLL-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <2 x i32>* 4505; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP2]], align 4 4506; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 2 4507; UNROLL-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>* 4508; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP4]], align 4 4509; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 4510; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4> 4511; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 4512; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] 4513; UNROLL: middle.block: 4514; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[K]] 4515; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 4516; UNROLL: scalar.ph: 4517; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] 4518; UNROLL-NEXT: br label [[FOR_BODY:%.*]] 4519; UNROLL: for.body: 4520; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4521; UNROLL-NEXT: [[TMP6:%.*]] = sext i32 [[INDVARS_IV]] to i64 4522; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] 4523; UNROLL-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4 4524; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 4525; UNROLL-NEXT: 
[[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] 4526; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] 4527; UNROLL: exit: 4528; UNROLL-NEXT: ret void 4529; 4530; UNROLL-NO-IC-LABEL: @veciv( 4531; UNROLL-NO-IC-NEXT: for.body.preheader: 4532; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 4 4533; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 4534; UNROLL-NO-IC: vector.ph: 4535; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[K]], 4 4536; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[K]], [[N_MOD_VF]] 4537; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 4538; UNROLL-NO-IC: vector.body: 4539; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4540; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 4541; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 4542; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 4543; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 4544; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 4545; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 4546; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]] 4547; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP2]] 4548; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 4549; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>* 4550; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP7]], align 4 4551; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 2 4552; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <2 x i32>* 4553; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP9]], align 4 4554; 
UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 4555; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2> 4556; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 4557; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] 4558; UNROLL-NO-IC: middle.block: 4559; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]] 4560; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 4561; UNROLL-NO-IC: scalar.ph: 4562; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] 4563; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] 4564; UNROLL-NO-IC: for.body: 4565; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4566; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] 4567; UNROLL-NO-IC-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4 4568; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 4569; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] 4570; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] 4571; UNROLL-NO-IC: exit: 4572; UNROLL-NO-IC-NEXT: ret void 4573; 4574; INTERLEAVE-LABEL: @veciv( 4575; INTERLEAVE-NEXT: for.body.preheader: 4576; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 8 4577; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 4578; INTERLEAVE: vector.ph: 4579; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[K]], -8 4580; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 4581; INTERLEAVE: vector.body: 4582; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4583; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi 
<4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 4584; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4> 4585; INTERLEAVE-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 4586; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] 4587; INTERLEAVE-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>* 4588; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP2]], align 4 4589; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4 4590; INTERLEAVE-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 4591; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP4]], align 4 4592; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 4593; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8> 4594; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 4595; INTERLEAVE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] 4596; INTERLEAVE: middle.block: 4597; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[K]] 4598; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 4599; INTERLEAVE: scalar.ph: 4600; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] 4601; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] 4602; INTERLEAVE: for.body: 4603; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4604; INTERLEAVE-NEXT: [[TMP6:%.*]] = sext i32 [[INDVARS_IV]] to i64 4605; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] 4606; INTERLEAVE-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4 4607; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 4608; INTERLEAVE-NEXT: 
[[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] 4609; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] 4610; INTERLEAVE: exit: 4611; INTERLEAVE-NEXT: ret void 4612; 4613for.body.preheader: 4614 br label %for.body 4615 4616for.body: 4617 %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] 4618 %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv 4619 store i32 %indvars.iv, i32* %arrayidx, align 4 4620 %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 4621 %exitcond = icmp eq i32 %indvars.iv.next, %k 4622 br i1 %exitcond, label %exit, label %for.body 4623 4624exit: 4625 ret void 4626} 4627 ; @trunciv: the i64 induction variable counts up from 0 in steps of 1 until its next value equals %k and, apart from its own increment, is used only through a trunc to i32 that serves as both the index into %a and the stored value (%start is unused); every run line below expects a vector.scevcheck block before vector.ph and an i32-element vector induction phi feeding the wide store. 4628define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) { 4629; CHECK-LABEL: @trunciv( 4630; CHECK-NEXT: for.body.preheader: 4631; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2 4632; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 4633; CHECK: vector.scevcheck: 4634; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 4635; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 4636; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]] 4637; CHECK-NEXT: [[TMP3:%.*]] = sub i32 0, [[TMP1]] 4638; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0 4639; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0 4640; CHECK-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] 4641; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 4642; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] 4643; CHECK-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 4644; CHECK: vector.ph: 4645; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 2 4646; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]] 4647; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 4648; CHECK: vector.body: 4649; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4650; CHECK-NEXT: [[VEC_IND:%.*]]
= phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 4651; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] 4652; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[INDEX]] to i32 4653; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 0 4654; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], 1 4655; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP11]] 4656; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 0 4657; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <2 x i32>* 4658; CHECK-NEXT: store <2 x i32> [[VEC_IND1]], <2 x i32>* [[TMP15]], align 4 4659; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 4660; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 4661; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i32> [[VEC_IND1]], <i32 2, i32 2> 4662; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 4663; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] 4664; CHECK: middle.block: 4665; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] 4666; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 4667; CHECK: scalar.ph: 4668; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 4669; CHECK-NEXT: br label [[FOR_BODY:%.*]] 4670; CHECK: for.body: 4671; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4672; CHECK-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32 4673; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TRUNC_IV]] 4674; CHECK-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4 4675; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 4676; CHECK-NEXT: [[EXITCOND:%.*]] =
icmp eq i64 [[INDVARS_IV_NEXT]], [[K]] 4677; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] 4678; CHECK: exit: 4679; CHECK-NEXT: ret void 4680; 4681; IND-LABEL: @trunciv( 4682; IND-NEXT: for.body.preheader: 4683; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2 4684; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 4685; IND: vector.scevcheck: 4686; IND-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 4687; IND-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 2147483648 4688; IND-NEXT: br i1 [[TMP1]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]] 4689; IND: vector.ph: 4690; IND-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -2 4691; IND-NEXT: br label [[VECTOR_BODY:%.*]] 4692; IND: vector.body: 4693; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4694; IND-NEXT: [[VEC_IND1:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] 4695; IND-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32 4696; IND-NEXT: [[TMP2:%.*]] = ashr exact i64 [[SEXT]], 32 4697; IND-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP2]] 4698; IND-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>* 4699; IND-NEXT: store <2 x i32> [[VEC_IND1]], <2 x i32>* [[TMP4]], align 4 4700; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 4701; IND-NEXT: [[VEC_IND_NEXT2]] = add <2 x i32> [[VEC_IND1]], <i32 2, i32 2> 4702; IND-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 4703; IND-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] 4704; IND: middle.block: 4705; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]] 4706; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 4707; IND: scalar.ph: 4708; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]]
] 4709; IND-NEXT: br label [[FOR_BODY:%.*]] 4710; IND: for.body: 4711; IND-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4712; IND-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32 4713; IND-NEXT: [[SEXT3:%.*]] = shl i64 [[INDVARS_IV]], 32 4714; IND-NEXT: [[TMP6:%.*]] = ashr exact i64 [[SEXT3]], 32 4715; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] 4716; IND-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4 4717; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 4718; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]] 4719; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] 4720; IND: exit: 4721; IND-NEXT: ret void 4722; 4723; UNROLL-LABEL: @trunciv( 4724; UNROLL-NEXT: for.body.preheader: 4725; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4 4726; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 4727; UNROLL: vector.scevcheck: 4728; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 4729; UNROLL-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 2147483648 4730; UNROLL-NEXT: br i1 [[TMP1]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]] 4731; UNROLL: vector.ph: 4732; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -4 4733; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 4734; UNROLL: vector.body: 4735; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4736; UNROLL-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] 4737; UNROLL-NEXT: [[STEP_ADD3:%.*]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2> 4738; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32 4739; UNROLL-NEXT: [[TMP2:%.*]] = ashr exact i64 [[SEXT]], 32 4740; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP2]] 4741; UNROLL-NEXT:
[[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>* 4742; UNROLL-NEXT: store <2 x i32> [[VEC_IND2]], <2 x i32>* [[TMP4]], align 4 4743; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 2 4744; UNROLL-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>* 4745; UNROLL-NEXT: store <2 x i32> [[STEP_ADD3]], <2 x i32>* [[TMP6]], align 4 4746; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 4747; UNROLL-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND2]], <i32 4, i32 4> 4748; UNROLL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 4749; UNROLL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] 4750; UNROLL: middle.block: 4751; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]] 4752; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 4753; UNROLL: scalar.ph: 4754; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 4755; UNROLL-NEXT: br label [[FOR_BODY:%.*]] 4756; UNROLL: for.body: 4757; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4758; UNROLL-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32 4759; UNROLL-NEXT: [[SEXT6:%.*]] = shl i64 [[INDVARS_IV]], 32 4760; UNROLL-NEXT: [[TMP8:%.*]] = ashr exact i64 [[SEXT6]], 32 4761; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]] 4762; UNROLL-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4 4763; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 4764; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]] 4765; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] 4766; UNROLL: exit: 4767; UNROLL-NEXT: ret void 4768; 4769; UNROLL-NO-IC-LABEL: @trunciv( 4770; UNROLL-NO-IC-NEXT: for.body.preheader: 4771;
UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4 4772; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 4773; UNROLL-NO-IC: vector.scevcheck: 4774; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 4775; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 4776; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]] 4777; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = sub i32 0, [[TMP1]] 4778; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0 4779; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0 4780; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]] 4781; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 4782; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] 4783; UNROLL-NO-IC-NEXT: br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 4784; UNROLL-NO-IC: vector.ph: 4785; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[K]], 4 4786; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]] 4787; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 4788; UNROLL-NO-IC: vector.body: 4789; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4790; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 4791; UNROLL-NO-IC-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] 4792; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 4793; UNROLL-NO-IC-NEXT: [[STEP_ADD3:%.*]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2> 4794; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = trunc i64 [[INDEX]] to i32 4795; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 0 4796; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i32 [[TMP10]], 1 4797; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = add i32 [[TMP10]], 2 4798; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] =
add i32 [[TMP10]], 3 4799; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP11]] 4800; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP13]] 4801; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0 4802; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <2 x i32>* 4803; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND2]], <2 x i32>* [[TMP18]], align 4 4804; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 2 4805; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <2 x i32>* 4806; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD3]], <2 x i32>* [[TMP20]], align 4 4807; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 4808; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2> 4809; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[STEP_ADD3]], <i32 2, i32 2> 4810; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 4811; UNROLL-NO-IC-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] 4812; UNROLL-NO-IC: middle.block: 4813; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]] 4814; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 4815; UNROLL-NO-IC: scalar.ph: 4816; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 4817; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] 4818; UNROLL-NO-IC: for.body: 4819; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4820; UNROLL-NO-IC-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32 4821; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TRUNC_IV]] 4822; UNROLL-NO-IC-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]],
align 4 4823; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 4824; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]] 4825; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] 4826; UNROLL-NO-IC: exit: 4827; UNROLL-NO-IC-NEXT: ret void 4828; 4829; INTERLEAVE-LABEL: @trunciv( 4830; INTERLEAVE-NEXT: for.body.preheader: 4831; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 8 4832; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 4833; INTERLEAVE: vector.scevcheck: 4834; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[K]], -1 4835; INTERLEAVE-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 2147483648 4836; INTERLEAVE-NEXT: br i1 [[TMP1]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]] 4837; INTERLEAVE: vector.ph: 4838; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[K]], -8 4839; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 4840; INTERLEAVE: vector.body: 4841; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4842; INTERLEAVE-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] 4843; INTERLEAVE-NEXT: [[STEP_ADD3:%.*]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4> 4844; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32 4845; INTERLEAVE-NEXT: [[TMP2:%.*]] = ashr exact i64 [[SEXT]], 32 4846; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP2]] 4847; INTERLEAVE-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 4848; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND2]], <4 x i32>* [[TMP4]], align 4 4849; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 4 4850; INTERLEAVE-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* 4851; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD3]], <4 x i32>* [[TMP6]], align
4 4852; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 4853; INTERLEAVE-NEXT: [[VEC_IND_NEXT5]] = add <4 x i32> [[VEC_IND2]], <i32 8, i32 8, i32 8, i32 8> 4854; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 4855; INTERLEAVE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] 4856; INTERLEAVE: middle.block: 4857; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]] 4858; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 4859; INTERLEAVE: scalar.ph: 4860; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 4861; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] 4862; INTERLEAVE: for.body: 4863; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4864; INTERLEAVE-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32 4865; INTERLEAVE-NEXT: [[SEXT6:%.*]] = shl i64 [[INDVARS_IV]], 32 4866; INTERLEAVE-NEXT: [[TMP8:%.*]] = ashr exact i64 [[SEXT6]], 32 4867; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]] 4868; INTERLEAVE-NEXT: store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4 4869; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 4870; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]] 4871; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] 4872; INTERLEAVE: exit: 4873; INTERLEAVE-NEXT: ret void 4874; 4875for.body.preheader: 4876 br label %for.body 4877 4878for.body: 4879 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] 4880 %trunc.iv = trunc i64 %indvars.iv to i32 4881 %arrayidx = getelementptr inbounds i32, i32* %a, i32 %trunc.iv 4882 store i32 %trunc.iv, i32* %arrayidx, align 4 4883 %indvars.iv.next = add nuw nsw i64
%indvars.iv, 1 4884 %exitcond = icmp eq i64 %indvars.iv.next, %k 4885 br i1 %exitcond, label %exit, label %for.body 4886 4887exit: 4888 ret void 4889} 4890 4891; 4892; 4893define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) { 4894; CHECK-LABEL: @nonprimary( 4895; CHECK-NEXT: for.body.preheader: 4896; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]] 4897; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 4898; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 4899; CHECK: vector.ph: 4900; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 4901; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] 4902; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]] 4903; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i32 0 4904; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 4905; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> 4906; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 4907; CHECK: vector.body: 4908; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4909; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 4910; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] 4911; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 4912; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1 4913; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP1]] 4914; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 0 4915; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>* 4916; CHECK-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP5]], align 4 4917; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 4918; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 4919;
CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 4920; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] 4921; CHECK: middle.block: 4922; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 4923; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 4924; CHECK: scalar.ph: 4925; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ] 4926; CHECK-NEXT: br label [[FOR_BODY:%.*]] 4927; CHECK: for.body: 4928; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4929; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] 4930; CHECK-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4 4931; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 4932; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] 4933; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] 4934; CHECK: exit: 4935; CHECK-NEXT: ret void 4936; 4937; IND-LABEL: @nonprimary( 4938; IND-NEXT: for.body.preheader: 4939; IND-NEXT: [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]] 4940; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 4941; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 4942; IND: vector.ph: 4943; IND-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -2 4944; IND-NEXT: [[IND_END:%.*]] = add i32 [[N_VEC]], [[I]] 4945; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0 4946; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 4947; IND-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> 4948; IND-NEXT: br label [[VECTOR_BODY:%.*]] 4949; IND: vector.body: 4950; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, 
[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4951; IND-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 4952; IND-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], [[I]] 4953; IND-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64 4954; IND-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]] 4955; IND-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* 4956; IND-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4 4957; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 4958; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 4959; IND-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 4960; IND-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] 4961; IND: middle.block: 4962; IND-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 4963; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 4964; IND: scalar.ph: 4965; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ] 4966; IND-NEXT: br label [[FOR_BODY:%.*]] 4967; IND: for.body: 4968; IND-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 4969; IND-NEXT: [[TMP5:%.*]] = sext i32 [[INDVARS_IV]] to i64 4970; IND-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] 4971; IND-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4 4972; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 4973; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] 4974; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] 4975; IND: exit: 4976; IND-NEXT: ret void 4977; 4978; UNROLL-LABEL: @nonprimary( 4979; UNROLL-NEXT: for.body.preheader: 4980; UNROLL-NEXT: [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]] 4981; 
UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 4982; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 4983; UNROLL: vector.ph: 4984; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -4 4985; UNROLL-NEXT: [[IND_END:%.*]] = add i32 [[N_VEC]], [[I]] 4986; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0 4987; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 4988; UNROLL-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> 4989; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 4990; UNROLL: vector.body: 4991; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 4992; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 4993; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 4994; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], [[I]] 4995; UNROLL-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64 4996; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]] 4997; UNROLL-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* 4998; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4 4999; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 2 5000; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>* 5001; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP5]], align 4 5002; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 5003; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4> 5004; UNROLL-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 5005; UNROLL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] 5006; UNROLL: middle.block: 5007; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i32 
[[TMP0]], [[N_VEC]] 5008; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 5009; UNROLL: scalar.ph: 5010; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ] 5011; UNROLL-NEXT: br label [[FOR_BODY:%.*]] 5012; UNROLL: for.body: 5013; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 5014; UNROLL-NEXT: [[TMP7:%.*]] = sext i32 [[INDVARS_IV]] to i64 5015; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] 5016; UNROLL-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4 5017; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 5018; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] 5019; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] 5020; UNROLL: exit: 5021; UNROLL-NEXT: ret void 5022; 5023; UNROLL-NO-IC-LABEL: @nonprimary( 5024; UNROLL-NO-IC-NEXT: for.body.preheader: 5025; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]] 5026; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 5027; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5028; UNROLL-NO-IC: vector.ph: 5029; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 5030; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] 5031; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]] 5032; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i32 0 5033; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 5034; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> 5035; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 5036; UNROLL-NO-IC: vector.body: 5037; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi 
i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 5038; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 5039; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 5040; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] 5041; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 5042; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1 5043; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 2 5044; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 3 5045; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP1]] 5046; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP3]] 5047; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 5048; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <2 x i32>* 5049; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP8]], align 4 5050; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 2 5051; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <2 x i32>* 5052; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP10]], align 4 5053; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 5054; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2> 5055; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 5056; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] 5057; UNROLL-NO-IC: middle.block: 5058; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 5059; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 5060; UNROLL-NO-IC: scalar.ph: 5061; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], 
[[FOR_BODY_PREHEADER:%.*]] ] 5062; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] 5063; UNROLL-NO-IC: for.body: 5064; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 5065; UNROLL-NO-IC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] 5066; UNROLL-NO-IC-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4 5067; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 5068; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] 5069; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] 5070; UNROLL-NO-IC: exit: 5071; UNROLL-NO-IC-NEXT: ret void 5072; 5073; INTERLEAVE-LABEL: @nonprimary( 5074; INTERLEAVE-NEXT: for.body.preheader: 5075; INTERLEAVE-NEXT: [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]] 5076; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8 5077; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5078; INTERLEAVE: vector.ph: 5079; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[TMP0]], -8 5080; INTERLEAVE-NEXT: [[IND_END:%.*]] = add i32 [[N_VEC]], [[I]] 5081; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i64 0 5082; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 5083; INTERLEAVE-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3> 5084; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 5085; INTERLEAVE: vector.body: 5086; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 5087; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 5088; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4> 5089; 
INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], [[I]] 5090; INTERLEAVE-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64 5091; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]] 5092; INTERLEAVE-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* 5093; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP3]], align 4 5094; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 4 5095; INTERLEAVE-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>* 5096; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP5]], align 4 5097; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 5098; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8> 5099; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 5100; INTERLEAVE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] 5101; INTERLEAVE: middle.block: 5102; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 5103; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 5104; INTERLEAVE: scalar.ph: 5105; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ] 5106; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] 5107; INTERLEAVE: for.body: 5108; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 5109; INTERLEAVE-NEXT: [[TMP7:%.*]] = sext i32 [[INDVARS_IV]] to i64 5110; INTERLEAVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] 5111; INTERLEAVE-NEXT: store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4 5112; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 5113; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]] 5114; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label 
[[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] 5115; INTERLEAVE: exit: 5116; INTERLEAVE-NEXT: ret void 5117; 5118for.body.preheader: 5119 br label %for.body 5120 5121for.body: 5122 %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ %i, %for.body.preheader ] 5123 %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv 5124 store i32 %indvars.iv, i32* %arrayidx, align 4 5125 %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 5126 %exitcond = icmp eq i32 %indvars.iv.next, %k 5127 br i1 %exitcond, label %exit, label %for.body 5128 5129exit: 5130 ret void 5131} 5132 5133define void @non_primary_iv_trunc(i32* %a, i64 %n) { 5134; CHECK-LABEL: @non_primary_iv_trunc( 5135; CHECK-NEXT: entry: 5136; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 5137; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2 5138; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5139; CHECK: vector.ph: 5140; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2 5141; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] 5142; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 5143; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 5144; CHECK: vector.body: 5145; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 5146; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 5147; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] 5148; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 5149; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] 5150; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 5151; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* 5152; CHECK-NEXT: store <2 x i32> [[VEC_IND2]], <2 x i32>* [[TMP3]], align 4 5153; CHECK-NEXT: [[INDEX_NEXT]] = add nuw
i64 [[INDEX]], 2 5154; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4> 5155; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 4, i32 4> 5156; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 5157; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] 5158; CHECK: middle.block: 5159; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 5160; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 5161; CHECK: scalar.ph: 5162; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 5163; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 5164; CHECK-NEXT: br label [[FOR_BODY:%.*]] 5165; CHECK: for.body: 5166; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 5167; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 5168; CHECK-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]] 5169; CHECK-NEXT: [[VAR1:%.*]] = trunc i64 [[J]] to i32 5170; CHECK-NEXT: store i32 [[VAR1]], i32* [[VAR0]], align 4 5171; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 5172; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2 5173; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 5174; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]] 5175; CHECK: for.end: 5176; CHECK-NEXT: ret void 5177; 5178; IND-LABEL: @non_primary_iv_trunc( 5179; IND-NEXT: entry: 5180; IND-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 5181; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2 5182; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5183; IND: vector.ph: 5184; IND-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806 5185; IND-NEXT:
[[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 5186; IND-NEXT: br label [[VECTOR_BODY:%.*]] 5187; IND: vector.body: 5188; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 5189; IND-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] 5190; IND-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 5191; IND-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <2 x i32>* 5192; IND-NEXT: store <2 x i32> [[VEC_IND2]], <2 x i32>* [[TMP1]], align 4 5193; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 5194; IND-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 4, i32 4> 5195; IND-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 5196; IND-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] 5197; IND: middle.block: 5198; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 5199; IND-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 5200; IND: scalar.ph: 5201; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 5202; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 5203; IND-NEXT: br label [[FOR_BODY:%.*]] 5204; IND: for.body: 5205; IND-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 5206; IND-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 5207; IND-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]] 5208; IND-NEXT: [[VAR1:%.*]] = trunc i64 [[J]] to i32 5209; IND-NEXT: store i32 [[VAR1]], i32* [[VAR0]], align 4 5210; IND-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 5211; IND-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2 5212; IND-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 5213; IND-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]],
!llvm.loop [[LOOP47:![0-9]+]] 5214; IND: for.end: 5215; IND-NEXT: ret void 5216; 5217; UNROLL-LABEL: @non_primary_iv_trunc( 5218; UNROLL-NEXT: entry: 5219; UNROLL-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 5220; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4 5221; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5222; UNROLL: vector.ph: 5223; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804 5224; UNROLL-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 5225; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 5226; UNROLL: vector.body: 5227; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 5228; UNROLL-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] 5229; UNROLL-NEXT: [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND3]], <i32 4, i32 4> 5230; UNROLL-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 5231; UNROLL-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <2 x i32>* 5232; UNROLL-NEXT: store <2 x i32> [[VEC_IND3]], <2 x i32>* [[TMP1]], align 4 5233; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2 5234; UNROLL-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* 5235; UNROLL-NEXT: store <2 x i32> [[STEP_ADD4]], <2 x i32>* [[TMP3]], align 4 5236; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 5237; UNROLL-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND3]], <i32 8, i32 8> 5238; UNROLL-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 5239; UNROLL-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] 5240; UNROLL: middle.block: 5241; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 5242; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 5243; UNROLL: scalar.ph: 5244; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64
[ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 5245; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 5246; UNROLL-NEXT: br label [[FOR_BODY:%.*]] 5247; UNROLL: for.body: 5248; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 5249; UNROLL-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 5250; UNROLL-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]] 5251; UNROLL-NEXT: [[VAR1:%.*]] = trunc i64 [[J]] to i32 5252; UNROLL-NEXT: store i32 [[VAR1]], i32* [[VAR0]], align 4 5253; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 5254; UNROLL-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2 5255; UNROLL-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 5256; UNROLL-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]] 5257; UNROLL: for.end: 5258; UNROLL-NEXT: ret void 5259; 5260; UNROLL-NO-IC-LABEL: @non_primary_iv_trunc( 5261; UNROLL-NO-IC-NEXT: entry: 5262; UNROLL-NO-IC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 5263; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4 5264; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5265; UNROLL-NO-IC: vector.ph: 5266; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 4 5267; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] 5268; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 5269; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 5270; UNROLL-NO-IC: vector.body: 5271; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 5272; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 5273; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [
[[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] 5274; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 5275; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 5276; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4> 5277; UNROLL-NO-IC-NEXT: [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND3]], <i32 4, i32 4> 5278; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] 5279; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] 5280; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0 5281; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>* 5282; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND3]], <2 x i32>* [[TMP5]], align 4 5283; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 2 5284; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>* 5285; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD4]], <2 x i32>* [[TMP7]], align 4 5286; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 5287; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 4, i64 4> 5288; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD4]], <i32 4, i32 4> 5289; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 5290; UNROLL-NO-IC-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] 5291; UNROLL-NO-IC: middle.block: 5292; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 5293; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 5294; UNROLL-NO-IC: scalar.ph: 5295; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 5296; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 5297; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] 5298; UNROLL-NO-IC: for.body: 5299;
UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 5300; UNROLL-NO-IC-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 5301; UNROLL-NO-IC-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]] 5302; UNROLL-NO-IC-NEXT: [[VAR1:%.*]] = trunc i64 [[J]] to i32 5303; UNROLL-NO-IC-NEXT: store i32 [[VAR1]], i32* [[VAR0]], align 4 5304; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 5305; UNROLL-NO-IC-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2 5306; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 5307; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]] 5308; UNROLL-NO-IC: for.end: 5309; UNROLL-NO-IC-NEXT: ret void 5310; 5311; INTERLEAVE-LABEL: @non_primary_iv_trunc( 5312; INTERLEAVE-NEXT: entry: 5313; INTERLEAVE-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 5314; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8 5315; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5316; INTERLEAVE: vector.ph: 5317; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775800 5318; INTERLEAVE-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 5319; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 5320; INTERLEAVE: vector.body: 5321; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 5322; INTERLEAVE-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] 5323; INTERLEAVE-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], <i32 8, i32 8, i32 8, i32 8> 5324; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 5325; INTERLEAVE-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* 5326; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND3]], <4 x
i32>* [[TMP1]], align 4 5327; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4 5328; INTERLEAVE-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* 5329; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD4]], <4 x i32>* [[TMP3]], align 4 5330; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 5331; INTERLEAVE-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND3]], <i32 16, i32 16, i32 16, i32 16> 5332; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 5333; INTERLEAVE-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]] 5334; INTERLEAVE: middle.block: 5335; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 5336; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 5337; INTERLEAVE: scalar.ph: 5338; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 5339; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 5340; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] 5341; INTERLEAVE: for.body: 5342; INTERLEAVE-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 5343; INTERLEAVE-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 5344; INTERLEAVE-NEXT: [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]] 5345; INTERLEAVE-NEXT: [[VAR1:%.*]] = trunc i64 [[J]] to i32 5346; INTERLEAVE-NEXT: store i32 [[VAR1]], i32* [[VAR0]], align 4 5347; INTERLEAVE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 5348; INTERLEAVE-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 2 5349; INTERLEAVE-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 5350; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]] 5351; INTERLEAVE: for.end: 5352; INTERLEAVE-NEXT: ret void 5353; 5354entry: 5355 br label %for.body 5356
; Input loop of @non_primary_iv_trunc. %i starts at 0 and steps by 1; it
; indexes %a and controls the exit (icmp slt against %n). %j starts at 0 and
; steps by 2; apart from its own increment it is only used by the trunc to i32
; whose result is stored to %a[%i].
; The assertions for this function expect that trunc to be emitted as a narrow
; <VF x i32> vector induction (VEC_IND2 / VEC_IND3) which is stored directly.
; The runs that include -instcombine (IND, UNROLL, INTERLEAVE) show no
; <VF x i64> vector phi for %j; the runs without it (CHECK, UNROLL-NO-IC)
; still list one.
5357for.body: 5358 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] 5359 %j = phi i64 [ %j.next, %for.body ], [ 0, %entry ] 5360 %var0 = getelementptr inbounds i32, i32* %a, i64 %i 5361 %var1 = trunc i64 %j to i32 5362 store i32 %var1, i32* %var0, align 4 5363 %i.next = add nuw nsw i64 %i, 1 5364 %j.next = add nuw nsw i64 %j, 2 5365 %cond = icmp slt i64 %i.next, %n 5366 br i1 %cond, label %for.body, label %for.end 5367 5368for.end: 5369 ret void 5370} 5371 5372; PR32419. Ensure we transform truncated non-primary induction variables. In 5373; the test case below we replace %var1 with a new induction variable. Because 5374; the truncated value is non-primary, we must compute an offset from the 5375; primary induction variable. 5376; 5377; 5378define i32 @PR32419(i32 %a, i16 %b) { 5379; CHECK-LABEL: @PR32419( 5380; CHECK-NEXT: entry: 5381; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5382; CHECK: vector.ph: 5383; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[A:%.*]], i32 0 5384; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 5385; CHECK: vector.body: 5386; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE4:%.*]] ] 5387; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 -20, i32 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE4]] ] 5388; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_UREM_CONTINUE4]] ] 5389; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_UREM_CONTINUE4]] ] 5390; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]] 5391; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 5392; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND1]], zeroinitializer 5393; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true> 5394; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 5395;
CHECK-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] 5396; CHECK: pred.urem.if: 5397; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[TMP1]], 0 5398; CHECK-NEXT: [[TMP6:%.*]] = urem i16 [[B:%.*]], [[TMP5]] 5399; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i32 0 5400; CHECK-NEXT: br label [[PRED_UREM_CONTINUE]] 5401; CHECK: pred.urem.continue: 5402; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UREM_IF]] ] 5403; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 5404; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_UREM_IF3:%.*]], label [[PRED_UREM_CONTINUE4]] 5405; CHECK: pred.urem.if3: 5406; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[TMP1]], 1 5407; CHECK-NEXT: [[TMP11:%.*]] = urem i16 [[B]], [[TMP10]] 5408; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP11]], i32 1 5409; CHECK-NEXT: br label [[PRED_UREM_CONTINUE4]] 5410; CHECK: pred.urem.continue4: 5411; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_UREM_CONTINUE]] ], [ [[TMP12]], [[PRED_UREM_IF3]] ] 5412; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP13]] 5413; CHECK-NEXT: [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> 5414; CHECK-NEXT: [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]] 5415; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 5416; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 5417; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], <i16 2, i16 2> 5418; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 5419; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] 5420; CHECK: middle.block: 5421; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP15]]) 5422; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 20, 20 5423; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label 
[[SCALAR_PH]] 5424; CHECK: scalar.ph: 5425; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ -20, [[ENTRY:%.*]] ] 5426; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[A]], [[ENTRY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] 5427; CHECK-NEXT: br label [[FOR_BODY:%.*]] 5428; CHECK: for.body: 5429; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ] 5430; CHECK-NEXT: [[VAR0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ] 5431; CHECK-NEXT: [[VAR1:%.*]] = trunc i32 [[I]] to i16 5432; CHECK-NEXT: [[VAR2:%.*]] = icmp eq i16 [[VAR1]], 0 5433; CHECK-NEXT: br i1 [[VAR2]], label [[FOR_INC]], label [[FOR_COND:%.*]] 5434; CHECK: for.cond: 5435; CHECK-NEXT: [[VAR3:%.*]] = urem i16 [[B]], [[VAR1]] 5436; CHECK-NEXT: br label [[FOR_INC]] 5437; CHECK: for.inc: 5438; CHECK-NEXT: [[VAR4:%.*]] = phi i16 [ [[VAR3]], [[FOR_COND]] ], [ 0, [[FOR_BODY]] ] 5439; CHECK-NEXT: [[VAR5:%.*]] = sext i16 [[VAR4]] to i32 5440; CHECK-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR5]] 5441; CHECK-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1 5442; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0 5443; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] 5444; CHECK: for.end: 5445; CHECK-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] 5446; CHECK-NEXT: ret i32 [[VAR7]] 5447; 5448; IND-LABEL: @PR32419( 5449; IND-NEXT: entry: 5450; IND-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5451; IND: vector.ph: 5452; IND-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[A:%.*]], i64 0 5453; IND-NEXT: br label [[VECTOR_BODY:%.*]] 5454; IND: vector.body: 5455; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE4:%.*]] ] 5456; IND-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_UREM_CONTINUE4]] ] 5457; 
IND-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_UREM_CONTINUE4]] ] 5458; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16 5459; IND-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND1]], zeroinitializer 5460; IND-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true> 5461; IND-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i64 0 5462; IND-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] 5463; IND: pred.urem.if: 5464; IND-NEXT: [[TMP5:%.*]] = add i16 [[TMP1]], -20 5465; IND-NEXT: [[TMP6:%.*]] = urem i16 [[B:%.*]], [[TMP5]] 5466; IND-NEXT: [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i64 0 5467; IND-NEXT: br label [[PRED_UREM_CONTINUE]] 5468; IND: pred.urem.continue: 5469; IND-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UREM_IF]] ] 5470; IND-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1 5471; IND-NEXT: br i1 [[TMP9]], label [[PRED_UREM_IF3:%.*]], label [[PRED_UREM_CONTINUE4]] 5472; IND: pred.urem.if3: 5473; IND-NEXT: [[TMP10:%.*]] = add i16 [[TMP1]], -19 5474; IND-NEXT: [[TMP11:%.*]] = urem i16 [[B]], [[TMP10]] 5475; IND-NEXT: [[TMP12:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP11]], i64 1 5476; IND-NEXT: br label [[PRED_UREM_CONTINUE4]] 5477; IND: pred.urem.continue4: 5478; IND-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_UREM_CONTINUE]] ], [ [[TMP12]], [[PRED_UREM_IF3]] ] 5479; IND-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP13]] 5480; IND-NEXT: [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> 5481; IND-NEXT: [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]] 5482; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 5483; IND-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], <i16 2, i16 2> 5484; IND-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 5485; IND-NEXT: br i1 [[TMP16]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] 5486; IND: middle.block: 5487; IND-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP15]]) 5488; IND-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 5489; IND: scalar.ph: 5490; IND-NEXT: br label [[FOR_BODY:%.*]] 5491; IND: for.body: 5492; IND-NEXT: br i1 undef, label [[FOR_INC:%.*]], label [[FOR_COND:%.*]] 5493; IND: for.cond: 5494; IND-NEXT: br label [[FOR_INC]] 5495; IND: for.inc: 5496; IND-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] 5497; IND: for.end: 5498; IND-NEXT: [[VAR7:%.*]] = phi i32 [ undef, [[FOR_INC]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] 5499; IND-NEXT: ret i32 [[VAR7]] 5500; 5501; UNROLL-LABEL: @PR32419( 5502; UNROLL-NEXT: entry: 5503; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5504; UNROLL: vector.ph: 5505; UNROLL-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[A:%.*]], i64 0 5506; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 5507; UNROLL: vector.body: 5508; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE12:%.*]] ] 5509; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_UREM_CONTINUE12]] ] 5510; UNROLL-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UREM_CONTINUE12]] ] 5511; UNROLL-NEXT: [[VEC_IND3:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[PRED_UREM_CONTINUE12]] ] 5512; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16 5513; UNROLL-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND3]], zeroinitializer 5514; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq <2 x i16> [[VEC_IND3]], <i16 -2, i16 -2> 5515; UNROLL-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true> 5516; UNROLL-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true> 5517; 
UNROLL-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0 5518; UNROLL-NEXT: br i1 [[TMP6]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] 5519; UNROLL: pred.urem.if: 5520; UNROLL-NEXT: [[TMP7:%.*]] = add i16 [[TMP1]], -20 5521; UNROLL-NEXT: [[TMP8:%.*]] = urem i16 [[B:%.*]], [[TMP7]] 5522; UNROLL-NEXT: [[TMP9:%.*]] = insertelement <2 x i16> poison, i16 [[TMP8]], i64 0 5523; UNROLL-NEXT: br label [[PRED_UREM_CONTINUE]] 5524; UNROLL: pred.urem.continue: 5525; UNROLL-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UREM_IF]] ] 5526; UNROLL-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1 5527; UNROLL-NEXT: br i1 [[TMP11]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8:%.*]] 5528; UNROLL: pred.urem.if7: 5529; UNROLL-NEXT: [[TMP12:%.*]] = add i16 [[TMP1]], -19 5530; UNROLL-NEXT: [[TMP13:%.*]] = urem i16 [[B]], [[TMP12]] 5531; UNROLL-NEXT: [[TMP14:%.*]] = insertelement <2 x i16> [[TMP10]], i16 [[TMP13]], i64 1 5532; UNROLL-NEXT: br label [[PRED_UREM_CONTINUE8]] 5533; UNROLL: pred.urem.continue8: 5534; UNROLL-NEXT: [[TMP15:%.*]] = phi <2 x i16> [ [[TMP10]], [[PRED_UREM_CONTINUE]] ], [ [[TMP14]], [[PRED_UREM_IF7]] ] 5535; UNROLL-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i64 0 5536; UNROLL-NEXT: br i1 [[TMP16]], label [[PRED_UREM_IF9:%.*]], label [[PRED_UREM_CONTINUE10:%.*]] 5537; UNROLL: pred.urem.if9: 5538; UNROLL-NEXT: [[TMP17:%.*]] = add i16 [[TMP1]], -18 5539; UNROLL-NEXT: [[TMP18:%.*]] = urem i16 [[B]], [[TMP17]] 5540; UNROLL-NEXT: [[TMP19:%.*]] = insertelement <2 x i16> poison, i16 [[TMP18]], i64 0 5541; UNROLL-NEXT: br label [[PRED_UREM_CONTINUE10]] 5542; UNROLL: pred.urem.continue10: 5543; UNROLL-NEXT: [[TMP20:%.*]] = phi <2 x i16> [ poison, [[PRED_UREM_CONTINUE8]] ], [ [[TMP19]], [[PRED_UREM_IF9]] ] 5544; UNROLL-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP5]], i64 1 5545; UNROLL-NEXT: br i1 [[TMP21]], label [[PRED_UREM_IF11:%.*]], label 
[[PRED_UREM_CONTINUE12]] 5546; UNROLL: pred.urem.if11: 5547; UNROLL-NEXT: [[TMP22:%.*]] = add i16 [[TMP1]], -17 5548; UNROLL-NEXT: [[TMP23:%.*]] = urem i16 [[B]], [[TMP22]] 5549; UNROLL-NEXT: [[TMP24:%.*]] = insertelement <2 x i16> [[TMP20]], i16 [[TMP23]], i64 1 5550; UNROLL-NEXT: br label [[PRED_UREM_CONTINUE12]] 5551; UNROLL: pred.urem.continue12: 5552; UNROLL-NEXT: [[TMP25:%.*]] = phi <2 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE10]] ], [ [[TMP24]], [[PRED_UREM_IF11]] ] 5553; UNROLL-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP15]] 5554; UNROLL-NEXT: [[PREDPHI13:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> zeroinitializer, <2 x i16> [[TMP25]] 5555; UNROLL-NEXT: [[TMP26:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> 5556; UNROLL-NEXT: [[TMP27:%.*]] = sext <2 x i16> [[PREDPHI13]] to <2 x i32> 5557; UNROLL-NEXT: [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]] 5558; UNROLL-NEXT: [[TMP29]] = or <2 x i32> [[VEC_PHI2]], [[TMP27]] 5559; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 5560; UNROLL-NEXT: [[VEC_IND_NEXT6]] = add <2 x i16> [[VEC_IND3]], <i16 4, i16 4> 5561; UNROLL-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 5562; UNROLL-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] 5563; UNROLL: middle.block: 5564; UNROLL-NEXT: [[BIN_RDX:%.*]] = or <2 x i32> [[TMP29]], [[TMP28]] 5565; UNROLL-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]]) 5566; UNROLL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 5567; UNROLL: scalar.ph: 5568; UNROLL-NEXT: br label [[FOR_BODY:%.*]] 5569; UNROLL: for.body: 5570; UNROLL-NEXT: br i1 undef, label [[FOR_INC:%.*]], label [[FOR_COND:%.*]] 5571; UNROLL: for.cond: 5572; UNROLL-NEXT: br label [[FOR_INC]] 5573; UNROLL: for.inc: 5574; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] 5575; UNROLL: for.end: 5576; UNROLL-NEXT: [[VAR7:%.*]] = 
phi i32 [ undef, [[FOR_INC]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] 5577; UNROLL-NEXT: ret i32 [[VAR7]] 5578; 5579; UNROLL-NO-IC-LABEL: @PR32419( 5580; UNROLL-NO-IC-NEXT: entry: 5581; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5582; UNROLL-NO-IC: vector.ph: 5583; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[A:%.*]], i32 0 5584; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 5585; UNROLL-NO-IC: vector.body: 5586; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE12:%.*]] ] 5587; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 -20, i32 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE12]] ] 5588; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_UREM_CONTINUE12]] ] 5589; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UREM_CONTINUE12]] ] 5590; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[PRED_UREM_CONTINUE12]] ] 5591; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 5592; UNROLL-NO-IC-NEXT: [[STEP_ADD4:%.*]] = add <2 x i16> [[VEC_IND3]], <i16 2, i16 2> 5593; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]] 5594; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 5595; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND3]], zeroinitializer 5596; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq <2 x i16> [[STEP_ADD4]], zeroinitializer 5597; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true> 5598; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true> 5599; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 5600; UNROLL-NO-IC-NEXT: br i1 [[TMP6]], label [[PRED_UREM_IF:%.*]], label 
[[PRED_UREM_CONTINUE:%.*]] 5601; UNROLL-NO-IC: pred.urem.if: 5602; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i16 [[TMP1]], 0 5603; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = urem i16 [[B:%.*]], [[TMP7]] 5604; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = insertelement <2 x i16> poison, i16 [[TMP8]], i32 0 5605; UNROLL-NO-IC-NEXT: br label [[PRED_UREM_CONTINUE]] 5606; UNROLL-NO-IC: pred.urem.continue: 5607; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UREM_IF]] ] 5608; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 5609; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8:%.*]] 5610; UNROLL-NO-IC: pred.urem.if7: 5611; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i16 [[TMP1]], 1 5612; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = urem i16 [[B]], [[TMP12]] 5613; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = insertelement <2 x i16> [[TMP10]], i16 [[TMP13]], i32 1 5614; UNROLL-NO-IC-NEXT: br label [[PRED_UREM_CONTINUE8]] 5615; UNROLL-NO-IC: pred.urem.continue8: 5616; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = phi <2 x i16> [ [[TMP10]], [[PRED_UREM_CONTINUE]] ], [ [[TMP14]], [[PRED_UREM_IF7]] ] 5617; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 5618; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[PRED_UREM_IF9:%.*]], label [[PRED_UREM_CONTINUE10:%.*]] 5619; UNROLL-NO-IC: pred.urem.if9: 5620; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = add i16 [[TMP1]], 2 5621; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = urem i16 [[B]], [[TMP17]] 5622; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = insertelement <2 x i16> poison, i16 [[TMP18]], i32 0 5623; UNROLL-NO-IC-NEXT: br label [[PRED_UREM_CONTINUE10]] 5624; UNROLL-NO-IC: pred.urem.continue10: 5625; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = phi <2 x i16> [ poison, [[PRED_UREM_CONTINUE8]] ], [ [[TMP19]], [[PRED_UREM_IF9]] ] 5626; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 5627; UNROLL-NO-IC-NEXT: br i1 [[TMP21]], label [[PRED_UREM_IF11:%.*]], label 
[[PRED_UREM_CONTINUE12]] 5628; UNROLL-NO-IC: pred.urem.if11: 5629; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i16 [[TMP1]], 3 5630; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = urem i16 [[B]], [[TMP22]] 5631; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = insertelement <2 x i16> [[TMP20]], i16 [[TMP23]], i32 1 5632; UNROLL-NO-IC-NEXT: br label [[PRED_UREM_CONTINUE12]] 5633; UNROLL-NO-IC: pred.urem.continue12: 5634; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = phi <2 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE10]] ], [ [[TMP24]], [[PRED_UREM_IF11]] ] 5635; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP15]] 5636; UNROLL-NO-IC-NEXT: [[PREDPHI13:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> zeroinitializer, <2 x i16> [[TMP25]] 5637; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> 5638; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = sext <2 x i16> [[PREDPHI13]] to <2 x i32> 5639; UNROLL-NO-IC-NEXT: [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]] 5640; UNROLL-NO-IC-NEXT: [[TMP29]] = or <2 x i32> [[VEC_PHI2]], [[TMP27]] 5641; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 5642; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2> 5643; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i16> [[STEP_ADD4]], <i16 2, i16 2> 5644; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20 5645; UNROLL-NO-IC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] 5646; UNROLL-NO-IC: middle.block: 5647; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = or <2 x i32> [[TMP29]], [[TMP28]] 5648; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]]) 5649; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 20, 20 5650; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 5651; UNROLL-NO-IC: scalar.ph: 5652; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ -20, [[ENTRY:%.*]] ] 5653; 
UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[A]], [[ENTRY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] 5654; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] 5655; UNROLL-NO-IC: for.body: 5656; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ] 5657; UNROLL-NO-IC-NEXT: [[VAR0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ] 5658; UNROLL-NO-IC-NEXT: [[VAR1:%.*]] = trunc i32 [[I]] to i16 5659; UNROLL-NO-IC-NEXT: [[VAR2:%.*]] = icmp eq i16 [[VAR1]], 0 5660; UNROLL-NO-IC-NEXT: br i1 [[VAR2]], label [[FOR_INC]], label [[FOR_COND:%.*]] 5661; UNROLL-NO-IC: for.cond: 5662; UNROLL-NO-IC-NEXT: [[VAR3:%.*]] = urem i16 [[B]], [[VAR1]] 5663; UNROLL-NO-IC-NEXT: br label [[FOR_INC]] 5664; UNROLL-NO-IC: for.inc: 5665; UNROLL-NO-IC-NEXT: [[VAR4:%.*]] = phi i16 [ [[VAR3]], [[FOR_COND]] ], [ 0, [[FOR_BODY]] ] 5666; UNROLL-NO-IC-NEXT: [[VAR5:%.*]] = sext i16 [[VAR4]] to i32 5667; UNROLL-NO-IC-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR5]] 5668; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1 5669; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0 5670; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] 5671; UNROLL-NO-IC: for.end: 5672; UNROLL-NO-IC-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] 5673; UNROLL-NO-IC-NEXT: ret i32 [[VAR7]] 5674; 5675; INTERLEAVE-LABEL: @PR32419( 5676; INTERLEAVE-NEXT: entry: 5677; INTERLEAVE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5678; INTERLEAVE: vector.ph: 5679; INTERLEAVE-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[A:%.*]], i64 0 5680; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 5681; INTERLEAVE: vector.body: 5682; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE20:%.*]] ] 5683; INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ 
[[TMP0]], [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_UREM_CONTINUE20]] ] 5684; INTERLEAVE-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_UREM_CONTINUE20]] ] 5685; INTERLEAVE-NEXT: [[VEC_IND3:%.*]] = phi <4 x i16> [ <i16 -20, i16 -19, i16 -18, i16 -17>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[PRED_UREM_CONTINUE20]] ] 5686; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16 5687; INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp eq <4 x i16> [[VEC_IND3]], zeroinitializer 5688; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq <4 x i16> [[VEC_IND3]], <i16 -4, i16 -4, i16 -4, i16 -4> 5689; INTERLEAVE-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP2]], <i1 true, i1 true, i1 true, i1 true> 5690; INTERLEAVE-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true> 5691; INTERLEAVE-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0 5692; INTERLEAVE-NEXT: br i1 [[TMP6]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] 5693; INTERLEAVE: pred.urem.if: 5694; INTERLEAVE-NEXT: [[TMP7:%.*]] = add i16 [[TMP1]], -20 5695; INTERLEAVE-NEXT: [[TMP8:%.*]] = urem i16 [[B:%.*]], [[TMP7]] 5696; INTERLEAVE-NEXT: [[TMP9:%.*]] = insertelement <4 x i16> poison, i16 [[TMP8]], i64 0 5697; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE]] 5698; INTERLEAVE: pred.urem.continue: 5699; INTERLEAVE-NEXT: [[TMP10:%.*]] = phi <4 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UREM_IF]] ] 5700; INTERLEAVE-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1 5701; INTERLEAVE-NEXT: br i1 [[TMP11]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8:%.*]] 5702; INTERLEAVE: pred.urem.if7: 5703; INTERLEAVE-NEXT: [[TMP12:%.*]] = add i16 [[TMP1]], -19 5704; INTERLEAVE-NEXT: [[TMP13:%.*]] = urem i16 [[B]], [[TMP12]] 5705; INTERLEAVE-NEXT: [[TMP14:%.*]] = insertelement <4 x i16> [[TMP10]], i16 [[TMP13]], i64 1 5706; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE8]] 5707; INTERLEAVE: pred.urem.continue8: 5708; 
INTERLEAVE-NEXT: [[TMP15:%.*]] = phi <4 x i16> [ [[TMP10]], [[PRED_UREM_CONTINUE]] ], [ [[TMP14]], [[PRED_UREM_IF7]] ] 5709; INTERLEAVE-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2 5710; INTERLEAVE-NEXT: br i1 [[TMP16]], label [[PRED_UREM_IF9:%.*]], label [[PRED_UREM_CONTINUE10:%.*]] 5711; INTERLEAVE: pred.urem.if9: 5712; INTERLEAVE-NEXT: [[TMP17:%.*]] = add i16 [[TMP1]], -18 5713; INTERLEAVE-NEXT: [[TMP18:%.*]] = urem i16 [[B]], [[TMP17]] 5714; INTERLEAVE-NEXT: [[TMP19:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP18]], i64 2 5715; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE10]] 5716; INTERLEAVE: pred.urem.continue10: 5717; INTERLEAVE-NEXT: [[TMP20:%.*]] = phi <4 x i16> [ [[TMP15]], [[PRED_UREM_CONTINUE8]] ], [ [[TMP19]], [[PRED_UREM_IF9]] ] 5718; INTERLEAVE-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3 5719; INTERLEAVE-NEXT: br i1 [[TMP21]], label [[PRED_UREM_IF11:%.*]], label [[PRED_UREM_CONTINUE12:%.*]] 5720; INTERLEAVE: pred.urem.if11: 5721; INTERLEAVE-NEXT: [[TMP22:%.*]] = add i16 [[TMP1]], -17 5722; INTERLEAVE-NEXT: [[TMP23:%.*]] = urem i16 [[B]], [[TMP22]] 5723; INTERLEAVE-NEXT: [[TMP24:%.*]] = insertelement <4 x i16> [[TMP20]], i16 [[TMP23]], i64 3 5724; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE12]] 5725; INTERLEAVE: pred.urem.continue12: 5726; INTERLEAVE-NEXT: [[TMP25:%.*]] = phi <4 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE10]] ], [ [[TMP24]], [[PRED_UREM_IF11]] ] 5727; INTERLEAVE-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0 5728; INTERLEAVE-NEXT: br i1 [[TMP26]], label [[PRED_UREM_IF13:%.*]], label [[PRED_UREM_CONTINUE14:%.*]] 5729; INTERLEAVE: pred.urem.if13: 5730; INTERLEAVE-NEXT: [[TMP27:%.*]] = add i16 [[TMP1]], -16 5731; INTERLEAVE-NEXT: [[TMP28:%.*]] = urem i16 [[B]], [[TMP27]] 5732; INTERLEAVE-NEXT: [[TMP29:%.*]] = insertelement <4 x i16> poison, i16 [[TMP28]], i64 0 5733; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE14]] 5734; INTERLEAVE: pred.urem.continue14: 5735; INTERLEAVE-NEXT: 
[[TMP30:%.*]] = phi <4 x i16> [ poison, [[PRED_UREM_CONTINUE12]] ], [ [[TMP29]], [[PRED_UREM_IF13]] ] 5736; INTERLEAVE-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1 5737; INTERLEAVE-NEXT: br i1 [[TMP31]], label [[PRED_UREM_IF15:%.*]], label [[PRED_UREM_CONTINUE16:%.*]] 5738; INTERLEAVE: pred.urem.if15: 5739; INTERLEAVE-NEXT: [[TMP32:%.*]] = add i16 [[TMP1]], -15 5740; INTERLEAVE-NEXT: [[TMP33:%.*]] = urem i16 [[B]], [[TMP32]] 5741; INTERLEAVE-NEXT: [[TMP34:%.*]] = insertelement <4 x i16> [[TMP30]], i16 [[TMP33]], i64 1 5742; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE16]] 5743; INTERLEAVE: pred.urem.continue16: 5744; INTERLEAVE-NEXT: [[TMP35:%.*]] = phi <4 x i16> [ [[TMP30]], [[PRED_UREM_CONTINUE14]] ], [ [[TMP34]], [[PRED_UREM_IF15]] ] 5745; INTERLEAVE-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2 5746; INTERLEAVE-NEXT: br i1 [[TMP36]], label [[PRED_UREM_IF17:%.*]], label [[PRED_UREM_CONTINUE18:%.*]] 5747; INTERLEAVE: pred.urem.if17: 5748; INTERLEAVE-NEXT: [[TMP37:%.*]] = add i16 [[TMP1]], -14 5749; INTERLEAVE-NEXT: [[TMP38:%.*]] = urem i16 [[B]], [[TMP37]] 5750; INTERLEAVE-NEXT: [[TMP39:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP38]], i64 2 5751; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE18]] 5752; INTERLEAVE: pred.urem.continue18: 5753; INTERLEAVE-NEXT: [[TMP40:%.*]] = phi <4 x i16> [ [[TMP35]], [[PRED_UREM_CONTINUE16]] ], [ [[TMP39]], [[PRED_UREM_IF17]] ] 5754; INTERLEAVE-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3 5755; INTERLEAVE-NEXT: br i1 [[TMP41]], label [[PRED_UREM_IF19:%.*]], label [[PRED_UREM_CONTINUE20]] 5756; INTERLEAVE: pred.urem.if19: 5757; INTERLEAVE-NEXT: [[TMP42:%.*]] = add i16 [[TMP1]], -13 5758; INTERLEAVE-NEXT: [[TMP43:%.*]] = urem i16 [[B]], [[TMP42]] 5759; INTERLEAVE-NEXT: [[TMP44:%.*]] = insertelement <4 x i16> [[TMP40]], i16 [[TMP43]], i64 3 5760; INTERLEAVE-NEXT: br label [[PRED_UREM_CONTINUE20]] 5761; INTERLEAVE: pred.urem.continue20: 5762; INTERLEAVE-NEXT: [[TMP45:%.*]] 
= phi <4 x i16> [ [[TMP40]], [[PRED_UREM_CONTINUE18]] ], [ [[TMP44]], [[PRED_UREM_IF19]] ] 5763; INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> zeroinitializer, <4 x i16> [[TMP25]] 5764; INTERLEAVE-NEXT: [[PREDPHI21:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> zeroinitializer, <4 x i16> [[TMP45]] 5765; INTERLEAVE-NEXT: [[TMP46:%.*]] = sext <4 x i16> [[PREDPHI]] to <4 x i32> 5766; INTERLEAVE-NEXT: [[TMP47:%.*]] = sext <4 x i16> [[PREDPHI21]] to <4 x i32> 5767; INTERLEAVE-NEXT: [[TMP48]] = or <4 x i32> [[VEC_PHI]], [[TMP46]] 5768; INTERLEAVE-NEXT: [[TMP49]] = or <4 x i32> [[VEC_PHI2]], [[TMP47]] 5769; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 5770; INTERLEAVE-NEXT: [[VEC_IND_NEXT6]] = add <4 x i16> [[VEC_IND3]], <i16 8, i16 8, i16 8, i16 8> 5771; INTERLEAVE-NEXT: [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 5772; INTERLEAVE-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] 5773; INTERLEAVE: middle.block: 5774; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = or <4 x i32> [[TMP49]], [[TMP48]] 5775; INTERLEAVE-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[BIN_RDX]]) 5776; INTERLEAVE-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] 5777; INTERLEAVE: scalar.ph: 5778; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -4, [[MIDDLE_BLOCK]] ], [ -20, [[ENTRY:%.*]] ] 5779; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP51]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ] 5780; INTERLEAVE-NEXT: br label [[FOR_BODY:%.*]] 5781; INTERLEAVE: for.body: 5782; INTERLEAVE-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ] 5783; INTERLEAVE-NEXT: [[VAR0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ] 5784; INTERLEAVE-NEXT: [[VAR1:%.*]] = trunc i32 [[I]] to i16 5785; INTERLEAVE-NEXT: [[VAR2:%.*]] = icmp eq i16 [[VAR1]], 0 5786; INTERLEAVE-NEXT: br i1 [[VAR2]], label [[FOR_INC]], 
label [[FOR_COND:%.*]] 5787; INTERLEAVE: for.cond: 5788; INTERLEAVE-NEXT: [[VAR3:%.*]] = urem i16 [[B]], [[VAR1]] 5789; INTERLEAVE-NEXT: br label [[FOR_INC]] 5790; INTERLEAVE: for.inc: 5791; INTERLEAVE-NEXT: [[VAR4:%.*]] = phi i16 [ [[VAR3]], [[FOR_COND]] ], [ 0, [[FOR_BODY]] ] 5792; INTERLEAVE-NEXT: [[VAR5:%.*]] = sext i16 [[VAR4]] to i32 5793; INTERLEAVE-NEXT: [[VAR6]] = or i32 [[VAR0]], [[VAR5]] 5794; INTERLEAVE-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1 5795; INTERLEAVE-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0 5796; INTERLEAVE-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]] 5797; INTERLEAVE: for.end: 5798; INTERLEAVE-NEXT: [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ] 5799; INTERLEAVE-NEXT: ret i32 [[VAR7]] 5800; 5801entry: 5802 br label %for.body 5803 5804for.body: 5805 %i = phi i32 [ -20, %entry ], [ %i.next, %for.inc ] 5806 %var0 = phi i32 [ %a, %entry ], [ %var6, %for.inc ] 5807 %var1 = trunc i32 %i to i16 5808 %var2 = icmp eq i16 %var1, 0 5809 br i1 %var2, label %for.inc, label %for.cond 5810 5811for.cond: 5812 %var3 = urem i16 %b, %var1 5813 br label %for.inc 5814 5815for.inc: 5816 %var4 = phi i16 [ %var3, %for.cond ], [ 0, %for.body ] 5817 %var5 = sext i16 %var4 to i32 5818 %var6 = or i32 %var0, %var5 5819 %i.next = add nsw i32 %i, 1 5820 %cond = icmp eq i32 %i.next, 0 5821 br i1 %cond, label %for.end, label %for.body 5822 5823for.end: 5824 %var7 = phi i32 [ %var6, %for.inc ] 5825 ret i32 %var7 5826} 5827 5828; Ensure that the shuffle vector for first order recurrence is inserted 5829; correctly after all the phis. These new phis correspond to new IVs 5830; that are generated by optimizing non-free truncs of IVs to IVs themselves. 5831; This also ensures the first-order recurrence splice recipe is placed 5832; correctly if it is fed by an induction. 
5833define i64 @trunc_with_first_order_recurrence() { 5834; CHECK-LABEL: @trunc_with_first_order_recurrence( 5835; CHECK-NEXT: entry: 5836; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5837; CHECK: vector.ph: 5838; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 5839; CHECK: vector.body: 5840; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 5841; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 5842; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 5843; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] 5844; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[VEC_IND4:%.*]], [[VECTOR_BODY]] ] 5845; CHECK-NEXT: [[VEC_IND4]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] 5846; CHECK-NEXT: [[VEC_IND6:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ] 5847; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND4]], <2 x i32> <i32 1, i32 2> 5848; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND2]], [[VEC_IND4]] 5849; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 42, i32 42> 5850; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND4]] 5851; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]] 5852; CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> 5853; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]] 5854; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND6]], <i32 1, i32 1> 5855; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]] 5856; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> 5857; CHECK-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], 
[[TMP9]] 5858; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 5859; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 5860; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2> 5861; CHECK-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2> 5862; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <2 x i32> [[VEC_IND6]], <i32 2, i32 2> 5863; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 5864; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] 5865; CHECK: middle.block: 5866; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP10]]) 5867; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 113, 112 5868; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND4]], i32 1 5869; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[VEC_IND4]], i32 0 5870; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 5871; CHECK: scalar.ph: 5872; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] 5873; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] 5874; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] 5875; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 5876; CHECK-NEXT: br label [[LOOP:%.*]] 5877; CHECK: exit: 5878; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 5879; CHECK-NEXT: ret i64 [[DOTLCSSA]] 5880; CHECK: loop: 5881; CHECK-NEXT: [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 5882; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 5883; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] 
] 5884; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] 5885; CHECK-NEXT: [[C6]] = trunc i64 [[INDVARS_IV]] to i32 5886; CHECK-NEXT: [[C8:%.*]] = mul i32 [[X]], [[C6]] 5887; CHECK-NEXT: [[C9:%.*]] = add i32 [[C8]], 42 5888; CHECK-NEXT: [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]] 5889; CHECK-NEXT: [[C11:%.*]] = add i32 [[C10]], [[C9]] 5890; CHECK-NEXT: [[C12:%.*]] = sext i32 [[C11]] to i64 5891; CHECK-NEXT: [[C13:%.*]] = add i64 [[C5]], [[C12]] 5892; CHECK-NEXT: [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32 5893; CHECK-NEXT: [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1 5894; CHECK-NEXT: [[C15:%.*]] = add i32 [[C9]], [[C14]] 5895; CHECK-NEXT: [[C16:%.*]] = sext i32 [[C15]] to i64 5896; CHECK-NEXT: [[C23]] = add i64 [[C13]], [[C16]] 5897; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 5898; CHECK-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1 5899; CHECK-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114 5900; CHECK-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] 5901; 5902; IND-LABEL: @trunc_with_first_order_recurrence( 5903; IND-NEXT: entry: 5904; IND-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5905; IND: vector.ph: 5906; IND-NEXT: br label [[VECTOR_BODY:%.*]] 5907; IND: vector.body: 5908; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 5909; IND-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 5910; IND-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] 5911; IND-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[VEC_IND4:%.*]], [[VECTOR_BODY]] ] 5912; IND-NEXT: [[VEC_IND4]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ] 5913; IND-NEXT: 
[[VEC_IND6:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ] 5914; IND-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND4]], <2 x i32> <i32 1, i32 2> 5915; IND-NEXT: [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND2]], [[VEC_IND4]] 5916; IND-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 42, i32 42> 5917; IND-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND4]] 5918; IND-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]] 5919; IND-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> 5920; IND-NEXT: [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]] 5921; IND-NEXT: [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND6]], <i32 1, i32 1> 5922; IND-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]] 5923; IND-NEXT: [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> 5924; IND-NEXT: [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]] 5925; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 5926; IND-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2> 5927; IND-NEXT: [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2> 5928; IND-NEXT: [[VEC_IND_NEXT7]] = add <2 x i32> [[VEC_IND6]], <i32 2, i32 2> 5929; IND-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 5930; IND-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] 5931; IND: middle.block: 5932; IND-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP10]]) 5933; IND-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND4]], i64 1 5934; IND-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] 5935; IND: scalar.ph: 5936; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] 5937; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ] 5938; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ] 5939; 
IND-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 5940; IND-NEXT: br label [[LOOP:%.*]] 5941; IND: exit: 5942; IND-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 5943; IND-NEXT: ret i64 [[DOTLCSSA]] 5944; IND: loop: 5945; IND-NEXT: [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 5946; IND-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 5947; IND-NEXT: [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 5948; IND-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] 5949; IND-NEXT: [[C6]] = trunc i64 [[INDVARS_IV]] to i32 5950; IND-NEXT: [[C8:%.*]] = mul i32 [[X]], [[C6]] 5951; IND-NEXT: [[C9:%.*]] = add i32 [[C8]], 42 5952; IND-NEXT: [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]] 5953; IND-NEXT: [[C11:%.*]] = add i32 [[C10]], [[C9]] 5954; IND-NEXT: [[C12:%.*]] = sext i32 [[C11]] to i64 5955; IND-NEXT: [[C13:%.*]] = add i64 [[C5]], [[C12]] 5956; IND-NEXT: [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32 5957; IND-NEXT: [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1 5958; IND-NEXT: [[C15:%.*]] = add i32 [[C9]], [[C14]] 5959; IND-NEXT: [[C16:%.*]] = sext i32 [[C15]] to i64 5960; IND-NEXT: [[C23]] = add i64 [[C13]], [[C16]] 5961; IND-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 5962; IND-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1 5963; IND-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114 5964; IND-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] 5965; 5966; UNROLL-LABEL: @trunc_with_first_order_recurrence( 5967; UNROLL-NEXT: entry: 5968; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 5969; UNROLL: vector.ph: 5970; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 5971; UNROLL: vector.body: 5972; UNROLL-NEXT: 
[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 5973; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] 5974; UNROLL-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] 5975; UNROLL-NEXT: [[VEC_IND4:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ] 5976; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD9:%.*]], [[VECTOR_BODY]] ] 5977; UNROLL-NEXT: [[VEC_IND8:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] 5978; UNROLL-NEXT: [[VEC_IND12:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT15:%.*]], [[VECTOR_BODY]] ] 5979; UNROLL-NEXT: [[STEP_ADD5:%.*]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2> 5980; UNROLL-NEXT: [[STEP_ADD9]] = add <2 x i32> [[VEC_IND8]], <i32 2, i32 2> 5981; UNROLL-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND8]], <2 x i32> <i32 1, i32 2> 5982; UNROLL-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND8]], <2 x i32> [[STEP_ADD9]], <2 x i32> <i32 1, i32 2> 5983; UNROLL-NEXT: [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND4]], [[VEC_IND8]] 5984; UNROLL-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD5]], [[STEP_ADD9]] 5985; UNROLL-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], <i32 42, i32 42> 5986; UNROLL-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], <i32 42, i32 42> 5987; UNROLL-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND8]] 5988; UNROLL-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD9]] 5989; UNROLL-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]] 5990; UNROLL-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP5]] 5991; UNROLL-NEXT: [[TMP10:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> 5992; UNROLL-NEXT: [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64> 5993; 
UNROLL-NEXT: [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]] 5994; UNROLL-NEXT: [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]] 5995; UNROLL-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND12]], <i32 1, i32 1> 5996; UNROLL-NEXT: [[STEP_ADD13:%.*]] = shl <2 x i32> [[VEC_IND12]], <i32 1, i32 1> 5997; UNROLL-NEXT: [[TMP15:%.*]] = add <2 x i32> [[STEP_ADD13]], <i32 4, i32 4> 5998; UNROLL-NEXT: [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]] 5999; UNROLL-NEXT: [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]] 6000; UNROLL-NEXT: [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64> 6001; UNROLL-NEXT: [[TMP19:%.*]] = sext <2 x i32> [[TMP17]] to <2 x i64> 6002; UNROLL-NEXT: [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]] 6003; UNROLL-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]] 6004; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 6005; UNROLL-NEXT: [[VEC_IND_NEXT7]] = add <2 x i32> [[VEC_IND4]], <i32 4, i32 4> 6006; UNROLL-NEXT: [[VEC_IND_NEXT11]] = add <2 x i32> [[VEC_IND8]], <i32 4, i32 4> 6007; UNROLL-NEXT: [[VEC_IND_NEXT15]] = add <2 x i32> [[VEC_IND12]], <i32 4, i32 4> 6008; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 6009; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] 6010; UNROLL: middle.block: 6011; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP21]], [[TMP20]] 6012; UNROLL-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) 6013; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD9]], i64 1 6014; UNROLL-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] 6015; UNROLL: scalar.ph: 6016; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] 6017; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ] 6018; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ] 
6019; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] 6020; UNROLL-NEXT: br label [[LOOP:%.*]] 6021; UNROLL: exit: 6022; UNROLL-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] 6023; UNROLL-NEXT: ret i64 [[DOTLCSSA]] 6024; UNROLL: loop: 6025; UNROLL-NEXT: [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 6026; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 6027; UNROLL-NEXT: [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 6028; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] 6029; UNROLL-NEXT: [[C6]] = trunc i64 [[INDVARS_IV]] to i32 6030; UNROLL-NEXT: [[C8:%.*]] = mul i32 [[X]], [[C6]] 6031; UNROLL-NEXT: [[C9:%.*]] = add i32 [[C8]], 42 6032; UNROLL-NEXT: [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]] 6033; UNROLL-NEXT: [[C11:%.*]] = add i32 [[C10]], [[C9]] 6034; UNROLL-NEXT: [[C12:%.*]] = sext i32 [[C11]] to i64 6035; UNROLL-NEXT: [[C13:%.*]] = add i64 [[C5]], [[C12]] 6036; UNROLL-NEXT: [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32 6037; UNROLL-NEXT: [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1 6038; UNROLL-NEXT: [[C15:%.*]] = add i32 [[C9]], [[C14]] 6039; UNROLL-NEXT: [[C16:%.*]] = sext i32 [[C15]] to i64 6040; UNROLL-NEXT: [[C23]] = add i64 [[C13]], [[C16]] 6041; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 6042; UNROLL-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1 6043; UNROLL-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114 6044; UNROLL-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] 6045; 6046; UNROLL-NO-IC-LABEL: @trunc_with_first_order_recurrence( 6047; UNROLL-NO-IC-NEXT: entry: 6048; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 6049; UNROLL-NO-IC: 
vector.ph: 6050; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 6051; UNROLL-NO-IC: vector.body: 6052; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6053; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] 6054; UNROLL-NO-IC-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] 6055; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 6056; UNROLL-NO-IC-NEXT: [[VEC_IND4:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ] 6057; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD9:%.*]], [[VECTOR_BODY]] ] 6058; UNROLL-NO-IC-NEXT: [[VEC_IND8:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] 6059; UNROLL-NO-IC-NEXT: [[VEC_IND12:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT15:%.*]], [[VECTOR_BODY]] ] 6060; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 6061; UNROLL-NO-IC-NEXT: [[STEP_ADD5:%.*]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2> 6062; UNROLL-NO-IC-NEXT: [[STEP_ADD9]] = add <2 x i32> [[VEC_IND8]], <i32 2, i32 2> 6063; UNROLL-NO-IC-NEXT: [[STEP_ADD13:%.*]] = add <2 x i32> [[VEC_IND12]], <i32 2, i32 2> 6064; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND8]], <2 x i32> <i32 1, i32 2> 6065; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND8]], <2 x i32> [[STEP_ADD9]], <2 x i32> <i32 1, i32 2> 6066; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND4]], [[VEC_IND8]] 6067; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD5]], [[STEP_ADD9]] 6068; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], <i32 42, 
i32 42> 6069; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], <i32 42, i32 42> 6070; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND8]] 6071; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD9]] 6072; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]] 6073; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP5]] 6074; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64> 6075; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64> 6076; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]] 6077; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]] 6078; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND12]], <i32 1, i32 1> 6079; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shl <2 x i32> [[STEP_ADD13]], <i32 1, i32 1> 6080; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]] 6081; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]] 6082; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64> 6083; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = sext <2 x i32> [[TMP17]] to <2 x i64> 6084; UNROLL-NO-IC-NEXT: [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]] 6085; UNROLL-NO-IC-NEXT: [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]] 6086; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 6087; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2> 6088; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT7]] = add <2 x i32> [[STEP_ADD5]], <i32 2, i32 2> 6089; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT11]] = add <2 x i32> [[STEP_ADD9]], <i32 2, i32 2> 6090; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT15]] = add <2 x i32> [[STEP_ADD13]], <i32 2, i32 2> 6091; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 6092; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] 6093; UNROLL-NO-IC: middle.block: 6094; 
UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP21]], [[TMP20]] 6095; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) 6096; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 113, 112 6097; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD9]], i32 1 6098; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[STEP_ADD9]], i32 0 6099; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 6100; UNROLL-NO-IC: scalar.ph: 6101; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] 6102; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] 6103; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] 6104; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] 6105; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] 6106; UNROLL-NO-IC: exit: 6107; UNROLL-NO-IC-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] 6108; UNROLL-NO-IC-NEXT: ret i64 [[DOTLCSSA]] 6109; UNROLL-NO-IC: loop: 6110; UNROLL-NO-IC-NEXT: [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 6111; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 6112; UNROLL-NO-IC-NEXT: [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 6113; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] 6114; UNROLL-NO-IC-NEXT: [[C6]] = trunc i64 [[INDVARS_IV]] to i32 6115; UNROLL-NO-IC-NEXT: [[C8:%.*]] = mul i32 [[X]], [[C6]] 6116; UNROLL-NO-IC-NEXT: [[C9:%.*]] = add i32 [[C8]], 42 6117; UNROLL-NO-IC-NEXT: [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]] 6118; 
UNROLL-NO-IC-NEXT: [[C11:%.*]] = add i32 [[C10]], [[C9]] 6119; UNROLL-NO-IC-NEXT: [[C12:%.*]] = sext i32 [[C11]] to i64 6120; UNROLL-NO-IC-NEXT: [[C13:%.*]] = add i64 [[C5]], [[C12]] 6121; UNROLL-NO-IC-NEXT: [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32 6122; UNROLL-NO-IC-NEXT: [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1 6123; UNROLL-NO-IC-NEXT: [[C15:%.*]] = add i32 [[C9]], [[C14]] 6124; UNROLL-NO-IC-NEXT: [[C16:%.*]] = sext i32 [[C15]] to i64 6125; UNROLL-NO-IC-NEXT: [[C23]] = add i64 [[C13]], [[C16]] 6126; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 6127; UNROLL-NO-IC-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1 6128; UNROLL-NO-IC-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114 6129; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] 6130; 6131; INTERLEAVE-LABEL: @trunc_with_first_order_recurrence( 6132; INTERLEAVE-NEXT: entry: 6133; INTERLEAVE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 6134; INTERLEAVE: vector.ph: 6135; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 6136; INTERLEAVE: vector.body: 6137; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6138; INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] 6139; INTERLEAVE-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] 6140; INTERLEAVE-NEXT: [[VEC_IND4:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ] 6141; INTERLEAVE-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD9:%.*]], [[VECTOR_BODY]] ] 6142; INTERLEAVE-NEXT: [[VEC_IND8:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ] 6143; INTERLEAVE-NEXT: 
[[VEC_IND12:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT15:%.*]], [[VECTOR_BODY]] ] 6144; INTERLEAVE-NEXT: [[STEP_ADD5:%.*]] = add <4 x i32> [[VEC_IND4]], <i32 4, i32 4, i32 4, i32 4> 6145; INTERLEAVE-NEXT: [[STEP_ADD9]] = add <4 x i32> [[VEC_IND8]], <i32 4, i32 4, i32 4, i32 4> 6146; INTERLEAVE-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND8]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 6147; INTERLEAVE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND8]], <4 x i32> [[STEP_ADD9]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 6148; INTERLEAVE-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[VEC_IND4]], [[VEC_IND8]] 6149; INTERLEAVE-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[STEP_ADD5]], [[STEP_ADD9]] 6150; INTERLEAVE-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], <i32 42, i32 42, i32 42, i32 42> 6151; INTERLEAVE-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], <i32 42, i32 42, i32 42, i32 42> 6152; INTERLEAVE-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP0]], [[VEC_IND8]] 6153; INTERLEAVE-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP1]], [[STEP_ADD9]] 6154; INTERLEAVE-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP6]], [[TMP4]] 6155; INTERLEAVE-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP7]], [[TMP5]] 6156; INTERLEAVE-NEXT: [[TMP10:%.*]] = sext <4 x i32> [[TMP8]] to <4 x i64> 6157; INTERLEAVE-NEXT: [[TMP11:%.*]] = sext <4 x i32> [[TMP9]] to <4 x i64> 6158; INTERLEAVE-NEXT: [[TMP12:%.*]] = add <4 x i64> [[VEC_PHI]], [[TMP10]] 6159; INTERLEAVE-NEXT: [[TMP13:%.*]] = add <4 x i64> [[VEC_PHI2]], [[TMP11]] 6160; INTERLEAVE-NEXT: [[TMP14:%.*]] = shl <4 x i32> [[VEC_IND12]], <i32 1, i32 1, i32 1, i32 1> 6161; INTERLEAVE-NEXT: [[STEP_ADD13:%.*]] = shl <4 x i32> [[VEC_IND12]], <i32 1, i32 1, i32 1, i32 1> 6162; INTERLEAVE-NEXT: [[TMP15:%.*]] = add <4 x i32> [[STEP_ADD13]], <i32 8, i32 8, i32 8, i32 8> 6163; INTERLEAVE-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP4]], [[TMP14]] 6164; INTERLEAVE-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP5]], [[TMP15]] 
6165; INTERLEAVE-NEXT: [[TMP18:%.*]] = sext <4 x i32> [[TMP16]] to <4 x i64> 6166; INTERLEAVE-NEXT: [[TMP19:%.*]] = sext <4 x i32> [[TMP17]] to <4 x i64> 6167; INTERLEAVE-NEXT: [[TMP20]] = add <4 x i64> [[TMP12]], [[TMP18]] 6168; INTERLEAVE-NEXT: [[TMP21]] = add <4 x i64> [[TMP13]], [[TMP19]] 6169; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 6170; INTERLEAVE-NEXT: [[VEC_IND_NEXT7]] = add <4 x i32> [[VEC_IND4]], <i32 8, i32 8, i32 8, i32 8> 6171; INTERLEAVE-NEXT: [[VEC_IND_NEXT11]] = add <4 x i32> [[VEC_IND8]], <i32 8, i32 8, i32 8, i32 8> 6172; INTERLEAVE-NEXT: [[VEC_IND_NEXT15]] = add <4 x i32> [[VEC_IND12]], <i32 8, i32 8, i32 8, i32 8> 6173; INTERLEAVE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112 6174; INTERLEAVE-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]] 6175; INTERLEAVE: middle.block: 6176; INTERLEAVE-NEXT: [[BIN_RDX:%.*]] = add <4 x i64> [[TMP21]], [[TMP20]] 6177; INTERLEAVE-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[BIN_RDX]]) 6178; INTERLEAVE-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD9]], i64 3 6179; INTERLEAVE-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] 6180; INTERLEAVE: scalar.ph: 6181; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] 6182; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ] 6183; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ] 6184; INTERLEAVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] 6185; INTERLEAVE-NEXT: br label [[LOOP:%.*]] 6186; INTERLEAVE: exit: 6187; INTERLEAVE-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ] 6188; INTERLEAVE-NEXT: ret i64 [[DOTLCSSA]] 6189; INTERLEAVE: loop: 6190; INTERLEAVE-NEXT: [[C5:%.*]] = phi i64 [ 
[[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 6191; INTERLEAVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 6192; INTERLEAVE-NEXT: [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 6193; INTERLEAVE-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] 6194; INTERLEAVE-NEXT: [[C6]] = trunc i64 [[INDVARS_IV]] to i32 6195; INTERLEAVE-NEXT: [[C8:%.*]] = mul i32 [[X]], [[C6]] 6196; INTERLEAVE-NEXT: [[C9:%.*]] = add i32 [[C8]], 42 6197; INTERLEAVE-NEXT: [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]] 6198; INTERLEAVE-NEXT: [[C11:%.*]] = add i32 [[C10]], [[C9]] 6199; INTERLEAVE-NEXT: [[C12:%.*]] = sext i32 [[C11]] to i64 6200; INTERLEAVE-NEXT: [[C13:%.*]] = add i64 [[C5]], [[C12]] 6201; INTERLEAVE-NEXT: [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32 6202; INTERLEAVE-NEXT: [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1 6203; INTERLEAVE-NEXT: [[C15:%.*]] = add i32 [[C9]], [[C14]] 6204; INTERLEAVE-NEXT: [[C16:%.*]] = sext i32 [[C15]] to i64 6205; INTERLEAVE-NEXT: [[C23]] = add i64 [[C13]], [[C16]] 6206; INTERLEAVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 6207; INTERLEAVE-NEXT: [[C24]] = add nuw nsw i32 [[X]], 1 6208; INTERLEAVE-NEXT: [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114 6209; INTERLEAVE-NEXT: br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]] 6210; 6211entry: 6212 br label %loop 6213 6214exit: ; preds = %loop 6215 %.lcssa = phi i64 [ %c23, %loop ] 6216 ret i64 %.lcssa 6217 6218loop: ; preds = %loop, %entry 6219 %c5 = phi i64 [ %c23, %loop ], [ 0, %entry ] 6220 %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 1, %entry ] 6221 %x = phi i32 [ %c24, %loop ], [ 1, %entry ] 6222 %y = phi i32 [ %c6, %loop ], [ 42, %entry ] 6223 %c6 = trunc i64 %indvars.iv to i32 6224 %c8 = mul i32 %x, %c6 6225 %c9 = add i32 %c8, 42 6226 %c10 = add i32 %y, 
%c6 6227 %c11 = add i32 %c10, %c9 6228 %c12 = sext i32 %c11 to i64 6229 %c13 = add i64 %c5, %c12 6230 %indvars.iv.tr = trunc i64 %indvars.iv to i32 6231 %c14 = shl i32 %indvars.iv.tr, 1 6232 %c15 = add i32 %c9, %c14 6233 %c16 = sext i32 %c15 to i64 6234 %c23 = add i64 %c13, %c16 6235 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 6236 %c24 = add nuw nsw i32 %x, 1 6237 %exitcond.i = icmp eq i64 %indvars.iv.next, 114 6238 br i1 %exitcond.i, label %exit, label %loop 6239 6240} 6241 6242; Test case for PR52460. 6243define void @pr52460_first_order_recurrence_truncated_iv(i32* noalias %src, i32* %dst) { 6244; 6245; CHECK-LABEL: @pr52460_first_order_recurrence_truncated_iv( 6246; CHECK-NEXT: entry: 6247; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 6248; CHECK: vector.ph: 6249; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 6250; CHECK: vector.body: 6251; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6252; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 6253; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[VEC_IND2:%.*]], [[VECTOR_BODY]] ] 6254; CHECK-NEXT: [[VEC_IND2]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] 6255; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 6256; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 6257; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32 6258; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], 0 6259; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], 1 6260; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> <i32 1, i32 2> 6261; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[SRC:%.*]], align 4 6262; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0 6263; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] 
= shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 6264; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP4]] 6265; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP2]] 6266; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i32> [[VEC_IND2]], [[TMP6]] 6267; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 0 6268; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <2 x i32>* 6269; CHECK-NEXT: store <2 x i32> [[TMP8]], <2 x i32>* [[TMP10]], align 4 6270; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 6271; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 6272; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2> 6273; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 6274; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] 6275; CHECK: middle.block: 6276; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, 100 6277; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i32 1 6278; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i32 0 6279; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 6280; CHECK: scalar.ph: 6281; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] 6282; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 6283; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 6284; CHECK-NEXT: br label [[LOOP:%.*]] 6285; CHECK: loop: 6286; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] 6287; CHECK-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ] 6288; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], 
[[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ] 6289; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[SRC]], align 4 6290; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[SCALAR_RECUR]] 6291; CHECK-NEXT: [[TRUNC_IV_NEXT]] = add i32 [[TRUNC_IV]], 1 6292; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 6293; CHECK-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32 6294; CHECK-NEXT: [[DST_GEP:%.*]] = getelementptr i32, i32* [[DST]], i32 [[IV_TRUNC]] 6295; CHECK-NEXT: [[ADD:%.*]] = add i32 [[IV_TRUNC]], [[MUL]] 6296; CHECK-NEXT: store i32 [[ADD]], i32* [[DST_GEP]], align 4 6297; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100 6298; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] 6299; CHECK: exit: 6300; CHECK-NEXT: ret void 6301; 6302; IND-LABEL: @pr52460_first_order_recurrence_truncated_iv( 6303; IND-NEXT: entry: 6304; IND-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 6305; IND: vector.ph: 6306; IND-NEXT: br label [[VECTOR_BODY:%.*]] 6307; IND: vector.body: 6308; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6309; IND-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[VEC_IND2:%.*]], [[VECTOR_BODY]] ] 6310; IND-NEXT: [[VEC_IND2]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] 6311; IND-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> <i32 1, i32 2> 6312; IND-NEXT: [[TMP1:%.*]] = load i32, i32* [[SRC:%.*]], align 4 6313; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0 6314; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 6315; IND-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP0]] 6316; IND-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32 6317; IND-NEXT: [[TMP3:%.*]] = ashr 
exact i64 [[SEXT]], 32 6318; IND-NEXT: [[TMP4:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[TMP3]] 6319; IND-NEXT: [[TMP5:%.*]] = add <2 x i32> [[VEC_IND2]], [[TMP2]] 6320; IND-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>* 6321; IND-NEXT: store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 4 6322; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 6323; IND-NEXT: [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2> 6324; IND-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 6325; IND-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] 6326; IND: middle.block: 6327; IND-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] 6328; IND: scalar.ph: 6329; IND-NEXT: br label [[LOOP:%.*]] 6330; IND: loop: 6331; IND-NEXT: br i1 undef, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] 6332; IND: exit: 6333; IND-NEXT: ret void 6334; 6335; UNROLL-LABEL: @pr52460_first_order_recurrence_truncated_iv( 6336; UNROLL-NEXT: entry: 6337; UNROLL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 6338; UNROLL: vector.ph: 6339; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 6340; UNROLL: vector.body: 6341; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6342; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD4:%.*]], [[VECTOR_BODY]] ] 6343; UNROLL-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] 6344; UNROLL-NEXT: [[STEP_ADD4]] = add <2 x i32> [[VEC_IND3]], <i32 2, i32 2> 6345; UNROLL-NEXT: [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> <i32 1, i32 2> 6346; UNROLL-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD4]], <2 x i32> <i32 1, i32 2> 6347; UNROLL-NEXT: [[TMP2:%.*]] = load i32, i32* [[SRC:%.*]], align 4 6348; UNROLL-NEXT: 
[[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0 6349; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 6350; UNROLL-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0 6351; UNROLL-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT7]], <2 x i32> poison, <2 x i32> zeroinitializer 6352; UNROLL-NEXT: [[TMP3:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP0]] 6353; UNROLL-NEXT: [[TMP4:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT8]], [[TMP1]] 6354; UNROLL-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32 6355; UNROLL-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 32 6356; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[TMP5]] 6357; UNROLL-NEXT: [[TMP7:%.*]] = add <2 x i32> [[VEC_IND3]], [[TMP3]] 6358; UNROLL-NEXT: [[TMP8:%.*]] = add <2 x i32> [[STEP_ADD4]], [[TMP4]] 6359; UNROLL-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>* 6360; UNROLL-NEXT: store <2 x i32> [[TMP7]], <2 x i32>* [[TMP9]], align 4 6361; UNROLL-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP6]], i64 2 6362; UNROLL-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <2 x i32>* 6363; UNROLL-NEXT: store <2 x i32> [[TMP8]], <2 x i32>* [[TMP11]], align 4 6364; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 6365; UNROLL-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND3]], <i32 4, i32 4> 6366; UNROLL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 6367; UNROLL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] 6368; UNROLL: middle.block: 6369; UNROLL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] 6370; UNROLL: scalar.ph: 6371; UNROLL-NEXT: br label [[LOOP:%.*]] 6372; UNROLL: loop: 6373; UNROLL-NEXT: br i1 undef, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] 6374; UNROLL: exit: 6375; UNROLL-NEXT: ret void 
6376; 6377; UNROLL-NO-IC-LABEL: @pr52460_first_order_recurrence_truncated_iv( 6378; UNROLL-NO-IC-NEXT: entry: 6379; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 6380; UNROLL-NO-IC: vector.ph: 6381; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 6382; UNROLL-NO-IC: vector.body: 6383; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6384; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 6385; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD4:%.*]], [[VECTOR_BODY]] ] 6386; UNROLL-NO-IC-NEXT: [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] 6387; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 6388; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 6389; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 6390; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2 6391; UNROLL-NO-IC-NEXT: [[STEP_ADD4]] = add <2 x i32> [[VEC_IND3]], <i32 2, i32 2> 6392; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = trunc i64 [[INDEX]] to i32 6393; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 0 6394; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], 1 6395; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i32 [[TMP2]], 2 6396; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i32 [[TMP2]], 3 6397; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> <i32 1, i32 2> 6398; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD4]], <2 x i32> <i32 1, i32 2> 6399; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = load i32, i32* [[SRC:%.*]], align 4 6400; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0 6401; UNROLL-NO-IC-NEXT: 
[[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 6402; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = load i32, i32* [[SRC]], align 4 6403; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 6404; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT7]], <2 x i32> poison, <2 x i32> zeroinitializer 6405; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP7]] 6406; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT8]], [[TMP8]] 6407; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP3]] 6408; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[DST]], i32 [[TMP5]] 6409; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = add <2 x i32> [[VEC_IND3]], [[TMP11]] 6410; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add <2 x i32> [[STEP_ADD4]], [[TMP12]] 6411; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr i32, i32* [[TMP13]], i32 0 6412; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <2 x i32>* 6413; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP15]], <2 x i32>* [[TMP18]], align 4 6414; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[TMP13]], i32 2 6415; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <2 x i32>* 6416; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP16]], <2 x i32>* [[TMP20]], align 4 6417; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 6418; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2> 6419; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD4]], <i32 2, i32 2> 6420; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 6421; UNROLL-NO-IC-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] 6422; UNROLL-NO-IC: middle.block: 6423; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, 100 6424; 
UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD4]], i32 1 6425; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[STEP_ADD4]], i32 0 6426; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 6427; UNROLL-NO-IC: scalar.ph: 6428; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] 6429; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 6430; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] 6431; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] 6432; UNROLL-NO-IC: loop: 6433; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] 6434; UNROLL-NO-IC-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ] 6435; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ] 6436; UNROLL-NO-IC-NEXT: [[LV:%.*]] = load i32, i32* [[SRC]], align 4 6437; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[LV]], [[SCALAR_RECUR]] 6438; UNROLL-NO-IC-NEXT: [[TRUNC_IV_NEXT]] = add i32 [[TRUNC_IV]], 1 6439; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 6440; UNROLL-NO-IC-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32 6441; UNROLL-NO-IC-NEXT: [[DST_GEP:%.*]] = getelementptr i32, i32* [[DST]], i32 [[IV_TRUNC]] 6442; UNROLL-NO-IC-NEXT: [[ADD:%.*]] = add i32 [[IV_TRUNC]], [[MUL]] 6443; UNROLL-NO-IC-NEXT: store i32 [[ADD]], i32* [[DST_GEP]], align 4 6444; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100 6445; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] 6446; UNROLL-NO-IC: exit: 6447; UNROLL-NO-IC-NEXT: ret void 6448; 6449; INTERLEAVE-LABEL: @pr52460_first_order_recurrence_truncated_iv( 6450; 
INTERLEAVE-NEXT: entry: 6451; INTERLEAVE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 6452; INTERLEAVE: vector.ph: 6453; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 6454; INTERLEAVE: vector.body: 6455; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6456; INTERLEAVE-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD4:%.*]], [[VECTOR_BODY]] ] 6457; INTERLEAVE-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ] 6458; INTERLEAVE-NEXT: [[STEP_ADD4]] = add <4 x i32> [[VEC_IND3]], <i32 4, i32 4, i32 4, i32 4> 6459; INTERLEAVE-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 6460; INTERLEAVE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND3]], <4 x i32> [[STEP_ADD4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 6461; INTERLEAVE-NEXT: [[TMP2:%.*]] = load i32, i32* [[SRC:%.*]], align 4 6462; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0 6463; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 6464; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0 6465; INTERLEAVE-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT7]], <4 x i32> poison, <4 x i32> zeroinitializer 6466; INTERLEAVE-NEXT: [[TMP3:%.*]] = mul nsw <4 x i32> [[BROADCAST_SPLAT]], [[TMP0]] 6467; INTERLEAVE-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[BROADCAST_SPLAT8]], [[TMP1]] 6468; INTERLEAVE-NEXT: [[SEXT:%.*]] = shl i64 [[INDEX]], 32 6469; INTERLEAVE-NEXT: [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 32 6470; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[TMP5]] 6471; 
INTERLEAVE-NEXT: [[TMP7:%.*]] = add <4 x i32> [[VEC_IND3]], [[TMP3]] 6472; INTERLEAVE-NEXT: [[TMP8:%.*]] = add <4 x i32> [[STEP_ADD4]], [[TMP4]] 6473; INTERLEAVE-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 6474; INTERLEAVE-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP9]], align 4 6475; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP6]], i64 4 6476; INTERLEAVE-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* 6477; INTERLEAVE-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP11]], align 4 6478; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 6479; INTERLEAVE-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND3]], <i32 8, i32 8, i32 8, i32 8> 6480; INTERLEAVE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 6481; INTERLEAVE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]] 6482; INTERLEAVE: middle.block: 6483; INTERLEAVE-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD4]], i64 3 6484; INTERLEAVE-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] 6485; INTERLEAVE: scalar.ph: 6486; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] 6487; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 96, [[MIDDLE_BLOCK]] ] 6488; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ 96, [[MIDDLE_BLOCK]] ] 6489; INTERLEAVE-NEXT: br label [[LOOP:%.*]] 6490; INTERLEAVE: loop: 6491; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] 6492; INTERLEAVE-NEXT: [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ] 6493; INTERLEAVE-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ] 6494; INTERLEAVE-NEXT: [[LV:%.*]] = load i32, i32* [[SRC]], align 4 6495; INTERLEAVE-NEXT: [[MUL:%.*]] = mul nsw i32 
[[LV]], [[SCALAR_RECUR]] 6496; INTERLEAVE-NEXT: [[TRUNC_IV_NEXT]] = add i32 [[TRUNC_IV]], 1 6497; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 6498; INTERLEAVE-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32 6499; INTERLEAVE-NEXT: [[SEXT9:%.*]] = shl i64 [[IV]], 32 6500; INTERLEAVE-NEXT: [[TMP13:%.*]] = ashr exact i64 [[SEXT9]], 32 6501; INTERLEAVE-NEXT: [[DST_GEP:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP13]] 6502; INTERLEAVE-NEXT: [[ADD:%.*]] = add i32 [[MUL]], [[IV_TRUNC]] 6503; INTERLEAVE-NEXT: store i32 [[ADD]], i32* [[DST_GEP]], align 4 6504; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100 6505; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]] 6506; INTERLEAVE: exit: 6507; INTERLEAVE-NEXT: ret void 6508; 6509entry: 6510 br label %loop 6511 6512loop: 6513 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] 6514 %trunc.iv = phi i32 [ 0, %entry ], [ %trunc.iv.next, %loop ] 6515 %recur = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] 6516 %lv = load i32, i32* %src, align 4 6517 %mul = mul nsw i32 %lv, %recur 6518 %trunc.iv.next = add i32 %trunc.iv, 1 6519 %iv.next = add nuw nsw i64 %iv, 1 6520 %iv.trunc = trunc i64 %iv to i32 6521 %dst.gep = getelementptr i32, i32* %dst, i32 %iv.trunc 6522 %add = add i32 %iv.trunc, %mul 6523 store i32 %add, i32* %dst.gep 6524 %exitcond = icmp eq i32 %trunc.iv.next, 100 6525 br i1 %exitcond, label %exit, label %loop 6526 6527exit: 6528 ret void 6529} 6530 6531; Test case where %iv.2.ext and %iv.2.conv become redundant due to the SCEV 6532; predicates generated for the vector loop. They should be removed in the 6533; vector loop. 
6534define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n, i32 %step, i32* %ptr) { 6535; 6536; CHECK-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence( 6537; CHECK-NEXT: entry: 6538; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 6539; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 6540; CHECK: vector.scevcheck: 6541; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 6542; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8 6543; CHECK-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]] 6544; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0 6545; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]] 6546; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8 6547; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]]) 6548; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 6549; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 6550; CHECK-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]] 6551; CHECK-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]] 6552; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0 6553; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0 6554; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]] 6555; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255 6556; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0 6557; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] 6558; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]] 6559; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]] 6560; CHECK-NEXT: [[TMP17:%.*]] = sext i8 [[TMP1]] to i32 6561; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP17]] 6562; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP15]], [[IDENT_CHECK]] 6563; CHECK-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 6564; CHECK: vector.ph: 6565; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 
[[N]], 2 6566; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 6567; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32 6568; CHECK-NEXT: [[IND_END:%.*]] = mul i32 [[CAST_CRD]], [[STEP]] 6569; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i32 0 6570; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 6571; CHECK-NEXT: [[TMP19:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]] 6572; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP19]] 6573; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[STEP]], 2 6574; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP20]], i32 0 6575; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer 6576; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 6577; CHECK: vector.body: 6578; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6579; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ] 6580; CHECK-NEXT: [[VEC_IND]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 6581; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0 6582; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2> 6583; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP21]] 6584; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0 6585; CHECK-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <2 x i32>* 6586; CHECK-NEXT: store <2 x i32> [[TMP22]], <2 x i32>* [[TMP25]], align 4 6587; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 6588; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] 6589; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 
6590; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] 6591; CHECK: middle.block: 6592; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 6593; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 1 6594; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 0 6595; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 6596; CHECK: scalar.ph: 6597; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] 6598; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 6599; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 6600; CHECK-NEXT: br label [[LOOP:%.*]] 6601; CHECK: loop: 6602; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] 6603; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] 6604; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] 6605; CHECK-NEXT: [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24 6606; CHECK-NEXT: [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24 6607; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]] 6608; CHECK-NEXT: store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4 6609; CHECK-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]] 6610; CHECK-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1 6611; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]] 6612; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]] 6613; CHECK: exit: 6614; CHECK-NEXT: ret void 6615; 6616; IND-LABEL: 
@test_optimized_cast_induction_feeding_first_order_recurrence( 6617; IND-NEXT: entry: 6618; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 6619; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 6620; IND: vector.scevcheck: 6621; IND-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 6622; IND-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8 6623; IND-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]] 6624; IND-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0 6625; IND-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]] 6626; IND-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8 6627; IND-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]]) 6628; IND-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 6629; IND-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 6630; IND-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128 6631; IND-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 6632; IND-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]] 6633; IND-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255 6634; IND-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0 6635; IND-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] 6636; IND-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] 6637; IND-NEXT: [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW]] 6638; IND-NEXT: [[TMP14:%.*]] = add i32 [[STEP]], -128 6639; IND-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], -256 6640; IND-NEXT: [[TMP16:%.*]] = or i1 [[TMP13]], [[TMP15]] 6641; IND-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 6642; IND: vector.ph: 6643; IND-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -2 6644; IND-NEXT: [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32 6645; IND-NEXT: [[IND_END:%.*]] = mul i32 [[CAST_CRD]], [[STEP]] 6646; IND-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 6647; IND-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x 
i32> poison, <2 x i32> zeroinitializer 6648; IND-NEXT: [[TMP17:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> 6649; IND-NEXT: [[TMP18:%.*]] = shl i32 [[STEP]], 1 6650; IND-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i64 0 6651; IND-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer 6652; IND-NEXT: br label [[VECTOR_BODY:%.*]] 6653; IND: vector.body: 6654; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6655; IND-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ] 6656; IND-NEXT: [[VEC_IND]] = phi <2 x i32> [ [[TMP17]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 6657; IND-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2> 6658; IND-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[INDEX]] 6659; IND-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <2 x i32>* 6660; IND-NEXT: store <2 x i32> [[TMP19]], <2 x i32>* [[TMP21]], align 4 6661; IND-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 6662; IND-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] 6663; IND-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 6664; IND-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] 6665; IND: middle.block: 6666; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] 6667; IND-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i64 1 6668; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 6669; IND: scalar.ph: 6670; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] 6671; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ 
[[N_VEC]], [[MIDDLE_BLOCK]] ] 6672; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] 6673; IND-NEXT: br label [[LOOP:%.*]] 6674; IND: loop: 6675; IND-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] 6676; IND-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] 6677; IND-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] 6678; IND-NEXT: [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24 6679; IND-NEXT: [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24 6680; IND-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]] 6681; IND-NEXT: store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4 6682; IND-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]] 6683; IND-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1 6684; IND-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]] 6685; IND-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]] 6686; IND: exit: 6687; IND-NEXT: ret void 6688; 6689; UNROLL-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence( 6690; UNROLL-NEXT: entry: 6691; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 6692; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 6693; UNROLL: vector.scevcheck: 6694; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 6695; UNROLL-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8 6696; UNROLL-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]] 6697; UNROLL-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0 6698; UNROLL-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]] 6699; UNROLL-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8 6700; UNROLL-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]]) 6701; UNROLL-NEXT: [[MUL_RESULT:%.*]] = 
extractvalue { i8, i1 } [[MUL]], 0 6702; UNROLL-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 6703; UNROLL-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128 6704; UNROLL-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 6705; UNROLL-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]] 6706; UNROLL-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255 6707; UNROLL-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0 6708; UNROLL-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] 6709; UNROLL-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] 6710; UNROLL-NEXT: [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW]] 6711; UNROLL-NEXT: [[TMP14:%.*]] = add i32 [[STEP]], -128 6712; UNROLL-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], -256 6713; UNROLL-NEXT: [[TMP16:%.*]] = or i1 [[TMP13]], [[TMP15]] 6714; UNROLL-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 6715; UNROLL: vector.ph: 6716; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 6717; UNROLL-NEXT: [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32 6718; UNROLL-NEXT: [[IND_END:%.*]] = mul i32 [[CAST_CRD]], [[STEP]] 6719; UNROLL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 6720; UNROLL-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 6721; UNROLL-NEXT: [[TMP17:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1> 6722; UNROLL-NEXT: [[TMP18:%.*]] = shl i32 [[STEP]], 1 6723; UNROLL-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i64 0 6724; UNROLL-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer 6725; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 6726; UNROLL: vector.body: 6727; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6728; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], 
[[VECTOR_BODY]] ] 6729; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[TMP17]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 6730; UNROLL-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] 6731; UNROLL-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2> 6732; UNROLL-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2> 6733; UNROLL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[INDEX]] 6734; UNROLL-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <2 x i32>* 6735; UNROLL-NEXT: store <2 x i32> [[TMP19]], <2 x i32>* [[TMP22]], align 4 6736; UNROLL-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i64 2 6737; UNROLL-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <2 x i32>* 6738; UNROLL-NEXT: store <2 x i32> [[TMP20]], <2 x i32>* [[TMP24]], align 4 6739; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 6740; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[DOTSPLAT3]] 6741; UNROLL-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 6742; UNROLL-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] 6743; UNROLL: middle.block: 6744; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] 6745; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i64 1 6746; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 6747; UNROLL: scalar.ph: 6748; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] 6749; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] 6750; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] 6751; UNROLL-NEXT: br 
label [[LOOP:%.*]] 6752; UNROLL: loop: 6753; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] 6754; UNROLL-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] 6755; UNROLL-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] 6756; UNROLL-NEXT: [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24 6757; UNROLL-NEXT: [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24 6758; UNROLL-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]] 6759; UNROLL-NEXT: store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4 6760; UNROLL-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]] 6761; UNROLL-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1 6762; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]] 6763; UNROLL-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]] 6764; UNROLL: exit: 6765; UNROLL-NEXT: ret void 6766; 6767; UNROLL-NO-IC-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence( 6768; UNROLL-NO-IC-NEXT: entry: 6769; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 6770; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 6771; UNROLL-NO-IC: vector.scevcheck: 6772; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 6773; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8 6774; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]] 6775; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0 6776; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]] 6777; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8 6778; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]]) 6779; UNROLL-NO-IC-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 6780; UNROLL-NO-IC-NEXT: 
[[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 6781; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]] 6782; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]] 6783; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0 6784; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0 6785; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]] 6786; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255 6787; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0 6788; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]] 6789; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]] 6790; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]] 6791; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = sext i8 [[TMP1]] to i32 6792; UNROLL-NO-IC-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP17]] 6793; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = or i1 [[TMP15]], [[IDENT_CHECK]] 6794; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 6795; UNROLL-NO-IC: vector.ph: 6796; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 6797; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] 6798; UNROLL-NO-IC-NEXT: [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32 6799; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = mul i32 [[CAST_CRD]], [[STEP]] 6800; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i32 0 6801; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 6802; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]] 6803; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP19]] 6804; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = mul i32 [[STEP]], 2 6805; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP20]], i32 0 6806; UNROLL-NO-IC-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> 
[[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer 6807; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] 6808; UNROLL-NO-IC: vector.body: 6809; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6810; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] 6811; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 6812; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 0 6813; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 2 6814; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]] 6815; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2> 6816; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2> 6817; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP21]] 6818; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[TMP22]] 6819; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 0 6820; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP27]] to <2 x i32>* 6821; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP23]], <2 x i32>* [[TMP28]], align 4 6822; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 2 6823; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP29]] to <2 x i32>* 6824; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP24]], <2 x i32>* [[TMP30]], align 4 6825; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 6826; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[DOTSPLAT3]] 6827; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 6828; UNROLL-NO-IC-NEXT: br i1 [[TMP31]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] 6829; UNROLL-NO-IC: middle.block: 6830; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] 6831; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 1 6832; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 0 6833; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 6834; UNROLL-NO-IC: scalar.ph: 6835; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] 6836; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 6837; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] 6838; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] 6839; UNROLL-NO-IC: loop: 6840; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] 6841; UNROLL-NO-IC-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] 6842; UNROLL-NO-IC-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] 6843; UNROLL-NO-IC-NEXT: [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24 6844; UNROLL-NO-IC-NEXT: [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24 6845; UNROLL-NO-IC-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]] 6846; UNROLL-NO-IC-NEXT: store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4 6847; UNROLL-NO-IC-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]] 6848; UNROLL-NO-IC-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1 6849; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]] 6850; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop 
[[LOOP55:![0-9]+]] 6851; UNROLL-NO-IC: exit: 6852; UNROLL-NO-IC-NEXT: ret void 6853; 6854; INTERLEAVE-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence( 6855; INTERLEAVE-NEXT: entry: 6856; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 6857; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 6858; INTERLEAVE: vector.scevcheck: 6859; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 6860; INTERLEAVE-NEXT: [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8 6861; INTERLEAVE-NEXT: [[TMP2:%.*]] = sub i8 0, [[TMP1]] 6862; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0 6863; INTERLEAVE-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]] 6864; INTERLEAVE-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8 6865; INTERLEAVE-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]]) 6866; INTERLEAVE-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 6867; INTERLEAVE-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 6868; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128 6869; INTERLEAVE-NEXT: [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0 6870; INTERLEAVE-NEXT: [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]] 6871; INTERLEAVE-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255 6872; INTERLEAVE-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0 6873; INTERLEAVE-NEXT: [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]] 6874; INTERLEAVE-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] 6875; INTERLEAVE-NEXT: [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW]] 6876; INTERLEAVE-NEXT: [[TMP14:%.*]] = add i32 [[STEP]], -128 6877; INTERLEAVE-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], -256 6878; INTERLEAVE-NEXT: [[TMP16:%.*]] = or i1 [[TMP13]], [[TMP15]] 6879; INTERLEAVE-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 6880; INTERLEAVE: vector.ph: 6881; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8 6882; 
INTERLEAVE-NEXT: [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32 6883; INTERLEAVE-NEXT: [[IND_END:%.*]] = mul i32 [[CAST_CRD]], [[STEP]] 6884; INTERLEAVE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0 6885; INTERLEAVE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 6886; INTERLEAVE-NEXT: [[TMP17:%.*]] = mul <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3> 6887; INTERLEAVE-NEXT: [[TMP18:%.*]] = shl i32 [[STEP]], 2 6888; INTERLEAVE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0 6889; INTERLEAVE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer 6890; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] 6891; INTERLEAVE: vector.body: 6892; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 6893; INTERLEAVE-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] 6894; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[TMP17]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 6895; INTERLEAVE-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT3]] 6896; INTERLEAVE-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 6897; INTERLEAVE-NEXT: [[TMP20:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 6898; INTERLEAVE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[INDEX]] 6899; INTERLEAVE-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 6900; INTERLEAVE-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* [[TMP22]], align 4 6901; INTERLEAVE-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i64 4 6902; INTERLEAVE-NEXT: [[TMP24:%.*]] = bitcast i32* 
[[TMP23]] to <4 x i32>* 6903; INTERLEAVE-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP24]], align 4 6904; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 6905; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], [[DOTSPLAT3]] 6906; INTERLEAVE-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 6907; INTERLEAVE-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] 6908; INTERLEAVE: middle.block: 6909; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] 6910; INTERLEAVE-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i64 3 6911; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 6912; INTERLEAVE: scalar.ph: 6913; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] 6914; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] 6915; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] 6916; INTERLEAVE-NEXT: br label [[LOOP:%.*]] 6917; INTERLEAVE: loop: 6918; INTERLEAVE-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] 6919; INTERLEAVE-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] 6920; INTERLEAVE-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] 6921; INTERLEAVE-NEXT: [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24 6922; INTERLEAVE-NEXT: [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24 6923; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]] 6924; INTERLEAVE-NEXT: store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4 6925; INTERLEAVE-NEXT: [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]] 6926; 
INTERLEAVE-NEXT: [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1 6927; INTERLEAVE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]] 6928; INTERLEAVE-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]] 6929; INTERLEAVE: exit: 6930; INTERLEAVE-NEXT: ret void 6931; 6932entry: 6933 br label %loop 6934 6935loop: 6936 %for = phi i32 [ 0, %entry ], [ %iv.2.conv, %loop ] 6937 %iv.1 = phi i64 [ 0, %entry ], [ %iv.1.next, %loop ] 6938 %iv.2 = phi i32 [ 0, %entry ], [ %iv.2.next, %loop ] 6939 %iv.2.ext = shl i32 %iv.2, 24 6940 %iv.2.conv = ashr exact i32 %iv.2.ext, 24 6941 %gep = getelementptr inbounds i32, i32* %ptr, i64 %iv.1 6942 store i32 %for, i32* %gep, align 4 6943 %iv.2.next = add nsw i32 %iv.2.conv, %step 6944 %iv.1.next = add nuw nsw i64 %iv.1, 1 6945 %exitcond = icmp eq i64 %iv.1.next, %n 6946 br i1 %exitcond, label %exit, label %loop 6947 6948exit: 6949 ret void 6950} 6951