; Test @fp_iv_loop1_fast_FMF: floating-point induction variable in a counted loop.
; Each iteration stores %x.05 to %A[%indvars.iv], then steps it with
; `fsub fast` by %fpinc, which is loaded once from @fp_inc in for.body.lr.ph.
; The loop exits when the i32-truncated %indvars.iv.next equals %N; the entry
; block skips the loop unless %N > 0 (signed compare).
; The CHECK groups below cover the four RUN prefixes (VEC4_INTERL1,
; VEC4_INTERL2, VEC1_INTERL2, VEC2_INTERL1_PRED_STORE) and expect the `fast`
; flag on every fmul/fsub they match.
; They are autogenerated (see the NOTE line); regenerate them with
; utils/update_test_checks.py instead of editing them by hand.
; NOTE(review): the decimal prefixes fused onto the statements below (`1;`,
; `20define`, `246entry:` ...) look like line numbers left by an extraction
; step rather than IR - confirm against the upstream test before running.
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s 3; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s 4; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s 5; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -instcombine -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -keep-loops=false -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s 6 7@fp_inc = common global float 0.000000e+00, align 4 8 9;void fp_iv_loop1(float init, float * __restrict__ A, int N) { 10; float x = init; 11; for (int i=0; i < N; ++i) { 12; A[i] = x; 13; x -= fp_inc; 14; } 15;} 16 17 18 19 20define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32 %N) { 21; VEC4_INTERL1-LABEL: @fp_iv_loop1_fast_FMF( 22; VEC4_INTERL1-NEXT: entry: 23; VEC4_INTERL1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 24; VEC4_INTERL1-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 25; VEC4_INTERL1: for.body.lr.ph: 26; VEC4_INTERL1-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 27; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 28; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 29; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 30; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 31; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 32; VEC4_INTERL1: vector.ph: 33; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 34; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 35; 
VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]] 36; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] 37; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 38; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 39; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 40; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer 41; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 42; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP4]] 43; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], 4.000000e+00 44; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 45; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer 46; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 47; VEC4_INTERL1: vector.body: 48; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 49; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 50; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 51; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* 52; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4 53; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 54; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]] 55; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = icmp eq i64 
[[INDEX_NEXT]], [[N_VEC]] 56; VEC4_INTERL1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 57; VEC4_INTERL1: middle.block: 58; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 59; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 60; VEC4_INTERL1: scalar.ph: 61; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 62; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 63; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 64; VEC4_INTERL1: for.body: 65; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 66; VEC4_INTERL1-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 67; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 68; VEC4_INTERL1-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 69; VEC4_INTERL1-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] 70; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 71; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 72; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 73; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] 74; VEC4_INTERL1: for.end.loopexit: 75; VEC4_INTERL1-NEXT: br label [[FOR_END]] 76; VEC4_INTERL1: for.end: 77; VEC4_INTERL1-NEXT: ret void 78; 79; VEC4_INTERL2-LABEL: @fp_iv_loop1_fast_FMF( 80; VEC4_INTERL2-NEXT: entry: 81; VEC4_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 82; VEC4_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 83; VEC4_INTERL2: for.body.lr.ph: 84; VEC4_INTERL2-NEXT: [[FPINC:%.*]] = load 
float, float* @fp_inc, align 4 85; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 86; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 87; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 88; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 89; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 90; VEC4_INTERL2: vector.ph: 91; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 92; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 93; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]] 94; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] 95; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 96; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 97; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 98; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer 99; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 100; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP4]] 101; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], 4.000000e+00 102; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 103; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer 104; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 105; VEC4_INTERL2: vector.body: 106; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 107; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = 
phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 108; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]] 109; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 110; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* 111; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4 112; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 4 113; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>* 114; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP9]], align 4 115; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 116; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[STEP_ADD]], [[DOTSPLAT5]] 117; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 118; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 119; VEC4_INTERL2: middle.block: 120; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 121; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 122; VEC4_INTERL2: scalar.ph: 123; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 124; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 125; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 126; VEC4_INTERL2: for.body: 127; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 128; VEC4_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 129; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 130; VEC4_INTERL2-NEXT: 
store float [[X_05]], float* [[ARRAYIDX]], align 4 131; VEC4_INTERL2-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] 132; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 133; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 134; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 135; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] 136; VEC4_INTERL2: for.end.loopexit: 137; VEC4_INTERL2-NEXT: br label [[FOR_END]] 138; VEC4_INTERL2: for.end: 139; VEC4_INTERL2-NEXT: ret void 140; 141; VEC1_INTERL2-LABEL: @fp_iv_loop1_fast_FMF( 142; VEC1_INTERL2-NEXT: entry: 143; VEC1_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 144; VEC1_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 145; VEC1_INTERL2: for.body.lr.ph: 146; VEC1_INTERL2-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 147; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 148; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 149; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 150; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 151; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 152; VEC1_INTERL2: vector.ph: 153; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 154; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 155; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]] 156; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] 157; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 158; VEC1_INTERL2: vector.body: 159; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 160; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float 161; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float 
[[FPINC]], [[TMP4]] 162; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP5]] 163; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fsub fast float [[OFFSET_IDX]], [[FPINC]] 164; VEC1_INTERL2-NEXT: [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1 165; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 166; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]] 167; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], float* [[TMP7]], align 4 168; VEC1_INTERL2-NEXT: store float [[TMP6]], float* [[TMP8]], align 4 169; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 170; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 171; VEC1_INTERL2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 172; VEC1_INTERL2: middle.block: 173; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 174; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 175; VEC1_INTERL2: scalar.ph: 176; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 177; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 178; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 179; VEC1_INTERL2: for.body: 180; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 181; VEC1_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 182; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 183; VEC1_INTERL2-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 184; VEC1_INTERL2-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] 185; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 186; 
VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 187; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 188; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] 189; VEC1_INTERL2: for.end.loopexit: 190; VEC1_INTERL2-NEXT: br label [[FOR_END]] 191; VEC1_INTERL2: for.end: 192; VEC1_INTERL2-NEXT: ret void 193; 194; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop1_fast_FMF( 195; VEC2_INTERL1_PRED_STORE-NEXT: entry: 196; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 197; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 198; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: 199; VEC2_INTERL1_PRED_STORE-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 200; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 201; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 202; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 203; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 204; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 205; VEC2_INTERL1_PRED_STORE: vector.ph: 206; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 207; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 208; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]] 209; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] 210; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 211; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 212; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, 
float [[FPINC]], i64 0 213; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer 214; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00> 215; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP4]] 216; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 217; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0 218; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer 219; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 220; VEC2_INTERL1_PRED_STORE: vector.body: 221; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 222; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 223; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 224; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>* 225; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], <2 x float>* [[TMP7]], align 4 226; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 227; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[DOTSPLAT5]] 228; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 229; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 230; VEC2_INTERL1_PRED_STORE: middle.block: 231; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 232; VEC2_INTERL1_PRED_STORE-NEXT: br i1 
[[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 233; VEC2_INTERL1_PRED_STORE: for.body: 234; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 235; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 236; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 237; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 238; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] 239; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 240; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 241; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 242; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] 243; VEC2_INTERL1_PRED_STORE: for.end: 244; VEC2_INTERL1_PRED_STORE-NEXT: ret void 245; 246entry: 247 %cmp4 = icmp sgt i32 %N, 0 248 br i1 %cmp4, label %for.body.lr.ph, label %for.end 249 250for.body.lr.ph: ; preds = %entry 251 %fpinc = load float, float* @fp_inc, align 4 252 br label %for.body 253 254for.body: ; preds = %for.body, %for.body.lr.ph 255 %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] 256 %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ] 257 %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 258 store float %x.05, float* %arrayidx, align 4 259 %add = fsub fast float %x.05, %fpinc 260 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 261 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 262 %exitcond = icmp eq i32 %lftr.wideiv, %N 263 br i1 %exitcond, label %for.end.loopexit, label %for.body 264 265for.end.loopexit: 
; preds = %for.body 266 br label %for.end 267 268for.end: ; preds = %for.end.loopexit, %entry 269 ret void 270} 271 272; We do not need the full 'fast' FMF to vectorize the loop, but the code can't become 273; 'fast' spontaneously - FMF should propagate from the original IR. 274 275define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i32 %N) { 276; 277; 278; 279; 280; VEC4_INTERL1-LABEL: @fp_iv_loop1_reassoc_FMF( 281; VEC4_INTERL1-NEXT: entry: 282; VEC4_INTERL1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 283; VEC4_INTERL1-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 284; VEC4_INTERL1: for.body.lr.ph: 285; VEC4_INTERL1-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 286; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 287; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 288; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 289; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 290; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 291; VEC4_INTERL1: vector.ph: 292; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 293; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 294; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]] 295; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]] 296; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 297; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 298; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 299; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer 300; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul reassoc <4 x float> 
[[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 301; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP4]] 302; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00 303; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 304; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer 305; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 306; VEC4_INTERL1: vector.body: 307; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 308; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 309; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 310; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* 311; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4 312; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 313; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]] 314; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 315; VEC4_INTERL1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 316; VEC4_INTERL1: middle.block: 317; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 318; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 319; VEC4_INTERL1: scalar.ph: 320; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 321; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 322; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 323; 
VEC4_INTERL1: for.body: 324; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 325; VEC4_INTERL1-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 326; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 327; VEC4_INTERL1-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 328; VEC4_INTERL1-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] 329; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 330; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 331; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 332; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 333; VEC4_INTERL1: for.end.loopexit: 334; VEC4_INTERL1-NEXT: br label [[FOR_END]] 335; VEC4_INTERL1: for.end: 336; VEC4_INTERL1-NEXT: ret void 337; 338; VEC4_INTERL2-LABEL: @fp_iv_loop1_reassoc_FMF( 339; VEC4_INTERL2-NEXT: entry: 340; VEC4_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 341; VEC4_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 342; VEC4_INTERL2: for.body.lr.ph: 343; VEC4_INTERL2-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 344; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 345; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 346; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 347; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 348; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 349; VEC4_INTERL2: vector.ph: 350; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 351; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 352; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], 
[[CAST_CRD]] 353; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]] 354; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 355; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 356; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 357; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer 358; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 359; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP4]] 360; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00 361; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 362; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer 363; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 364; VEC4_INTERL2: vector.body: 365; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 366; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 367; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]] 368; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 369; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* 370; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4 371; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 4 372; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = bitcast float* 
[[TMP8]] to <4 x float>* 373; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP9]], align 4 374; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 375; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[STEP_ADD]], [[DOTSPLAT5]] 376; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 377; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 378; VEC4_INTERL2: middle.block: 379; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 380; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 381; VEC4_INTERL2: scalar.ph: 382; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 383; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 384; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 385; VEC4_INTERL2: for.body: 386; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 387; VEC4_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 388; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 389; VEC4_INTERL2-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 390; VEC4_INTERL2-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] 391; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 392; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 393; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 394; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 395; VEC4_INTERL2: for.end.loopexit: 396; VEC4_INTERL2-NEXT: br label [[FOR_END]] 397; VEC4_INTERL2: for.end: 
398; VEC4_INTERL2-NEXT: ret void 399; 400; VEC1_INTERL2-LABEL: @fp_iv_loop1_reassoc_FMF( 401; VEC1_INTERL2-NEXT: entry: 402; VEC1_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 403; VEC1_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 404; VEC1_INTERL2: for.body.lr.ph: 405; VEC1_INTERL2-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 406; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 407; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 408; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 409; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 410; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 411; VEC1_INTERL2: vector.ph: 412; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 413; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 414; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]] 415; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]] 416; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 417; VEC1_INTERL2: vector.body: 418; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 419; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float 420; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], [[TMP4]] 421; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub reassoc float [[INIT]], [[TMP5]] 422; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fmul reassoc float [[FPINC]], 0.000000e+00 423; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[TMP6]] 424; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[FPINC]] 425; VEC1_INTERL2-NEXT: [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1 426; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 427; VEC1_INTERL2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, 
float* [[A]], i64 [[INDUCTION2]] 428; VEC1_INTERL2-NEXT: store float [[TMP7]], float* [[TMP9]], align 4 429; VEC1_INTERL2-NEXT: store float [[TMP8]], float* [[TMP10]], align 4 430; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 431; VEC1_INTERL2-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 432; VEC1_INTERL2-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 433; VEC1_INTERL2: middle.block: 434; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 435; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 436; VEC1_INTERL2: scalar.ph: 437; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 438; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 439; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 440; VEC1_INTERL2: for.body: 441; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 442; VEC1_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 443; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 444; VEC1_INTERL2-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 445; VEC1_INTERL2-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] 446; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 447; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 448; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 449; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 450; VEC1_INTERL2: for.end.loopexit: 451; VEC1_INTERL2-NEXT: br label [[FOR_END]] 452; VEC1_INTERL2: for.end: 453; VEC1_INTERL2-NEXT: ret void 
454; 455; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop1_reassoc_FMF( 456; VEC2_INTERL1_PRED_STORE-NEXT: entry: 457; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 458; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 459; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: 460; VEC2_INTERL1_PRED_STORE-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 461; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 462; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 463; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 464; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 465; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 466; VEC2_INTERL1_PRED_STORE: vector.ph: 467; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 468; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 469; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]] 470; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]] 471; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 472; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 473; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 474; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer 475; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00> 476; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT]], [[TMP4]] 477; 
VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00 478; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0 479; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer 480; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 481; VEC2_INTERL1_PRED_STORE: vector.body: 482; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 483; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 484; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 485; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>* 486; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], <2 x float>* [[TMP7]], align 4 487; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 488; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <2 x float> [[VEC_IND]], [[DOTSPLAT5]] 489; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 490; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 491; VEC2_INTERL1_PRED_STORE: middle.block: 492; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 493; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 494; VEC2_INTERL1_PRED_STORE: for.body: 495; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 496; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], 
[[FOR_BODY_LR_PH]] ] 497; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 498; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 499; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] 500; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 501; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 502; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 503; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 504; VEC2_INTERL1_PRED_STORE: for.end: 505; VEC2_INTERL1_PRED_STORE-NEXT: ret void 506; 507entry: 508 %cmp4 = icmp sgt i32 %N, 0 509 br i1 %cmp4, label %for.body.lr.ph, label %for.end 510 511for.body.lr.ph: ; preds = %entry 512 %fpinc = load float, float* @fp_inc, align 4 513 br label %for.body 514 515for.body: ; preds = %for.body, %for.body.lr.ph 516 %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] 517 %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ] 518 %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 519 store float %x.05, float* %arrayidx, align 4 520 %add = fsub reassoc float %x.05, %fpinc 521 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 522 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 523 %exitcond = icmp eq i32 %lftr.wideiv, %N 524 br i1 %exitcond, label %for.end.loopexit, label %for.body 525 526for.end.loopexit: ; preds = %for.body 527 br label %for.end 528 529for.end: ; preds = %for.end.loopexit, %entry 530 ret void 531} 532 533;void fp_iv_loop2(float init, float * __restrict__ A, int N) { 534; float x = init; 535; for (int i=0; i < N; ++i) { 536; A[i] = x; 537; x += 0.5; 538; } 539;} 540 541; fp_iv_loop2 - float induction with a constant step: %x.06 starts at %init and is advanced by 0.5 with a fast-math fadd, and the value stored to A[i] is the one from before the increment. The assertions that follow expect the vector induction to start at init + lane * 0.5 and to advance by 0.5 * (elements per vector iteration), i.e. 2.0 at width 4, 4.0 at width 4 with interleave 2 (second part = first part + 2.0) and 1.0 at width 2, while the width 1 / interleave 2 run rebuilds init + index * 0.5 from the integer index. 542define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 { 543; 
VEC4_INTERL1-LABEL: @fp_iv_loop2( 544; VEC4_INTERL1-NEXT: entry: 545; VEC4_INTERL1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 546; VEC4_INTERL1-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 547; VEC4_INTERL1: for.body.preheader: 548; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 549; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 550; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 551; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 552; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 553; VEC4_INTERL1: vector.ph: 554; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 555; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 556; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 557; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] 558; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 559; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 560; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00> 561; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 562; VEC4_INTERL1: vector.body: 563; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 564; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 565; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 566; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>* 567; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4 568; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = 
add nuw i64 [[INDEX]], 4 569; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> 570; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 571; VEC4_INTERL1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 572; VEC4_INTERL1: middle.block: 573; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 574; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 575; VEC4_INTERL1: scalar.ph: 576; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 577; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] 578; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 579; VEC4_INTERL1: for.body: 580; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 581; VEC4_INTERL1-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 582; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 583; VEC4_INTERL1-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 584; VEC4_INTERL1-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 585; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 586; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 587; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 588; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 589; VEC4_INTERL1: for.end.loopexit: 590; VEC4_INTERL1-NEXT: br label [[FOR_END]] 591; VEC4_INTERL1: for.end: 592; VEC4_INTERL1-NEXT: ret void 593; 594; VEC4_INTERL2-LABEL: @fp_iv_loop2( 
595; VEC4_INTERL2-NEXT: entry: 596; VEC4_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 597; VEC4_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 598; VEC4_INTERL2: for.body.preheader: 599; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 600; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 601; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 602; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 603; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 604; VEC4_INTERL2: vector.ph: 605; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 606; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 607; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 608; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] 609; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 610; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 611; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00> 612; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 613; VEC4_INTERL2: vector.body: 614; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 615; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 616; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> 617; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 618; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>* 
619; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4 620; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 4 621; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* 622; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP7]], align 4 623; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 624; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00> 625; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 626; VEC4_INTERL2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 627; VEC4_INTERL2: middle.block: 628; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 629; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 630; VEC4_INTERL2: scalar.ph: 631; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 632; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] 633; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 634; VEC4_INTERL2: for.body: 635; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 636; VEC4_INTERL2-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 637; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 638; VEC4_INTERL2-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 639; VEC4_INTERL2-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 640; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 641; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 642; 
VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 643; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 644; VEC4_INTERL2: for.end.loopexit: 645; VEC4_INTERL2-NEXT: br label [[FOR_END]] 646; VEC4_INTERL2: for.end: 647; VEC4_INTERL2-NEXT: ret void 648; 649; VEC1_INTERL2-LABEL: @fp_iv_loop2( 650; VEC1_INTERL2-NEXT: entry: 651; VEC1_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 652; VEC1_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 653; VEC1_INTERL2: for.body.preheader: 654; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 655; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 656; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 657; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 658; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 659; VEC1_INTERL2: vector.ph: 660; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 661; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 662; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 663; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] 664; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 665; VEC1_INTERL2: vector.body: 666; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 667; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float 668; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 5.000000e-01 669; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[TMP5]], [[INIT]] 670; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fadd fast float [[OFFSET_IDX]], 5.000000e-01 671; VEC1_INTERL2-NEXT: [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1 672; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 673; 
VEC1_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]] 674; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], float* [[TMP7]], align 4 675; VEC1_INTERL2-NEXT: store float [[TMP6]], float* [[TMP8]], align 4 676; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 677; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 678; VEC1_INTERL2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 679; VEC1_INTERL2: middle.block: 680; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 681; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 682; VEC1_INTERL2: scalar.ph: 683; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 684; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] 685; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 686; VEC1_INTERL2: for.body: 687; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 688; VEC1_INTERL2-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 689; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 690; VEC1_INTERL2-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 691; VEC1_INTERL2-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 692; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 693; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 694; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 695; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 696; VEC1_INTERL2: for.end.loopexit: 697; VEC1_INTERL2-NEXT: br 
label [[FOR_END]] 698; VEC1_INTERL2: for.end: 699; VEC1_INTERL2-NEXT: ret void 700; 701; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop2( 702; VEC2_INTERL1_PRED_STORE-NEXT: entry: 703; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 704; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 705; VEC2_INTERL1_PRED_STORE: for.body.preheader: 706; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 707; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 708; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 709; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 710; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 711; VEC2_INTERL1_PRED_STORE: vector.ph: 712; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 713; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 714; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 715; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] 716; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 717; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 718; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01> 719; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 720; VEC2_INTERL1_PRED_STORE: vector.body: 721; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 722; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 723; 
VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 724; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <2 x float>* 725; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], <2 x float>* [[TMP5]], align 4 726; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 727; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float 1.000000e+00, float 1.000000e+00> 728; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 729; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 730; VEC2_INTERL1_PRED_STORE: middle.block: 731; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 732; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 733; VEC2_INTERL1_PRED_STORE: for.body: 734; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 735; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] 736; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 737; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 738; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 739; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 740; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 741; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 742; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 
743; VEC2_INTERL1_PRED_STORE: for.end: 744; VEC2_INTERL1_PRED_STORE-NEXT: ret void 745; 746entry: 747 %cmp4 = icmp sgt i32 %N, 0 748 br i1 %cmp4, label %for.body.preheader, label %for.end 749 750for.body.preheader: ; preds = %entry 751 br label %for.body 752 753for.body: ; preds = %for.body.preheader, %for.body 754 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] 755 %x.06 = phi float [ %conv1, %for.body ], [ %init, %for.body.preheader ] ; x: %init on loop entry, %conv1 on the back edge 756 %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 757 store float %x.06, float* %arrayidx, align 4 ; A[i] = x, using the value from before the increment 758 %conv1 = fadd fast float %x.06, 5.000000e-01 ; x += 0.5, the FP induction step 759 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 760 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 761 %exitcond = icmp eq i32 %lftr.wideiv, %N ; leave the loop once the 32-bit truncated next index equals %N 762 br i1 %exitcond, label %for.end.loopexit, label %for.body 763 764for.end.loopexit: ; preds = %for.body 765 br label %for.end 766 767for.end: ; preds = %for.end.loopexit, %entry 768 ret void 769} 770 771;void fp_iv_loop3(float init, float * __restrict__ A, float * __restrict__ B, float * __restrict__ C, int N) { 772; int i = 0; 773; float x = init; 774; float y = 0.1; 775; for (; i < N; ++i) { 776; A[i] = x; 777; x += fp_inc; 778; y -= 0.5; 779; B[i] = x + y; 780; C[i] = y; 781; } 782;} 783 784 785define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalias nocapture %B, float* noalias nocapture %C, i32 %N) { 786; VEC4_INTERL1-LABEL: @fp_iv_loop3( 787; VEC4_INTERL1-NEXT: entry: 788; VEC4_INTERL1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 789; VEC4_INTERL1-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 790; VEC4_INTERL1: for.body.lr.ph: 791; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = load float, float* @fp_inc, align 4 792; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1 793; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 794; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 795; VEC4_INTERL1-NEXT: 
[[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 3 796; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 797; VEC4_INTERL1: vector.ph: 798; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934588 799; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 800; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01 801; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000 802; VEC4_INTERL1-NEXT: [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float 803; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]] 804; VEC4_INTERL1-NEXT: [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] 805; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 806; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 807; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 808; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer 809; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[DOTSPLAT6]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 810; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP6]] 811; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fmul fast float [[TMP0]], 4.000000e+00 812; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i64 0 813; VEC4_INTERL1-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer 814; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 815; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x 
float> poison, <4 x i32> zeroinitializer 816; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 817; VEC4_INTERL1: vector.body: 818; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 819; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000, float 0xBFECCCCCC0000000, float 0xBFF6666660000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 820; VEC4_INTERL1-NEXT: [[VEC_IND9:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ] 821; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 822; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>* 823; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND9]], <4 x float>* [[TMP9]], align 4 824; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] 825; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01> 826; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP10]] 827; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] 828; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = bitcast float* [[TMP13]] to <4 x float>* 829; VEC4_INTERL1-NEXT: store <4 x float> [[TMP12]], <4 x float>* [[TMP14]], align 4 830; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]] 831; VEC4_INTERL1-NEXT: [[TMP16:%.*]] = bitcast float* [[TMP15]] to <4 x float>* 832; VEC4_INTERL1-NEXT: store <4 x float> [[TMP11]], <4 x float>* [[TMP16]], align 4 833; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 834; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00> 835; VEC4_INTERL1-NEXT: 
[[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]] 836; VEC4_INTERL1-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 837; VEC4_INTERL1-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 838; VEC4_INTERL1: middle.block: 839; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] 840; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 841; VEC4_INTERL1: scalar.ph: 842; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 843; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] 844; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 845; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 846; VEC4_INTERL1: for.body: 847; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 848; VEC4_INTERL1-NEXT: [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ] 849; VEC4_INTERL1-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 850; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 851; VEC4_INTERL1-NEXT: store float [[X_011]], float* [[ARRAYIDX]], align 4 852; VEC4_INTERL1-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] 853; VEC4_INTERL1-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 854; VEC4_INTERL1-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]] 855; VEC4_INTERL1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] 856; VEC4_INTERL1-NEXT: store float [[ADD2]], float* [[ARRAYIDX4]], align 4 857; VEC4_INTERL1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* 
[[C]], i64 [[INDVARS_IV]] 858; VEC4_INTERL1-NEXT: store float [[CONV1]], float* [[ARRAYIDX6]], align 4 859; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 860; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 861; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 862; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 863; VEC4_INTERL1: for.end.loopexit: 864; VEC4_INTERL1-NEXT: br label [[FOR_END]] 865; VEC4_INTERL1: for.end: 866; VEC4_INTERL1-NEXT: ret void 867; 868; VEC4_INTERL2-LABEL: @fp_iv_loop3( 869; VEC4_INTERL2-NEXT: entry: 870; VEC4_INTERL2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 871; VEC4_INTERL2-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 872; VEC4_INTERL2: for.body.lr.ph: 873; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = load float, float* @fp_inc, align 4 874; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1 875; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 876; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 877; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 7 878; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 879; VEC4_INTERL2: vector.ph: 880; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934584 881; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 882; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01 883; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000 884; VEC4_INTERL2-NEXT: [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float 885; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]] 886; VEC4_INTERL2-NEXT: [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] 887; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 888; 
VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 889; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 890; VEC4_INTERL2-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> poison, <4 x i32> zeroinitializer 891; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[DOTSPLAT7]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 892; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP6]] 893; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast float [[TMP0]], 4.000000e+00 894; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i64 0 895; VEC4_INTERL2-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT8]], <4 x float> poison, <4 x i32> zeroinitializer 896; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 897; VEC4_INTERL2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 898; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 899; VEC4_INTERL2-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT14]], <4 x float> poison, <4 x i32> zeroinitializer 900; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 901; VEC4_INTERL2: vector.body: 902; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 903; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000, float 0xBFECCCCCC0000000, float 0xBFF6666660000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 904; VEC4_INTERL2-NEXT: [[VEC_IND10:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ 
[[VEC_IND_NEXT13:%.*]], [[VECTOR_BODY]] ] 905; VEC4_INTERL2-NEXT: [[STEP_ADD11:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[DOTSPLAT9]] 906; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 907; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>* 908; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND10]], <4 x float>* [[TMP9]], align 4 909; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 4 910; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>* 911; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD11]], <4 x float>* [[TMP11]], align 4 912; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[BROADCAST_SPLAT]] 913; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = fadd fast <4 x float> [[STEP_ADD11]], [[BROADCAST_SPLAT15]] 914; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01> 915; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -2.500000e+00, float -2.500000e+00, float -2.500000e+00, float -2.500000e+00> 916; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[TMP14]], [[TMP12]] 917; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = fadd fast <4 x float> [[TMP15]], [[TMP13]] 918; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] 919; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <4 x float>* 920; VEC4_INTERL2-NEXT: store <4 x float> [[TMP16]], <4 x float>* [[TMP19]], align 4 921; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 4 922; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP20]] to <4 x float>* 923; VEC4_INTERL2-NEXT: store <4 x float> [[TMP17]], <4 x float>* [[TMP21]], align 4 924; VEC4_INTERL2-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]] 925; 
VEC4_INTERL2-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP22]] to <4 x float>* 926; VEC4_INTERL2-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[TMP23]], align 4 927; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 4 928; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to <4 x float>* 929; VEC4_INTERL2-NEXT: store <4 x float> [[TMP15]], <4 x float>* [[TMP25]], align 4 930; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 931; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float -4.000000e+00, float -4.000000e+00, float -4.000000e+00, float -4.000000e+00> 932; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT13]] = fadd fast <4 x float> [[STEP_ADD11]], [[DOTSPLAT9]] 933; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 934; VEC4_INTERL2-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 935; VEC4_INTERL2: middle.block: 936; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] 937; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 938; VEC4_INTERL2: scalar.ph: 939; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 940; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] 941; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 942; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 943; VEC4_INTERL2: for.body: 944; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 945; VEC4_INTERL2-NEXT: [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ] 946; VEC4_INTERL2-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], 
[[FOR_BODY]] ] 947; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 948; VEC4_INTERL2-NEXT: store float [[X_011]], float* [[ARRAYIDX]], align 4 949; VEC4_INTERL2-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] 950; VEC4_INTERL2-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 951; VEC4_INTERL2-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]] 952; VEC4_INTERL2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] 953; VEC4_INTERL2-NEXT: store float [[ADD2]], float* [[ARRAYIDX4]], align 4 954; VEC4_INTERL2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]] 955; VEC4_INTERL2-NEXT: store float [[CONV1]], float* [[ARRAYIDX6]], align 4 956; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 957; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 958; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 959; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 960; VEC4_INTERL2: for.end.loopexit: 961; VEC4_INTERL2-NEXT: br label [[FOR_END]] 962; VEC4_INTERL2: for.end: 963; VEC4_INTERL2-NEXT: ret void 964; 965; VEC1_INTERL2-LABEL: @fp_iv_loop3( 966; VEC1_INTERL2-NEXT: entry: 967; VEC1_INTERL2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 968; VEC1_INTERL2-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 969; VEC1_INTERL2: for.body.lr.ph: 970; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = load float, float* @fp_inc, align 4 971; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1 972; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 973; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 974; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP1]], 0 975; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 976; 
VEC1_INTERL2: vector.ph: 977; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934590 978; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 979; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01 980; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000 981; VEC1_INTERL2-NEXT: [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float 982; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]] 983; VEC1_INTERL2-NEXT: [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] 984; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 985; VEC1_INTERL2: vector.body: 986; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 987; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = sitofp i64 [[INDEX]] to float 988; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast float [[TMP0]], [[TMP6]] 989; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[TMP7]], [[INIT]] 990; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP0]] 991; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = sitofp i64 [[INDEX]] to float 992; VEC1_INTERL2-NEXT: [[TMP10:%.*]] = fmul fast float [[TMP9]], -5.000000e-01 993; VEC1_INTERL2-NEXT: [[INDUCTION6:%.*]] = or i64 [[INDEX]], 1 994; VEC1_INTERL2-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 995; VEC1_INTERL2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION6]] 996; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], float* [[TMP11]], align 4 997; VEC1_INTERL2-NEXT: store float [[TMP8]], float* [[TMP12]], align 4 998; VEC1_INTERL2-NEXT: [[TMP13:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP0]] 999; VEC1_INTERL2-NEXT: [[TMP14:%.*]] = fadd fast float [[TMP8]], [[TMP0]] 1000; VEC1_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP10]], 0xBFD99999A0000000 1001; VEC1_INTERL2-NEXT: [[TMP16:%.*]] = fadd fast float [[TMP10]], 0xBFECCCCCC0000000 1002; VEC1_INTERL2-NEXT: 
[[TMP17:%.*]] = fadd fast float [[TMP15]], [[TMP13]] 1003; VEC1_INTERL2-NEXT: [[TMP18:%.*]] = fadd fast float [[TMP16]], [[TMP14]] 1004; VEC1_INTERL2-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] 1005; VEC1_INTERL2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDUCTION6]] 1006; VEC1_INTERL2-NEXT: store float [[TMP17]], float* [[TMP19]], align 4 1007; VEC1_INTERL2-NEXT: store float [[TMP18]], float* [[TMP20]], align 4 1008; VEC1_INTERL2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]] 1009; VEC1_INTERL2-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDUCTION6]] 1010; VEC1_INTERL2-NEXT: store float [[TMP15]], float* [[TMP21]], align 4 1011; VEC1_INTERL2-NEXT: store float [[TMP16]], float* [[TMP22]], align 4 1012; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1013; VEC1_INTERL2-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1014; VEC1_INTERL2-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 1015; VEC1_INTERL2: middle.block: 1016; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] 1017; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 1018; VEC1_INTERL2: scalar.ph: 1019; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 1020; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] 1021; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 1022; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 1023; VEC1_INTERL2: for.body: 1024; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1025; VEC1_INTERL2-NEXT: [[Y_012:%.*]] = phi float [ 
[[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ] 1026; VEC1_INTERL2-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 1027; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 1028; VEC1_INTERL2-NEXT: store float [[X_011]], float* [[ARRAYIDX]], align 4 1029; VEC1_INTERL2-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] 1030; VEC1_INTERL2-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 1031; VEC1_INTERL2-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]] 1032; VEC1_INTERL2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] 1033; VEC1_INTERL2-NEXT: store float [[ADD2]], float* [[ARRAYIDX4]], align 4 1034; VEC1_INTERL2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]] 1035; VEC1_INTERL2-NEXT: store float [[CONV1]], float* [[ARRAYIDX6]], align 4 1036; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1037; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1038; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 1039; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 1040; VEC1_INTERL2: for.end.loopexit: 1041; VEC1_INTERL2-NEXT: br label [[FOR_END]] 1042; VEC1_INTERL2: for.end: 1043; VEC1_INTERL2-NEXT: ret void 1044; 1045; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop3( 1046; VEC2_INTERL1_PRED_STORE-NEXT: entry: 1047; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 1048; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 1049; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: 1050; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = load float, float* @fp_inc, align 4 1051; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1 1052; 
VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 1053; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 1054; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP1]], 0 1055; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 1056; VEC2_INTERL1_PRED_STORE: vector.ph: 1057; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934590 1058; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 1059; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01 1060; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000 1061; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float 1062; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]] 1063; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] 1064; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 1065; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 1066; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 1067; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT5]], <2 x float> poison, <2 x i32> zeroinitializer 1068; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[DOTSPLAT6]], <float 0.000000e+00, float 1.000000e+00> 1069; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP6]] 1070; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fmul fast float [[TMP0]], 2.000000e+00 1071; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i64 0 1072; 
VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT7]], <2 x float> poison, <2 x i32> zeroinitializer 1073; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 1074; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 1075; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 1076; VEC2_INTERL1_PRED_STORE: vector.body: 1077; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1078; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1079; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND9:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ] 1080; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 1081; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <2 x float>* 1082; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND9]], <2 x float>* [[TMP9]], align 4 1083; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] 1084; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = fadd fast <2 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01> 1085; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = fadd fast <2 x float> [[TMP11]], [[TMP10]] 1086; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] 1087; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP14:%.*]] = bitcast float* [[TMP13]] to <2 x float>* 1088; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP12]], <2 x float>* [[TMP14]], align 4 1089; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP15:%.*]] = getelementptr 
inbounds float, float* [[C:%.*]], i64 [[INDEX]] 1090; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP16:%.*]] = bitcast float* [[TMP15]] to <2 x float>* 1091; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP11]], <2 x float>* [[TMP16]], align 4 1092; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1093; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float -1.000000e+00, float -1.000000e+00> 1094; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]] 1095; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1096; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 1097; VEC2_INTERL1_PRED_STORE: middle.block: 1098; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] 1099; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 1100; VEC2_INTERL1_PRED_STORE: for.body: 1101; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 1102; VEC2_INTERL1_PRED_STORE-NEXT: [[Y_012:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] 1103; VEC2_INTERL1_PRED_STORE-NEXT: [[X_011:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 1104; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 1105; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_011]], float* [[ARRAYIDX]], align 4 1106; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] 1107; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 1108; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], 
[[ADD]] 1109; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] 1110; VEC2_INTERL1_PRED_STORE-NEXT: store float [[ADD2]], float* [[ARRAYIDX4]], align 4 1111; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]] 1112; VEC2_INTERL1_PRED_STORE-NEXT: store float [[CONV1]], float* [[ARRAYIDX6]], align 4 1113; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1114; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1115; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 1116; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 1117; VEC2_INTERL1_PRED_STORE: for.end: 1118; VEC2_INTERL1_PRED_STORE-NEXT: ret void 1119; 1120entry: 1121 %cmp9 = icmp sgt i32 %N, 0 1122 br i1 %cmp9, label %for.body.lr.ph, label %for.end 1123 1124for.body.lr.ph: ; preds = %entry 1125 %0 = load float, float* @fp_inc, align 4 1126 br label %for.body 1127 1128for.body: ; preds = %for.body, %for.body.lr.ph 1129 %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] 1130 %y.012 = phi float [ 0x3FB99999A0000000, %for.body.lr.ph ], [ %conv1, %for.body ] 1131 %x.011 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ] 1132 %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 1133 store float %x.011, float* %arrayidx, align 4 1134 %add = fadd fast float %x.011, %0 1135 %conv1 = fadd fast float %y.012, -5.000000e-01 1136 %add2 = fadd fast float %conv1, %add 1137 %arrayidx4 = getelementptr inbounds float, float* %B, i64 %indvars.iv 1138 store float %add2, float* %arrayidx4, align 4 1139 %arrayidx6 = getelementptr inbounds float, float* %C, i64 %indvars.iv 1140 store float %conv1, float* %arrayidx6, align 4 1141 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1142 
%lftr.wideiv = trunc i64 %indvars.iv.next to i32 1143 %exitcond = icmp eq i32 %lftr.wideiv, %N 1144 br i1 %exitcond, label %for.end.loopexit, label %for.body 1145 1146for.end.loopexit: 1147 br label %for.end 1148 1149for.end: 1150 ret void 1151} 1152 1153; Start and step values are constants. There is no 'fmul' operation in this case 1154;void fp_iv_loop4(float * __restrict__ A, int N) { 1155; float x = 1.0; 1156; for (int i=0; i < N; ++i) { 1157; A[i] = x; 1158; x += 0.5; 1159; } 1160;} 1161 1162 1163define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) { 1164; VEC4_INTERL1-LABEL: @fp_iv_loop4( 1165; VEC4_INTERL1-NEXT: entry: 1166; VEC4_INTERL1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 1167; VEC4_INTERL1-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 1168; VEC4_INTERL1: for.body.preheader: 1169; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 1170; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 1171; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1172; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 1173; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1174; VEC4_INTERL1: vector.ph: 1175; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 1176; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 1177; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 1178; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 1179; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 1180; VEC4_INTERL1: vector.body: 1181; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1182; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1183; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = 
getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 1184; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>* 1185; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4 1186; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1187; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> 1188; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1189; VEC4_INTERL1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 1190; VEC4_INTERL1: middle.block: 1191; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1192; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 1193; VEC4_INTERL1: scalar.ph: 1194; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 1195; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] 1196; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 1197; VEC4_INTERL1: for.body: 1198; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1199; VEC4_INTERL1-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 1200; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 1201; VEC4_INTERL1-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 1202; VEC4_INTERL1-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 1203; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1204; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1205; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 1206; 
VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 1207; VEC4_INTERL1: for.end.loopexit: 1208; VEC4_INTERL1-NEXT: br label [[FOR_END]] 1209; VEC4_INTERL1: for.end: 1210; VEC4_INTERL1-NEXT: ret void 1211; 1212; VEC4_INTERL2-LABEL: @fp_iv_loop4( 1213; VEC4_INTERL2-NEXT: entry: 1214; VEC4_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 1215; VEC4_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 1216; VEC4_INTERL2: for.body.preheader: 1217; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 1218; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 1219; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1220; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 1221; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1222; VEC4_INTERL2: vector.ph: 1223; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 1224; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 1225; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 1226; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 1227; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 1228; VEC4_INTERL2: vector.body: 1229; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1230; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1231; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> 1232; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 1233; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>* 
1234; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4 1235; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 4 1236; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* 1237; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP7]], align 4 1238; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 1239; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00> 1240; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1241; VEC4_INTERL2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 1242; VEC4_INTERL2: middle.block: 1243; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1244; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 1245; VEC4_INTERL2: scalar.ph: 1246; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 1247; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] 1248; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 1249; VEC4_INTERL2: for.body: 1250; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1251; VEC4_INTERL2-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 1252; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 1253; VEC4_INTERL2-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 1254; VEC4_INTERL2-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 1255; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1256; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 
[[INDVARS_IV_NEXT]] to i32 1257; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 1258; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 1259; VEC4_INTERL2: for.end.loopexit: 1260; VEC4_INTERL2-NEXT: br label [[FOR_END]] 1261; VEC4_INTERL2: for.end: 1262; VEC4_INTERL2-NEXT: ret void 1263; 1264; VEC1_INTERL2-LABEL: @fp_iv_loop4( 1265; VEC1_INTERL2-NEXT: entry: 1266; VEC1_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 1267; VEC1_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 1268; VEC1_INTERL2: for.body.preheader: 1269; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 1270; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 1271; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1272; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 1273; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1274; VEC1_INTERL2: vector.ph: 1275; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 1276; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 1277; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 1278; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 1279; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 1280; VEC1_INTERL2: vector.body: 1281; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1282; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float 1283; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 5.000000e-01 1284; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[TMP5]], 1.000000e+00 1285; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fadd fast float [[TMP5]], 1.500000e+00 1286; VEC1_INTERL2-NEXT: [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1 1287; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds float, float* [[A:%.*]], i64 [[INDEX]] 1288; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]] 1289; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], float* [[TMP7]], align 4 1290; VEC1_INTERL2-NEXT: store float [[TMP6]], float* [[TMP8]], align 4 1291; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1292; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1293; VEC1_INTERL2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 1294; VEC1_INTERL2: middle.block: 1295; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1296; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 1297; VEC1_INTERL2: scalar.ph: 1298; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 1299; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] 1300; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 1301; VEC1_INTERL2: for.body: 1302; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1303; VEC1_INTERL2-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 1304; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 1305; VEC1_INTERL2-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 1306; VEC1_INTERL2-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 1307; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1308; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1309; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 1310; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop 
[[LOOP10:![0-9]+]] 1311; VEC1_INTERL2: for.end.loopexit: 1312; VEC1_INTERL2-NEXT: br label [[FOR_END]] 1313; VEC1_INTERL2: for.end: 1314; VEC1_INTERL2-NEXT: ret void 1315; 1316; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop4( 1317; VEC2_INTERL1_PRED_STORE-NEXT: entry: 1318; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 1319; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 1320; VEC2_INTERL1_PRED_STORE: for.body.preheader: 1321; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 1322; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 1323; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1324; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 1325; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 1326; VEC2_INTERL1_PRED_STORE: vector.ph: 1327; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 1328; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 1329; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 1330; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 1331; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 1332; VEC2_INTERL1_PRED_STORE: vector.body: 1333; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1334; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ <float 1.000000e+00, float 1.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1335; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 1336; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <2 x float>* 1337; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], <2 x 
float>* [[TMP5]], align 4 1338; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1339; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float 1.000000e+00, float 1.000000e+00> 1340; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1341; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 1342; VEC2_INTERL1_PRED_STORE: middle.block: 1343; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1344; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 1345; VEC2_INTERL1_PRED_STORE: for.body: 1346; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 1347; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] 1348; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 1349; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 1350; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 1351; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1352; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1353; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 1354; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 1355; VEC2_INTERL1_PRED_STORE: for.end: 1356; VEC2_INTERL1_PRED_STORE-NEXT: ret void 1357; 1358entry: 1359 %cmp4 = icmp sgt i32 %N, 0 1360 br i1 %cmp4, label %for.body.preheader, label %for.end 1361 1362for.body.preheader: ; preds = %entry 1363 
br label %for.body 1364 1365for.body: ; preds = %for.body.preheader, %for.body 1366 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] 1367 %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ] 1368 %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 1369 store float %x.06, float* %arrayidx, align 4 1370 %conv1 = fadd fast float %x.06, 5.000000e-01 1371 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1372 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 1373 %exitcond = icmp eq i32 %lftr.wideiv, %N 1374 br i1 %exitcond, label %for.end.loopexit, label %for.body 1375 1376for.end.loopexit: ; preds = %for.body 1377 br label %for.end 1378 1379for.end: ; preds = %for.end.loopexit, %entry 1380 ret void 1381} 1382 1383

; A float induction (%j) that is not the loop's primary induction variable and
; whose only use is a store guarded by a condition (block if.pred).
; Rough C equivalent of the IR below:
;
;   i = 0; j = 0.0f;
;   do {
;     if (A[i] == 0.0f)
;       A[i] = j;
;     ++i; j += 1.0f;
;   } while (i < N);
;
; In the vectorized check sequences, %j appears as a scalar sitofp of the
; vector loop index plus a per-lane constant inside each pred.store.if block;
; no vector float induction phi is expected. The pred.store.* block labels must
; agree with the names captured at the branch that targets them.
define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
; VEC4_INTERL1-LABEL: @non_primary_iv_float_scalar(
; VEC4_INTERL1-NEXT:  entry:
; VEC4_INTERL1-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; VEC4_INTERL1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
; VEC4_INTERL1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VEC4_INTERL1:       vector.ph:
; VEC4_INTERL1-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
; VEC4_INTERL1-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
; VEC4_INTERL1-NEXT:    br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL1:       vector.body:
; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to <4 x float>*
; VEC4_INTERL1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
; VEC4_INTERL1-NEXT:    [[TMP3:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
; VEC4_INTERL1-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC4_INTERL1:       pred.store.if:
; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
; VEC4_INTERL1-NEXT:    store float [[TMP0]], float* [[TMP5]], align 4
; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE]]
; VEC4_INTERL1:       pred.store.continue:
; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
; VEC4_INTERL1-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
; VEC4_INTERL1:       pred.store.if2:
; VEC4_INTERL1-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC4_INTERL1-NEXT:    [[TMP8:%.*]] = or i64 [[INDEX]], 1
; VEC4_INTERL1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]]
; VEC4_INTERL1-NEXT:    store float [[TMP7]], float* [[TMP9]], align 4
; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE3]]
; VEC4_INTERL1:       pred.store.continue3:
; VEC4_INTERL1-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
; VEC4_INTERL1-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; VEC4_INTERL1:       pred.store.if4:
; VEC4_INTERL1-NEXT:    [[TMP11:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
; VEC4_INTERL1-NEXT:    [[TMP12:%.*]] = or i64 [[INDEX]], 2
; VEC4_INTERL1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
; VEC4_INTERL1-NEXT:    store float [[TMP11]], float* [[TMP13]], align 4
; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE5]]
; VEC4_INTERL1:       pred.store.continue5:
; VEC4_INTERL1-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
; VEC4_INTERL1-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
; VEC4_INTERL1:       pred.store.if6:
; VEC4_INTERL1-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
; VEC4_INTERL1-NEXT:    [[TMP16:%.*]] = or i64 [[INDEX]], 3
; VEC4_INTERL1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
; VEC4_INTERL1-NEXT:    store float [[TMP15]], float* [[TMP17]], align 4
; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE7]]
; VEC4_INTERL1:       pred.store.continue7:
; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VEC4_INTERL1-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC4_INTERL1-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VEC4_INTERL1:       middle.block:
; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VEC4_INTERL1:       scalar.ph:
; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; VEC4_INTERL1-NEXT:    br label [[FOR_BODY:%.*]]
; VEC4_INTERL1:       for.body:
; VEC4_INTERL1-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; VEC4_INTERL1-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; VEC4_INTERL1-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
; VEC4_INTERL1-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
; VEC4_INTERL1-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
; VEC4_INTERL1-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
; VEC4_INTERL1:       if.pred:
; VEC4_INTERL1-NEXT:    store float [[J]], float* [[VAR0]], align 4
; VEC4_INTERL1-NEXT:    br label [[FOR_INC]]
; VEC4_INTERL1:       for.inc:
; VEC4_INTERL1-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; VEC4_INTERL1-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
; VEC4_INTERL1-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; VEC4_INTERL1-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP13:![0-9]+]]
; VEC4_INTERL1:       for.end:
; VEC4_INTERL1-NEXT:    ret void
;
; VEC4_INTERL2-LABEL: @non_primary_iv_float_scalar(
; VEC4_INTERL2-NEXT:  entry:
; VEC4_INTERL2-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; VEC4_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8
; VEC4_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VEC4_INTERL2:       vector.ph:
; VEC4_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775800
; VEC4_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
; VEC4_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL2:       vector.body:
; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE17:%.*]] ]
; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 4
; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
; VEC4_INTERL2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 4
; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
; VEC4_INTERL2-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x float>, <4 x float>* [[TMP5]], align 4
; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD3]], zeroinitializer
; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP6]], i64 0
; VEC4_INTERL2-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC4_INTERL2:       pred.store.if:
; VEC4_INTERL2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT:    store float [[TMP0]], float* [[TMP9]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE]]
; VEC4_INTERL2:       pred.store.continue:
; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i64 1
; VEC4_INTERL2-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; VEC4_INTERL2:       pred.store.if4:
; VEC4_INTERL2-NEXT:    [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP12:%.*]] = or i64 [[INDEX]], 1
; VEC4_INTERL2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
; VEC4_INTERL2-NEXT:    store float [[TMP11]], float* [[TMP13]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
; VEC4_INTERL2:       pred.store.continue5:
; VEC4_INTERL2-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i64 2
; VEC4_INTERL2-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
; VEC4_INTERL2:       pred.store.if6:
; VEC4_INTERL2-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP16:%.*]] = or i64 [[INDEX]], 2
; VEC4_INTERL2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
; VEC4_INTERL2-NEXT:    store float [[TMP15]], float* [[TMP17]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
; VEC4_INTERL2:       pred.store.continue7:
; VEC4_INTERL2-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP6]], i64 3
; VEC4_INTERL2-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
; VEC4_INTERL2:       pred.store.if8:
; VEC4_INTERL2-NEXT:    [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP20:%.*]] = or i64 [[INDEX]], 3
; VEC4_INTERL2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP20]]
; VEC4_INTERL2-NEXT:    store float [[TMP19]], float* [[TMP21]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE9]]
; VEC4_INTERL2:       pred.store.continue9:
; VEC4_INTERL2-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP7]], i64 0
; VEC4_INTERL2-NEXT:    br i1 [[TMP22]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
; VEC4_INTERL2:       pred.store.if10:
; VEC4_INTERL2-NEXT:    [[TMP23:%.*]] = fadd fast float [[TMP0]], 4.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]]
; VEC4_INTERL2-NEXT:    store float [[TMP23]], float* [[TMP24]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE11]]
; VEC4_INTERL2:       pred.store.continue11:
; VEC4_INTERL2-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP7]], i64 1
; VEC4_INTERL2-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
; VEC4_INTERL2:       pred.store.if12:
; VEC4_INTERL2-NEXT:    [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP27:%.*]] = or i64 [[INDEX]], 5
; VEC4_INTERL2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]]
; VEC4_INTERL2-NEXT:    store float [[TMP26]], float* [[TMP28]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE13]]
; VEC4_INTERL2:       pred.store.continue13:
; VEC4_INTERL2-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP7]], i64 2
; VEC4_INTERL2-NEXT:    br i1 [[TMP29]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
; VEC4_INTERL2:       pred.store.if14:
; VEC4_INTERL2-NEXT:    [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 6
; VEC4_INTERL2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]]
; VEC4_INTERL2-NEXT:    store float [[TMP30]], float* [[TMP32]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE15]]
; VEC4_INTERL2:       pred.store.continue15:
; VEC4_INTERL2-NEXT:    [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i64 3
; VEC4_INTERL2-NEXT:    br i1 [[TMP33]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17]]
; VEC4_INTERL2:       pred.store.if16:
; VEC4_INTERL2-NEXT:    [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP35:%.*]] = or i64 [[INDEX]], 7
; VEC4_INTERL2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP35]]
; VEC4_INTERL2-NEXT:    store float [[TMP34]], float* [[TMP36]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE17]]
; VEC4_INTERL2:       pred.store.continue17:
; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VEC4_INTERL2-NEXT:    [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC4_INTERL2-NEXT:    br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VEC4_INTERL2:       middle.block:
; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VEC4_INTERL2:       scalar.ph:
; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; VEC4_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
; VEC4_INTERL2:       for.body:
; VEC4_INTERL2-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; VEC4_INTERL2-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; VEC4_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
; VEC4_INTERL2-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
; VEC4_INTERL2-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
; VEC4_INTERL2-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
; VEC4_INTERL2:       if.pred:
; VEC4_INTERL2-NEXT:    store float [[J]], float* [[VAR0]], align 4
; VEC4_INTERL2-NEXT:    br label [[FOR_INC]]
; VEC4_INTERL2:       for.inc:
; VEC4_INTERL2-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; VEC4_INTERL2-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
; VEC4_INTERL2-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; VEC4_INTERL2-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP13:![0-9]+]]
; VEC4_INTERL2:       for.end:
; VEC4_INTERL2-NEXT:    ret void
;
; VEC1_INTERL2-LABEL: @non_primary_iv_float_scalar(
; VEC1_INTERL2-NEXT:  entry:
; VEC1_INTERL2-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; VEC1_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
; VEC1_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VEC1_INTERL2:       vector.ph:
; VEC1_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
; VEC1_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
; VEC1_INTERL2:       vector.body:
; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = load float, float* [[TMP1]], align 4
; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = load float, float* [[TMP2]], align 4
; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fcmp fast oeq float [[TMP3]], 0.000000e+00
; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fcmp fast oeq float [[TMP4]], 0.000000e+00
; VEC1_INTERL2-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC1_INTERL2:       pred.store.if:
; VEC1_INTERL2-NEXT:    store float [[TMP0]], float* [[TMP1]], align 4
; VEC1_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE]]
; VEC1_INTERL2:       pred.store.continue:
; VEC1_INTERL2-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
; VEC1_INTERL2:       pred.store.if3:
; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC1_INTERL2-NEXT:    store float [[TMP7]], float* [[TMP2]], align 4
; VEC1_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE4]]
; VEC1_INTERL2:       pred.store.continue4:
; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC1_INTERL2-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; VEC1_INTERL2:       middle.block:
; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VEC1_INTERL2:       scalar.ph:
; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; VEC1_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
; VEC1_INTERL2:       for.body:
; VEC1_INTERL2-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; VEC1_INTERL2-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; VEC1_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
; VEC1_INTERL2-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
; VEC1_INTERL2-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
; VEC1_INTERL2-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
; VEC1_INTERL2:       if.pred:
; VEC1_INTERL2-NEXT:    store float [[J]], float* [[VAR0]], align 4
; VEC1_INTERL2-NEXT:    br label [[FOR_INC]]
; VEC1_INTERL2:       for.inc:
; VEC1_INTERL2-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; VEC1_INTERL2-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
; VEC1_INTERL2-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; VEC1_INTERL2-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]]
; VEC1_INTERL2:       for.end:
; VEC1_INTERL2-NEXT:    ret void
;
; VEC2_INTERL1_PRED_STORE-LABEL: @non_primary_iv_float_scalar(
; VEC2_INTERL1_PRED_STORE-NEXT:  entry:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; VEC2_INTERL1_PRED_STORE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
; VEC2_INTERL1_PRED_STORE:       vector.ph:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
; VEC2_INTERL1_PRED_STORE-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[VECTOR_BODY:%.*]]
; VEC2_INTERL1_PRED_STORE:       vector.body:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to <2 x float>*
; VEC2_INTERL1_PRED_STORE-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = fcmp fast oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC2_INTERL1_PRED_STORE:       pred.store.if:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[TMP0]], float* [[TMP5]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[PRED_STORE_CONTINUE]]
; VEC2_INTERL1_PRED_STORE:       pred.store.continue:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; VEC2_INTERL1_PRED_STORE:       pred.store.if2:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = or i64 [[INDEX]], 1
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]]
; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[TMP7]], float* [[TMP9]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[PRED_STORE_CONTINUE3]]
; VEC2_INTERL1_PRED_STORE:       pred.store.continue3:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VEC2_INTERL1_PRED_STORE:       middle.block:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VEC2_INTERL1_PRED_STORE:       for.body:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VEC2_INTERL1_PRED_STORE-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
; VEC2_INTERL1_PRED_STORE:       if.pred:
; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[J]], float* [[VAR0]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[FOR_INC]]
; VEC2_INTERL1_PRED_STORE:       for.inc:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; VEC2_INTERL1_PRED_STORE-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
; VEC2_INTERL1_PRED_STORE-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP13:![0-9]+]]
; VEC2_INTERL1_PRED_STORE:       for.end:
; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  ; %i is the primary integer induction; %j is the secondary float induction.
  %i = phi i64 [ %i.next, %for.inc ], [ 0, %entry ]
  %j = phi float [ %j.next, %for.inc ], [ 0.0, %entry ]
  %var0 = getelementptr inbounds float, float* %A, i64 %i
  %var1 = load float, float* %var0, align 4
  %var2 = fcmp fast oeq float %var1, 0.0
  br i1 %var2, label %if.pred, label %for.inc

if.pred:
  ; The only use of %j: a store executed only when A[i] compared equal to 0.0.
  store float %j, float* %var0, align 4
  br label %for.inc

for.inc:
  %i.next = add nuw nsw i64 %i, 1
  %j.next = fadd fast float %j, 1.0
  %cond = icmp slt i64 %i.next, %N
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}