; Overview of the first test in this file, @fp_iv_loop1_fast_FMF.
;
; The scalar loop stores a float induction value x into A[i] and then updates
; it with `x = fsub fast x, fpinc`, where fpinc is loaded once from @fp_inc in
; the loop preheader. x starts at %init, and the loop is entered only when
; N > 0 (icmp sgt in the entry block).
;
; The four RUN lines run the loop vectorizer with different forced factors;
; each has its own FileCheck prefix:
;   VEC4_INTERL1            - vector width 4, interleave 1
;   VEC4_INTERL2            - vector width 4, interleave 2
;   VEC1_INTERL2            - vector width 1, interleave 2
;   VEC2_INTERL1_PRED_STORE - vector width 2, interleave 1, with simplifycfg
;                             run before and after instcombine
;
; The check lines are autogenerated (see the NOTE below); regenerate them with
; utils/update_test_checks.py rather than editing them by hand.
;
; NOTE(review): every line below starts with what looks like blame-view residue
; (line number, commit hash, author name) and the original lines appear to have
; been re-wrapped - presumably an extraction artifact; confirm against the
; upstream test before relying on this copy.
1eb052f6bSPhilip Reames; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2cee313d2SEric Christopher; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s 3cee313d2SEric Christopher; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s 4cee313d2SEric Christopher; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s 55cce4affSRoman Lebedev; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -instcombine -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -keep-loops=false -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s 6cee313d2SEric Christopher 7cee313d2SEric Christopher@fp_inc = common global float 0.000000e+00, align 4 8cee313d2SEric Christopher 9cee313d2SEric Christopher;void fp_iv_loop1(float init, float * __restrict__ A, int N) { 10cee313d2SEric Christopher; float x = init; 11cee313d2SEric Christopher; for (int i=0; i < N; ++i) { 12cee313d2SEric Christopher; A[i] = x; 13cee313d2SEric Christopher; x -= fp_inc; 14cee313d2SEric Christopher; } 15cee313d2SEric Christopher;} 16cee313d2SEric Christopher 17cee313d2SEric Christopher 18cee313d2SEric Christopher 19cee313d2SEric Christopher 2036a489d1SSanjay Pateldefine void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32 %N) { 21eb052f6bSPhilip Reames; VEC4_INTERL1-LABEL: @fp_iv_loop1_fast_FMF( 22eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: entry: 23eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 24eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 25eb052f6bSPhilip Reames; VEC4_INTERL1: for.body.lr.ph: 
26eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 27eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 28eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 29eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 30eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 31eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 32eb052f6bSPhilip Reames; VEC4_INTERL1: vector.ph: 33eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 34eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 35eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]] 36eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] 37e6ad9ef4SPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 38eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 39e6ad9ef4SPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 40eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer 41eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 42eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP4]] 43eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], 4.000000e+00 44e6ad9ef4SPhilip Reames; 
VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 45eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer 46eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 47eb052f6bSPhilip Reames; VEC4_INTERL1: vector.body: 48eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 49eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 50eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 51eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* 52eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4 53eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 54eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]] 55eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 56eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 57eb052f6bSPhilip Reames; VEC4_INTERL1: middle.block: 58eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 59eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 60eb052f6bSPhilip Reames; VEC4_INTERL1: scalar.ph: 61eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 62eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] 
], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 63eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 64eb052f6bSPhilip Reames; VEC4_INTERL1: for.body: 65eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 66eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 67eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 68eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 69eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] 70eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 71eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 72eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 73eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] 74eb052f6bSPhilip Reames; VEC4_INTERL1: for.end.loopexit: 75eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br label [[FOR_END]] 76eb052f6bSPhilip Reames; VEC4_INTERL1: for.end: 77eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: ret void 78eb052f6bSPhilip Reames; 79eb052f6bSPhilip Reames; VEC4_INTERL2-LABEL: @fp_iv_loop1_fast_FMF( 80eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: entry: 81eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 82eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 83eb052f6bSPhilip Reames; VEC4_INTERL2: for.body.lr.ph: 84eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 85eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add 
i32 [[N]], -1 86eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 87eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 88eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 89eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 90eb052f6bSPhilip Reames; VEC4_INTERL2: vector.ph: 91eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 92eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 93eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]] 94eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] 95e6ad9ef4SPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 96eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 97e6ad9ef4SPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 98eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer 99eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 100eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP4]] 101eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], 4.000000e+00 102e6ad9ef4SPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 103eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = 
shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer 104eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 105eb052f6bSPhilip Reames; VEC4_INTERL2: vector.body: 106eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 107eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 108eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]] 109eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 110eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* 111eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4 112eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 4 113eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>* 114eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP9]], align 4 115eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 116eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[STEP_ADD]], [[DOTSPLAT5]] 117eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 118eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 119eb052f6bSPhilip Reames; VEC4_INTERL2: middle.block: 120eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 121eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 
122eb052f6bSPhilip Reames; VEC4_INTERL2: scalar.ph: 123eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 124eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 125eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 126eb052f6bSPhilip Reames; VEC4_INTERL2: for.body: 127eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 128eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 129eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 130eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 131eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] 132eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 133eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 134eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 135eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] 136eb052f6bSPhilip Reames; VEC4_INTERL2: for.end.loopexit: 137eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br label [[FOR_END]] 138eb052f6bSPhilip Reames; VEC4_INTERL2: for.end: 139eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: ret void 140eb052f6bSPhilip Reames; 141eb052f6bSPhilip Reames; VEC1_INTERL2-LABEL: @fp_iv_loop1_fast_FMF( 142eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: entry: 143eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 
144eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 145eb052f6bSPhilip Reames; VEC1_INTERL2: for.body.lr.ph: 146eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 147eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 148eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 149eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 150eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 151eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 152eb052f6bSPhilip Reames; VEC1_INTERL2: vector.ph: 153eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 154eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 155eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]] 156eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] 157eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 158eb052f6bSPhilip Reames; VEC1_INTERL2: vector.body: 159eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 160eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float 161eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], [[TMP4]] 162eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP5]] 163eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fsub fast float [[OFFSET_IDX]], [[FPINC]] 164b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1 165eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds float, float* [[A:%.*]], i64 [[INDEX]] 166eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]] 167eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], float* [[TMP7]], align 4 168eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[TMP6]], float* [[TMP8]], align 4 169eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 170eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 171eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 172eb052f6bSPhilip Reames; VEC1_INTERL2: middle.block: 173eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 174eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 175eb052f6bSPhilip Reames; VEC1_INTERL2: scalar.ph: 176eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 177eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 178eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 179eb052f6bSPhilip Reames; VEC1_INTERL2: for.body: 180eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 181eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 182eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 183eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 184eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[ADD]] = fsub 
fast float [[X_05]], [[FPINC]] 185eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 186eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 187eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 188eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] 189eb052f6bSPhilip Reames; VEC1_INTERL2: for.end.loopexit: 190eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br label [[FOR_END]] 191eb052f6bSPhilip Reames; VEC1_INTERL2: for.end: 192eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: ret void 193eb052f6bSPhilip Reames; 194eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop1_fast_FMF( 195eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: entry: 196eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 197eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 198eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: 199eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 200eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 201eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 202eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 203eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 204eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 205eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: vector.ph: 206eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 207eb052f6bSPhilip 
Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 208eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]] 209eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]] 210e6ad9ef4SPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 211eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 212e6ad9ef4SPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 213eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer 214eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00> 215eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP4]] 216eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 217e6ad9ef4SPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0 218eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer 219eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 220eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: vector.body: 221eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 222eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x 
float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 223eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 224eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>* 225eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], <2 x float>* [[TMP7]], align 4 226eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 227eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[DOTSPLAT5]] 228eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 229eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 230eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: middle.block: 231eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 232eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 233eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: for.body: 234eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 235eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 236eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 237eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 238eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] 
239eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 240eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 241eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 242eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] 243eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: for.end: 244eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: ret void 245eb052f6bSPhilip Reames; 246cee313d2SEric Christopherentry: 247cee313d2SEric Christopher %cmp4 = icmp sgt i32 %N, 0 248cee313d2SEric Christopher br i1 %cmp4, label %for.body.lr.ph, label %for.end 249cee313d2SEric Christopher 250cee313d2SEric Christopherfor.body.lr.ph: ; preds = %entry 251cee313d2SEric Christopher %fpinc = load float, float* @fp_inc, align 4 252cee313d2SEric Christopher br label %for.body 253cee313d2SEric Christopher 254cee313d2SEric Christopherfor.body: ; preds = %for.body, %for.body.lr.ph 255cee313d2SEric Christopher %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] 256cee313d2SEric Christopher %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ] 257cee313d2SEric Christopher %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 258cee313d2SEric Christopher store float %x.05, float* %arrayidx, align 4 259cee313d2SEric Christopher %add = fsub fast float %x.05, %fpinc 260cee313d2SEric Christopher %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 261cee313d2SEric Christopher %lftr.wideiv = trunc i64 %indvars.iv.next to i32 262cee313d2SEric Christopher %exitcond = icmp eq i32 %lftr.wideiv, %N 263cee313d2SEric Christopher br i1 %exitcond, label %for.end.loopexit, label %for.body 264cee313d2SEric Christopher 265cee313d2SEric Christopherfor.end.loopexit: ; preds = %for.body 266cee313d2SEric 
Christopher br label %for.end 267cee313d2SEric Christopher 268cee313d2SEric Christopherfor.end: ; preds = %for.end.loopexit, %entry 269cee313d2SEric Christopher ret void 270cee313d2SEric Christopher} 271cee313d2SEric Christopher 2721bee5497SSanjay Patel; We do not need the full 'fast' FMF to vectorize the loop, but the code can't become 2731bee5497SSanjay Patel; 'fast' spontaneously - FMF should propagate from the original IR. 27436a489d1SSanjay Patel 27536a489d1SSanjay Pateldefine void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i32 %N) { 276eb052f6bSPhilip Reames; 277eb052f6bSPhilip Reames; 278eb052f6bSPhilip Reames; 279eb052f6bSPhilip Reames; 28036a489d1SSanjay Patel; VEC4_INTERL1-LABEL: @fp_iv_loop1_reassoc_FMF( 28136a489d1SSanjay Patel; VEC4_INTERL1-NEXT: entry: 28236a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 28336a489d1SSanjay Patel; VEC4_INTERL1-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 28436a489d1SSanjay Patel; VEC4_INTERL1: for.body.lr.ph: 28536a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 28636a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 28736a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 28836a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 28936a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 29036a489d1SSanjay Patel; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 29136a489d1SSanjay Patel; VEC4_INTERL1: vector.ph: 29236a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 29336a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 2941bee5497SSanjay Patel; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]] 2951bee5497SSanjay Patel; 
VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]] 296e6ad9ef4SPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 29736a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 298e6ad9ef4SPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 29936a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer 3001bee5497SSanjay Patel; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 3011bee5497SSanjay Patel; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP4]] 3021bee5497SSanjay Patel; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00 303e6ad9ef4SPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 30436a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer 30536a489d1SSanjay Patel; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 30636a489d1SSanjay Patel; VEC4_INTERL1: vector.body: 30736a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 30836a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 30936a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 31036a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* 31136a489d1SSanjay Patel; 
VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4 31223c2f2e6SFlorian Hahn; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 3131bee5497SSanjay Patel; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]] 31436a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 315eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 31636a489d1SSanjay Patel; VEC4_INTERL1: middle.block: 31736a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 31836a489d1SSanjay Patel; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 31936a489d1SSanjay Patel; VEC4_INTERL1: scalar.ph: 32036a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 32136a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 32236a489d1SSanjay Patel; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 32336a489d1SSanjay Patel; VEC4_INTERL1: for.body: 32436a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 32536a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 32636a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 32736a489d1SSanjay Patel; VEC4_INTERL1-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 32836a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] 32936a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 33036a489d1SSanjay Patel; VEC4_INTERL1-NEXT: 
[[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 33136a489d1SSanjay Patel; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 332eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 33336a489d1SSanjay Patel; VEC4_INTERL1: for.end.loopexit: 33436a489d1SSanjay Patel; VEC4_INTERL1-NEXT: br label [[FOR_END]] 33536a489d1SSanjay Patel; VEC4_INTERL1: for.end: 33636a489d1SSanjay Patel; VEC4_INTERL1-NEXT: ret void 33736a489d1SSanjay Patel; 33836a489d1SSanjay Patel; VEC4_INTERL2-LABEL: @fp_iv_loop1_reassoc_FMF( 33936a489d1SSanjay Patel; VEC4_INTERL2-NEXT: entry: 34036a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 34136a489d1SSanjay Patel; VEC4_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 34236a489d1SSanjay Patel; VEC4_INTERL2: for.body.lr.ph: 34336a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 34436a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 34536a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 34636a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 34736a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 34836a489d1SSanjay Patel; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 34936a489d1SSanjay Patel; VEC4_INTERL2: vector.ph: 35036a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 35136a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 3521bee5497SSanjay Patel; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]] 3531bee5497SSanjay Patel; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]] 354e6ad9ef4SPhilip Reames; VEC4_INTERL2-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 35536a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 356e6ad9ef4SPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 35736a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer 3581bee5497SSanjay Patel; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 3591bee5497SSanjay Patel; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP4]] 3601bee5497SSanjay Patel; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00 361e6ad9ef4SPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 36236a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer 36336a489d1SSanjay Patel; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 36436a489d1SSanjay Patel; VEC4_INTERL2: vector.body: 36536a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 36636a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 3671bee5497SSanjay Patel; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]] 36836a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 36936a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* 37036a489d1SSanjay Patel; 
VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4 37136a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 4 37236a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>* 37336a489d1SSanjay Patel; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP9]], align 4 37423c2f2e6SFlorian Hahn; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 3751bee5497SSanjay Patel; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[STEP_ADD]], [[DOTSPLAT5]] 37636a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 377eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 37836a489d1SSanjay Patel; VEC4_INTERL2: middle.block: 37936a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 38036a489d1SSanjay Patel; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 38136a489d1SSanjay Patel; VEC4_INTERL2: scalar.ph: 38236a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 38336a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 38436a489d1SSanjay Patel; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 38536a489d1SSanjay Patel; VEC4_INTERL2: for.body: 38636a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 38736a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 38836a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 
38936a489d1SSanjay Patel; VEC4_INTERL2-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 39036a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] 39136a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 39236a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 39336a489d1SSanjay Patel; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 394eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 39536a489d1SSanjay Patel; VEC4_INTERL2: for.end.loopexit: 39636a489d1SSanjay Patel; VEC4_INTERL2-NEXT: br label [[FOR_END]] 39736a489d1SSanjay Patel; VEC4_INTERL2: for.end: 39836a489d1SSanjay Patel; VEC4_INTERL2-NEXT: ret void 39936a489d1SSanjay Patel; 40036a489d1SSanjay Patel; VEC1_INTERL2-LABEL: @fp_iv_loop1_reassoc_FMF( 40136a489d1SSanjay Patel; VEC1_INTERL2-NEXT: entry: 40236a489d1SSanjay Patel; VEC1_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 40336a489d1SSanjay Patel; VEC1_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 40436a489d1SSanjay Patel; VEC1_INTERL2: for.body.lr.ph: 40536a489d1SSanjay Patel; VEC1_INTERL2-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 406c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 407c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 408c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 409c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 410c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 411c8893f3bSSanjay Patel; VEC1_INTERL2: vector.ph: 412c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 413c8893f3bSSanjay Patel; 
VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 414c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]] 415c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]] 416c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 417c8893f3bSSanjay Patel; VEC1_INTERL2: vector.body: 418c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 419c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float 420c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], [[TMP4]] 421c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub reassoc float [[INIT]], [[TMP5]] 422c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fmul reassoc float [[FPINC]], 0.000000e+00 423c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[TMP6]] 424c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[FPINC]] 425b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1 426c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 427c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]] 428c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: store float [[TMP7]], float* [[TMP9]], align 4 429c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: store float [[TMP8]], float* [[TMP10]], align 4 43023c2f2e6SFlorian Hahn; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 431c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 432eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 
433c8893f3bSSanjay Patel; VEC1_INTERL2: middle.block: 434c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 435c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 436c8893f3bSSanjay Patel; VEC1_INTERL2: scalar.ph: 437c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 438c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 43936a489d1SSanjay Patel; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 44036a489d1SSanjay Patel; VEC1_INTERL2: for.body: 441c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 442c8893f3bSSanjay Patel; VEC1_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 443eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 44436a489d1SSanjay Patel; VEC1_INTERL2-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 44536a489d1SSanjay Patel; VEC1_INTERL2-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] 44636a489d1SSanjay Patel; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 44736a489d1SSanjay Patel; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 44836a489d1SSanjay Patel; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 449eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 45036a489d1SSanjay Patel; VEC1_INTERL2: for.end.loopexit: 45136a489d1SSanjay Patel; VEC1_INTERL2-NEXT: br label [[FOR_END]] 45236a489d1SSanjay Patel; VEC1_INTERL2: for.end: 45336a489d1SSanjay Patel; VEC1_INTERL2-NEXT: ret 
void 45436a489d1SSanjay Patel; 45536a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop1_reassoc_FMF( 45636a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: entry: 45736a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 45836a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 45936a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: 46036a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[FPINC:%.*]] = load float, float* @fp_inc, align 4 46136a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 46236a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 46336a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 46436a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 46536a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 46636a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE: vector.ph: 46736a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 46836a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 4691bee5497SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]] 4701bee5497SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]] 471e6ad9ef4SPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 47236a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 473e6ad9ef4SPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x 
float> poison, float [[FPINC]], i64 0 47436a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer 4751bee5497SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00> 4761bee5497SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT]], [[TMP4]] 4771bee5497SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00 478e6ad9ef4SPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0 47936a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer 48036a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 48136a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE: vector.body: 48236a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 48336a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 48436a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 48536a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>* 48636a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], <2 x float>* [[TMP7]], align 4 48723c2f2e6SFlorian Hahn; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 4881bee5497SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <2 x float> [[VEC_IND]], [[DOTSPLAT5]] 48936a489d1SSanjay Patel; 
VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 490eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 49136a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE: middle.block: 49236a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 49336a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 49436a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE: for.body: 49536a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 49636a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 49736a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 49836a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], float* [[ARRAYIDX]], align 4 49936a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] 50036a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 50136a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 50236a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 503eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 50436a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE: for.end: 50536a489d1SSanjay Patel; VEC2_INTERL1_PRED_STORE-NEXT: ret void 50636a489d1SSanjay Patel; 50736a489d1SSanjay Patelentry: 50836a489d1SSanjay Patel %cmp4 = icmp sgt 
i32 %N, 0 50936a489d1SSanjay Patel br i1 %cmp4, label %for.body.lr.ph, label %for.end 51036a489d1SSanjay Patel 51136a489d1SSanjay Patelfor.body.lr.ph: ; preds = %entry 51236a489d1SSanjay Patel %fpinc = load float, float* @fp_inc, align 4 51336a489d1SSanjay Patel br label %for.body 51436a489d1SSanjay Patel 51536a489d1SSanjay Patelfor.body: ; preds = %for.body, %for.body.lr.ph 51636a489d1SSanjay Patel %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] 51736a489d1SSanjay Patel %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ] 51836a489d1SSanjay Patel %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 51936a489d1SSanjay Patel store float %x.05, float* %arrayidx, align 4 52036a489d1SSanjay Patel %add = fsub reassoc float %x.05, %fpinc 52136a489d1SSanjay Patel %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 52236a489d1SSanjay Patel %lftr.wideiv = trunc i64 %indvars.iv.next to i32 52336a489d1SSanjay Patel %exitcond = icmp eq i32 %lftr.wideiv, %N 52436a489d1SSanjay Patel br i1 %exitcond, label %for.end.loopexit, label %for.body 52536a489d1SSanjay Patel 52636a489d1SSanjay Patelfor.end.loopexit: ; preds = %for.body 52736a489d1SSanjay Patel br label %for.end 52836a489d1SSanjay Patel 52936a489d1SSanjay Patelfor.end: ; preds = %for.end.loopexit, %entry 53036a489d1SSanjay Patel ret void 53136a489d1SSanjay Patel} 53236a489d1SSanjay Patel 533cee313d2SEric Christopher;void fp_iv_loop2(float init, float * __restrict__ A, int N) { 534cee313d2SEric Christopher; float x = init; 535cee313d2SEric Christopher; for (int i=0; i < N; ++i) { 536cee313d2SEric Christopher; A[i] = x; 537cee313d2SEric Christopher; x += 0.5; 538cee313d2SEric Christopher; } 539cee313d2SEric Christopher;} 540cee313d2SEric Christopher 541bbba8676SPhilip Reames 542bbba8676SPhilip Reamesdefine void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 { 543eb052f6bSPhilip Reames; VEC4_INTERL1-LABEL: @fp_iv_loop2( 544eb052f6bSPhilip Reames; 
VEC4_INTERL1-NEXT: entry: 545eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 546eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 547eb052f6bSPhilip Reames; VEC4_INTERL1: for.body.preheader: 548eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 549eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 550eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 551eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 552eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 553eb052f6bSPhilip Reames; VEC4_INTERL1: vector.ph: 554eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 555eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 556eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 557eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] 558e6ad9ef4SPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 559eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 560eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00> 561eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 562eb052f6bSPhilip Reames; VEC4_INTERL1: vector.body: 563eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 564eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] 
= phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 565eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 566eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>* 567eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4 568eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 569eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> 570eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 571eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 572eb052f6bSPhilip Reames; VEC4_INTERL1: middle.block: 573eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 574eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 575eb052f6bSPhilip Reames; VEC4_INTERL1: scalar.ph: 576eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 577eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] 578eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 579eb052f6bSPhilip Reames; VEC4_INTERL1: for.body: 580eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 581eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 582eb052f6bSPhilip Reames; 
VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 583eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 584eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 585eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 586eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 587eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 588eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 589eb052f6bSPhilip Reames; VEC4_INTERL1: for.end.loopexit: 590eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br label [[FOR_END]] 591eb052f6bSPhilip Reames; VEC4_INTERL1: for.end: 592eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: ret void 593eb052f6bSPhilip Reames; 594eb052f6bSPhilip Reames; VEC4_INTERL2-LABEL: @fp_iv_loop2( 595eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: entry: 596eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 597eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 598eb052f6bSPhilip Reames; VEC4_INTERL2: for.body.preheader: 599eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 600eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 601eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 602eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 603eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 604eb052f6bSPhilip Reames; VEC4_INTERL2: vector.ph: 605eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 
606eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 607eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 608eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] 609e6ad9ef4SPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 610eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 611eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00> 612eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 613eb052f6bSPhilip Reames; VEC4_INTERL2: vector.body: 614eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 615eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 616eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> 617eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 618eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>* 619eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4 620eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 4 621eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* 622eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], <4 x 
float>* [[TMP7]], align 4 623eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 624eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00> 625eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 626eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 627eb052f6bSPhilip Reames; VEC4_INTERL2: middle.block: 628eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 629eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 630eb052f6bSPhilip Reames; VEC4_INTERL2: scalar.ph: 631eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 632eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] 633eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 634eb052f6bSPhilip Reames; VEC4_INTERL2: for.body: 635eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 636eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 637eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 638eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 639eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 640eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 
641eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 642eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 643eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 644eb052f6bSPhilip Reames; VEC4_INTERL2: for.end.loopexit: 645eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br label [[FOR_END]] 646eb052f6bSPhilip Reames; VEC4_INTERL2: for.end: 647eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: ret void 648eb052f6bSPhilip Reames; 649eb052f6bSPhilip Reames; VEC1_INTERL2-LABEL: @fp_iv_loop2( 650eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: entry: 651eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 652eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 653eb052f6bSPhilip Reames; VEC1_INTERL2: for.body.preheader: 654eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 655eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 656eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 657eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 658eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 659eb052f6bSPhilip Reames; VEC1_INTERL2: vector.ph: 660eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 661eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 662eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 663eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] 664eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 665eb052f6bSPhilip 
Reames; VEC1_INTERL2: vector.body: 666eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 667eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float 668eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 5.000000e-01 669eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[TMP5]], [[INIT]] 670eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fadd fast float [[OFFSET_IDX]], 5.000000e-01 671b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1 672eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 673eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]] 674eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], float* [[TMP7]], align 4 675eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[TMP6]], float* [[TMP8]], align 4 676eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 677eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 678eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 679eb052f6bSPhilip Reames; VEC1_INTERL2: middle.block: 680eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 681eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 682eb052f6bSPhilip Reames; VEC1_INTERL2: scalar.ph: 683eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 684eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 
[[INIT]], [[FOR_BODY_PREHEADER]] ] 685eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 686eb052f6bSPhilip Reames; VEC1_INTERL2: for.body: 687eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 688eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 689eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 690eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 691eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 692eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 693eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 694eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 695eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 696eb052f6bSPhilip Reames; VEC1_INTERL2: for.end.loopexit: 697eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br label [[FOR_END]] 698eb052f6bSPhilip Reames; VEC1_INTERL2: for.end: 699eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: ret void 700eb052f6bSPhilip Reames; 701eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop2( 702eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: entry: 703eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 704eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 705eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: for.body.preheader: 706eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = add i32 
[[N]], -1 707eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 708eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 709eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 710eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 711eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: vector.ph: 712eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 713eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 714eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 715eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] 716e6ad9ef4SPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 717eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 718eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01> 719eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 720eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: vector.body: 721eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 722eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 723eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 
[[INDEX]] 724eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <2 x float>* 725eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], <2 x float>* [[TMP5]], align 4 726eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 727eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float 1.000000e+00, float 1.000000e+00> 728eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 729eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 730eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: middle.block: 731eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 732eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 733eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: for.body: 734eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 735eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] 736eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 737eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 738eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 739eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 740eb052f6bSPhilip Reames; 
VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 741eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 742eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 743eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: for.end: 744eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: ret void 745eb052f6bSPhilip Reames; 746cee313d2SEric Christopherentry: 747cee313d2SEric Christopher %cmp4 = icmp sgt i32 %N, 0 748cee313d2SEric Christopher br i1 %cmp4, label %for.body.preheader, label %for.end 749cee313d2SEric Christopher 750cee313d2SEric Christopherfor.body.preheader: ; preds = %entry 751cee313d2SEric Christopher br label %for.body 752cee313d2SEric Christopher 753cee313d2SEric Christopherfor.body: ; preds = %for.body.preheader, %for.body 754cee313d2SEric Christopher %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] 755cee313d2SEric Christopher %x.06 = phi float [ %conv1, %for.body ], [ %init, %for.body.preheader ] 756cee313d2SEric Christopher %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 757cee313d2SEric Christopher store float %x.06, float* %arrayidx, align 4 758cee313d2SEric Christopher %conv1 = fadd fast float %x.06, 5.000000e-01 759cee313d2SEric Christopher %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 760cee313d2SEric Christopher %lftr.wideiv = trunc i64 %indvars.iv.next to i32 761cee313d2SEric Christopher %exitcond = icmp eq i32 %lftr.wideiv, %N 762cee313d2SEric Christopher br i1 %exitcond, label %for.end.loopexit, label %for.body 763cee313d2SEric Christopher 764cee313d2SEric Christopherfor.end.loopexit: ; preds = %for.body 765cee313d2SEric Christopher br label %for.end 766cee313d2SEric Christopher 767cee313d2SEric Christopherfor.end: ; preds = %for.end.loopexit, %entry 768cee313d2SEric Christopher ret void 769cee313d2SEric 
Christopher} 770cee313d2SEric Christopher 771cee313d2SEric Christopher;void fp_iv_loop3(float init, float * __restrict__ A, float * __restrict__ B, float * __restrict__ C, int N) { 772cee313d2SEric Christopher; int i = 0; 773cee313d2SEric Christopher; float x = init; 774cee313d2SEric Christopher; float y = 0.1; 775cee313d2SEric Christopher; for (; i < N; ++i) { 776cee313d2SEric Christopher; A[i] = x; 777cee313d2SEric Christopher; x += fp_inc; 778cee313d2SEric Christopher; y -= 0.5; 779cee313d2SEric Christopher; B[i] = x + y; 780cee313d2SEric Christopher; C[i] = y; 781cee313d2SEric Christopher; } 782cee313d2SEric Christopher;} 783cee313d2SEric Christopher 784eb052f6bSPhilip Reames 785eb052f6bSPhilip Reamesdefine void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalias nocapture %B, float* noalias nocapture %C, i32 %N) { 786cee313d2SEric Christopher; VEC4_INTERL1-LABEL: @fp_iv_loop3( 787eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: entry: 788eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 789eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 790cee313d2SEric Christopher; VEC4_INTERL1: for.body.lr.ph: 791eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = load float, float* @fp_inc, align 4 792eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1 793eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 794eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 795eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 3 796eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 797cee313d2SEric Christopher; VEC4_INTERL1: vector.ph: 798eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934588 799eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: 
[[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 800eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01 801eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000 802eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float 803eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]] 804eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] 805e6ad9ef4SPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 806278aa65cSJuneyoung Lee; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 807e6ad9ef4SPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 808278aa65cSJuneyoung Lee; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer 809eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[DOTSPLAT6]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 810eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP6]] 811eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fmul fast float [[TMP0]], 4.000000e+00 812e6ad9ef4SPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i64 0 813eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer 814e6ad9ef4SPhilip Reames; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 815eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: 
[[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 816cee313d2SEric Christopher; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 817cee313d2SEric Christopher; VEC4_INTERL1: vector.body: 818eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 819eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000, float 0xBFECCCCCC0000000, float 0xBFF6666660000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 820eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[VEC_IND9:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ] 821eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 822eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>* 823eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND9]], <4 x float>* [[TMP9]], align 4 824eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] 825eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01> 826eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP10]] 827eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] 828eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = bitcast float* [[TMP13]] to <4 x float>* 829eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: store <4 x float> [[TMP12]], <4 x float>* [[TMP14]], align 4 830eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]] 
831eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP16:%.*]] = bitcast float* [[TMP15]] to <4 x float>* 832eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: store <4 x float> [[TMP11]], <4 x float>* [[TMP16]], align 4 83323c2f2e6SFlorian Hahn; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 834cee313d2SEric Christopher; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00> 835eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]] 836eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 837eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 838eb052f6bSPhilip Reames; VEC4_INTERL1: middle.block: 839eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] 840eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 841eb052f6bSPhilip Reames; VEC4_INTERL1: scalar.ph: 842eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 843eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] 844eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 845eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 846eb052f6bSPhilip Reames; VEC4_INTERL1: for.body: 847eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 848eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ 
[[CONV1:%.*]], [[FOR_BODY]] ] 849eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 850eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 851eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: store float [[X_011]], float* [[ARRAYIDX]], align 4 852eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] 853eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 854eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]] 855eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] 856eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: store float [[ADD2]], float* [[ARRAYIDX4]], align 4 857eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]] 858eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: store float [[CONV1]], float* [[ARRAYIDX6]], align 4 859eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 860eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 861eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 862eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 863eb052f6bSPhilip Reames; VEC4_INTERL1: for.end.loopexit: 864eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br label [[FOR_END]] 865eb052f6bSPhilip Reames; VEC4_INTERL1: for.end: 866eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: ret void 867eb052f6bSPhilip Reames; 868eb052f6bSPhilip Reames; VEC4_INTERL2-LABEL: @fp_iv_loop3( 869eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: entry: 870eb052f6bSPhilip Reames; 
VEC4_INTERL2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 871eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 872eb052f6bSPhilip Reames; VEC4_INTERL2: for.body.lr.ph: 873eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = load float, float* @fp_inc, align 4 874eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1 875eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 876eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 877eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 7 878eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 879eb052f6bSPhilip Reames; VEC4_INTERL2: vector.ph: 880eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934584 881eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 882eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01 883eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000 884eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float 885eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]] 886eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] 887e6ad9ef4SPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 888eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 889e6ad9ef4SPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 890eb052f6bSPhilip Reames; 
VEC4_INTERL2-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> poison, <4 x i32> zeroinitializer 891eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[DOTSPLAT7]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 892eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP6]] 893eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast float [[TMP0]], 4.000000e+00 894e6ad9ef4SPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT8:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i64 0 895eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[DOTSPLAT9:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT8]], <4 x float> poison, <4 x i32> zeroinitializer 896e6ad9ef4SPhilip Reames; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 897eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 898e6ad9ef4SPhilip Reames; VEC4_INTERL2-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 899eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT14]], <4 x float> poison, <4 x i32> zeroinitializer 900eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 901eb052f6bSPhilip Reames; VEC4_INTERL2: vector.body: 902eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 903eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000, float 0xBFECCCCCC0000000, float 0xBFF6666660000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 904eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[VEC_IND10:%.*]] = 
phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT13:%.*]], [[VECTOR_BODY]] ] 905eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[STEP_ADD11:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[DOTSPLAT9]] 906eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 907eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>* 908eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND10]], <4 x float>* [[TMP9]], align 4 909eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 4 910eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>* 911eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD11]], <4 x float>* [[TMP11]], align 4 912eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[BROADCAST_SPLAT]] 913eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = fadd fast <4 x float> [[STEP_ADD11]], [[BROADCAST_SPLAT15]] 914eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01> 915eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -2.500000e+00, float -2.500000e+00, float -2.500000e+00, float -2.500000e+00> 916eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[TMP14]], [[TMP12]] 917eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = fadd fast <4 x float> [[TMP15]], [[TMP13]] 918eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] 919eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = bitcast float* [[TMP18]] to <4 x float>* 920eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store <4 x float> [[TMP16]], <4 x 
float>* [[TMP19]], align 4 921eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 4 922eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = bitcast float* [[TMP20]] to <4 x float>* 923eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store <4 x float> [[TMP17]], <4 x float>* [[TMP21]], align 4 924eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]] 925eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = bitcast float* [[TMP22]] to <4 x float>* 926eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store <4 x float> [[TMP14]], <4 x float>* [[TMP23]], align 4 927eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 4 928eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to <4 x float>* 929eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store <4 x float> [[TMP15]], <4 x float>* [[TMP25]], align 4 930eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 931eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float -4.000000e+00, float -4.000000e+00, float -4.000000e+00, float -4.000000e+00> 932eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT13]] = fadd fast <4 x float> [[STEP_ADD11]], [[DOTSPLAT9]] 933eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 934eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 935eb052f6bSPhilip Reames; VEC4_INTERL2: middle.block: 936eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] 937eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 938eb052f6bSPhilip Reames; VEC4_INTERL2: scalar.ph: 939eb052f6bSPhilip Reames; 
VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 940eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] 941eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 942eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 943eb052f6bSPhilip Reames; VEC4_INTERL2: for.body: 944eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 945eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ] 946eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 947eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 948eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store float [[X_011]], float* [[ARRAYIDX]], align 4 949eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] 950eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 951eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]] 952eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] 953eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store float [[ADD2]], float* [[ARRAYIDX4]], align 4 954eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]] 955eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store float [[CONV1]], float* [[ARRAYIDX6]], align 4 956eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: 
[[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 957eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 958eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 959eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 960eb052f6bSPhilip Reames; VEC4_INTERL2: for.end.loopexit: 961eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br label [[FOR_END]] 962eb052f6bSPhilip Reames; VEC4_INTERL2: for.end: 963eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: ret void 964eb052f6bSPhilip Reames; 965eb052f6bSPhilip Reames; VEC1_INTERL2-LABEL: @fp_iv_loop3( 966eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: entry: 967eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 968eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 969eb052f6bSPhilip Reames; VEC1_INTERL2: for.body.lr.ph: 970eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = load float, float* @fp_inc, align 4 971eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1 972eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 973eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 974eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP1]], 0 975eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 976eb052f6bSPhilip Reames; VEC1_INTERL2: vector.ph: 977eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934590 978eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 979eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01 980eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: 
[[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000 981eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float 982eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]] 983eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] 984eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 985eb052f6bSPhilip Reames; VEC1_INTERL2: vector.body: 986eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 987eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = sitofp i64 [[INDEX]] to float 988b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fmul fast float [[TMP0]], [[TMP6]] 989b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[TMP7]], [[INIT]] 990b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP0]] 991b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = sitofp i64 [[INDEX]] to float 992b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[TMP10:%.*]] = fmul fast float [[TMP9]], -5.000000e-01 993b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[INDUCTION6:%.*]] = or i64 [[INDEX]], 1 994eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 995b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION6]] 996b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], float* [[TMP11]], align 4 997b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: store float [[TMP8]], float* [[TMP12]], align 4 998b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[TMP13:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP0]] 999b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[TMP14:%.*]] = fadd fast float [[TMP8]], [[TMP0]] 1000b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast 
float [[TMP10]], 0xBFD99999A0000000 1001b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[TMP16:%.*]] = fadd fast float [[TMP10]], 0xBFECCCCCC0000000 1002eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP17:%.*]] = fadd fast float [[TMP15]], [[TMP13]] 1003eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP18:%.*]] = fadd fast float [[TMP16]], [[TMP14]] 1004eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] 1005b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDUCTION6]] 1006eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[TMP17]], float* [[TMP19]], align 4 1007eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[TMP18]], float* [[TMP20]], align 4 1008eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]] 1009b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDUCTION6]] 1010eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[TMP15]], float* [[TMP21]], align 4 1011eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[TMP16]], float* [[TMP22]], align 4 1012eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1013eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1014eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 1015eb052f6bSPhilip Reames; VEC1_INTERL2: middle.block: 1016eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] 1017eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 1018eb052f6bSPhilip Reames; VEC1_INTERL2: scalar.ph: 1019eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, 
[[FOR_BODY_LR_PH]] ] 1020eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] 1021eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 1022eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 1023eb052f6bSPhilip Reames; VEC1_INTERL2: for.body: 1024eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1025eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ] 1026eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 1027eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 1028eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[X_011]], float* [[ARRAYIDX]], align 4 1029eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] 1030eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 1031eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]] 1032eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] 1033eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[ADD2]], float* [[ARRAYIDX4]], align 4 1034eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]] 1035eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[CONV1]], float* [[ARRAYIDX6]], align 4 1036eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1037eb052f6bSPhilip 
Reames; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1038eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 1039eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 1040eb052f6bSPhilip Reames; VEC1_INTERL2: for.end.loopexit: 1041eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br label [[FOR_END]] 1042eb052f6bSPhilip Reames; VEC1_INTERL2: for.end: 1043eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: ret void 1044eb052f6bSPhilip Reames; 1045eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop3( 1046eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: entry: 1047eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 1048eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 1049eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: 1050eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = load float, float* @fp_inc, align 4 1051eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = add i32 [[N]], -1 1052eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 1053eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 1054eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP1]], 0 1055eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 1056eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: vector.ph: 1057eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934590 1058eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 1059eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: 
[[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01 1060eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000 1061eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float 1062eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]] 1063eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]] 1064e6ad9ef4SPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 1065eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 1066e6ad9ef4SPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 1067eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT5]], <2 x float> poison, <2 x i32> zeroinitializer 1068eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = fmul fast <2 x float> [[DOTSPLAT6]], <float 0.000000e+00, float 1.000000e+00> 1069eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP6]] 1070eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fmul fast float [[TMP0]], 2.000000e+00 1071e6ad9ef4SPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i64 0 1072eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT7]], <2 x float> poison, <2 x i32> zeroinitializer 1073e6ad9ef4SPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 1074eb052f6bSPhilip 
Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 1075eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 1076eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: vector.body: 1077eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1078eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1079eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND9:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ] 1080eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 1081eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <2 x float>* 1082eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND9]], <2 x float>* [[TMP9]], align 4 1083eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] 1084eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = fadd fast <2 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01> 1085eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = fadd fast <2 x float> [[TMP11]], [[TMP10]] 1086eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] 1087eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP14:%.*]] = bitcast float* [[TMP13]] to <2 x float>* 1088eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP12]], <2 x float>* [[TMP14]], align 4 1089eb052f6bSPhilip 
Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]] 1090eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP16:%.*]] = bitcast float* [[TMP15]] to <2 x float>* 1091eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP11]], <2 x float>* [[TMP16]], align 4 1092eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1093eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float -1.000000e+00, float -1.000000e+00> 1094eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]] 1095eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1096eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 1097eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: middle.block: 1098eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] 1099eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 1100eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: for.body: 1101eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 1102eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[Y_012:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] 1103eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[X_011:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 1104eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 1105eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_011]], float* [[ARRAYIDX]], align 4 1106eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] 1107eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 1108eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]] 1109eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] 1110eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: store float [[ADD2]], float* [[ARRAYIDX4]], align 4 1111eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]] 1112eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: store float [[CONV1]], float* [[ARRAYIDX6]], align 4 1113eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1114eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1115eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 1116eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 1117eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: for.end: 1118eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: ret void 1119eb052f6bSPhilip Reames; 1120cee313d2SEric Christopherentry: 1121cee313d2SEric Christopher %cmp9 = icmp sgt i32 %N, 0 1122cee313d2SEric Christopher br i1 %cmp9, label %for.body.lr.ph, label %for.end 1123cee313d2SEric Christopher 1124cee313d2SEric Christopherfor.body.lr.ph: ; preds = %entry 1125cee313d2SEric Christopher %0 = load float, float* @fp_inc, align 4 1126cee313d2SEric 
Christopher br label %for.body 1127cee313d2SEric Christopher 1128cee313d2SEric Christopherfor.body: ; preds = %for.body, %for.body.lr.ph 1129cee313d2SEric Christopher %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] 1130cee313d2SEric Christopher %y.012 = phi float [ 0x3FB99999A0000000, %for.body.lr.ph ], [ %conv1, %for.body ] 1131cee313d2SEric Christopher %x.011 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ] 1132cee313d2SEric Christopher %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 1133cee313d2SEric Christopher store float %x.011, float* %arrayidx, align 4 1134cee313d2SEric Christopher %add = fadd fast float %x.011, %0 1135cee313d2SEric Christopher %conv1 = fadd fast float %y.012, -5.000000e-01 1136cee313d2SEric Christopher %add2 = fadd fast float %conv1, %add 1137cee313d2SEric Christopher %arrayidx4 = getelementptr inbounds float, float* %B, i64 %indvars.iv 1138cee313d2SEric Christopher store float %add2, float* %arrayidx4, align 4 1139cee313d2SEric Christopher %arrayidx6 = getelementptr inbounds float, float* %C, i64 %indvars.iv 1140cee313d2SEric Christopher store float %conv1, float* %arrayidx6, align 4 1141cee313d2SEric Christopher %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1142cee313d2SEric Christopher %lftr.wideiv = trunc i64 %indvars.iv.next to i32 1143cee313d2SEric Christopher %exitcond = icmp eq i32 %lftr.wideiv, %N 1144cee313d2SEric Christopher br i1 %exitcond, label %for.end.loopexit, label %for.body 1145cee313d2SEric Christopher 1146cee313d2SEric Christopherfor.end.loopexit: 1147cee313d2SEric Christopher br label %for.end 1148cee313d2SEric Christopher 1149cee313d2SEric Christopherfor.end: 1150cee313d2SEric Christopher ret void 1151cee313d2SEric Christopher} 1152cee313d2SEric Christopher 1153cee313d2SEric Christopher; Start and step values are constants. 
There is no 'fmul' operation in this case 1154cee313d2SEric Christopher;void fp_iv_loop4(float * __restrict__ A, int N) { 1155cee313d2SEric Christopher; float x = 1.0; 1156cee313d2SEric Christopher; for (int i=0; i < N; ++i) { 1157cee313d2SEric Christopher; A[i] = x; 1158cee313d2SEric Christopher; x += 0.5; 1159cee313d2SEric Christopher; } 1160cee313d2SEric Christopher;} 1161cee313d2SEric Christopher 1162bbba8676SPhilip Reames 1163bbba8676SPhilip Reamesdefine void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) { 1164eb052f6bSPhilip Reames; VEC4_INTERL1-LABEL: @fp_iv_loop4( 1165eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: entry: 1166eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 1167eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 1168eb052f6bSPhilip Reames; VEC4_INTERL1: for.body.preheader: 1169eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 1170eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 1171eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1172eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 1173eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1174eb052f6bSPhilip Reames; VEC4_INTERL1: vector.ph: 1175eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 1176eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 1177eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 1178eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 1179eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 1180eb052f6bSPhilip Reames; VEC4_INTERL1: vector.body: 1181eb052f6bSPhilip Reames; 
VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1182eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1183eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 1184eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>* 1185eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4 1186eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1187eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> 1188eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1189eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 1190eb052f6bSPhilip Reames; VEC4_INTERL1: middle.block: 1191eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1192eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 1193eb052f6bSPhilip Reames; VEC4_INTERL1: scalar.ph: 1194eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 1195eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] 1196eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 1197eb052f6bSPhilip Reames; VEC4_INTERL1: for.body: 1198eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 
[ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1199eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 1200eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 1201eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 1202eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 1203eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1204eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1205eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 1206eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 1207eb052f6bSPhilip Reames; VEC4_INTERL1: for.end.loopexit: 1208eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: br label [[FOR_END]] 1209eb052f6bSPhilip Reames; VEC4_INTERL1: for.end: 1210eb052f6bSPhilip Reames; VEC4_INTERL1-NEXT: ret void 1211eb052f6bSPhilip Reames; 1212eb052f6bSPhilip Reames; VEC4_INTERL2-LABEL: @fp_iv_loop4( 1213eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: entry: 1214eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 1215eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 1216eb052f6bSPhilip Reames; VEC4_INTERL2: for.body.preheader: 1217eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 1218eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 1219eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1220eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 
[[TMP0]], 7 1221eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1222eb052f6bSPhilip Reames; VEC4_INTERL2: vector.ph: 1223eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 1224eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 1225eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 1226eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 1227eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 1228eb052f6bSPhilip Reames; VEC4_INTERL2: vector.body: 1229eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1230eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1231eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> 1232eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 1233eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>* 1234eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4 1235eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 4 1236eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* 1237eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP7]], align 4 1238eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 
[[INDEX]], 8 1239eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00> 1240eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1241eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 1242eb052f6bSPhilip Reames; VEC4_INTERL2: middle.block: 1243eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1244eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 1245eb052f6bSPhilip Reames; VEC4_INTERL2: scalar.ph: 1246eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 1247eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] 1248eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 1249eb052f6bSPhilip Reames; VEC4_INTERL2: for.body: 1250eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1251eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 1252eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 1253eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 1254eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 1255eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1256eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 
[[INDVARS_IV_NEXT]] to i32 1257eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 1258eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 1259eb052f6bSPhilip Reames; VEC4_INTERL2: for.end.loopexit: 1260eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: br label [[FOR_END]] 1261eb052f6bSPhilip Reames; VEC4_INTERL2: for.end: 1262eb052f6bSPhilip Reames; VEC4_INTERL2-NEXT: ret void 1263eb052f6bSPhilip Reames; 1264eb052f6bSPhilip Reames; VEC1_INTERL2-LABEL: @fp_iv_loop4( 1265eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: entry: 1266eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 1267eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 1268eb052f6bSPhilip Reames; VEC1_INTERL2: for.body.preheader: 1269eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 1270eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 1271eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1272eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 1273eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1274eb052f6bSPhilip Reames; VEC1_INTERL2: vector.ph: 1275eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 1276eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 1277eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 1278eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 1279eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 1280eb052f6bSPhilip Reames; VEC1_INTERL2: vector.body: 1281eb052f6bSPhilip 
Reames; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1282eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float 1283eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 5.000000e-01 1284eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[TMP5]], 1.000000e+00 1285eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fadd fast float [[TMP5]], 1.500000e+00 1286b3e8ace1SFlorian Hahn; VEC1_INTERL2-NEXT: [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1 1287eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 1288eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]] 1289eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], float* [[TMP7]], align 4 1290eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[TMP6]], float* [[TMP8]], align 4 1291eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1292eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1293eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 1294eb052f6bSPhilip Reames; VEC1_INTERL2: middle.block: 1295eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1296eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 1297eb052f6bSPhilip Reames; VEC1_INTERL2: scalar.ph: 1298eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 1299eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] 
1300eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 1301eb052f6bSPhilip Reames; VEC1_INTERL2: for.body: 1302eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1303eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 1304eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 1305eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 1306eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 1307eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1308eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1309eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 1310eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 1311eb052f6bSPhilip Reames; VEC1_INTERL2: for.end.loopexit: 1312eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: br label [[FOR_END]] 1313eb052f6bSPhilip Reames; VEC1_INTERL2: for.end: 1314eb052f6bSPhilip Reames; VEC1_INTERL2-NEXT: ret void 1315eb052f6bSPhilip Reames; 1316eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop4( 1317eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: entry: 1318eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 1319eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 1320eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: for.body.preheader: 1321eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 
1322eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 1323eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1324eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0 1325eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 1326eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: vector.ph: 1327eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590 1328eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float 1329eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01 1330eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 1331eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 1332eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: vector.body: 1333eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1334eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ <float 1.000000e+00, float 1.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1335eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 1336eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = bitcast float* [[TMP4]] to <2 x float>* 1337eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], <2 x float>* [[TMP5]], align 4 1338eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1339eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x 
float> [[VEC_IND]], <float 1.000000e+00, float 1.000000e+00> 1340eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1341eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 1342eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: middle.block: 1343eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1344eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 1345eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: for.body: 1346eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 1347eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] 1348eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 1349eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_06]], float* [[ARRAYIDX]], align 4 1350eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 1351eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1352eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1353eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 1354eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 1355eb052f6bSPhilip Reames; VEC2_INTERL1_PRED_STORE: for.end: 1356eb052f6bSPhilip Reames; 
; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
;
entry:
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.06, float* %arrayidx, align 4
  %conv1 = fadd fast float %x.06, 5.000000e-01
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Secondary floating-point induction that is used only by a conditional store.
;
; C-like sketch of the loop below. The loop is bottom-tested, so the body runs
; at least once even when N <= 0 (hence the smax(N, 1) trip count in the
; expected output):
;
;   float j = 0.0f;
;   int64_t i = 0;
;   do {
;     if (A[i] == 0.0f)
;       A[i] = j;
;     i += 1;
;     j += 1.0f;            // fadd fast
;   } while (i < N);
;
; In every configuration the expected vector body does not widen %j. Its value
; is rebuilt from the integer index (sitofp of the index), and every lane other
; than the first adds its constant lane offset inside its own predicated store
; block.
;
; NOTE(review): in the width-4 / interleave-2 expectations the FileCheck
; variable names (PRED_STORE_IF4, PRED_STORE_CONTINUE5, ...,
; PRED_STORE_CONTINUE17) are numbered one higher than the literal block labels
; they sit next to (pred.store.if3, pred.store.continue4, ...,
; pred.store.continue16). Variable names are arbitrary to FileCheck, but the
; literal labels must match the output of opt. Regenerate the assertions with
; utils/update_test_checks.py instead of editing them by hand.

define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
; VEC4_INTERL1-LABEL: @non_primary_iv_float_scalar(
; VEC4_INTERL1-NEXT:  entry:
; VEC4_INTERL1-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; VEC4_INTERL1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
; VEC4_INTERL1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VEC4_INTERL1:       vector.ph:
; VEC4_INTERL1-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
; VEC4_INTERL1-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
; VEC4_INTERL1-NEXT:    br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL1:       vector.body:
; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to <4 x float>*
; VEC4_INTERL1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
; VEC4_INTERL1-NEXT:    [[TMP3:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
; VEC4_INTERL1-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC4_INTERL1:       pred.store.if:
; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
; VEC4_INTERL1-NEXT:    store float [[TMP0]], float* [[TMP5]], align 4
; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE]]
; VEC4_INTERL1:       pred.store.continue:
; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
; VEC4_INTERL1-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
; VEC4_INTERL1:       pred.store.if2:
; VEC4_INTERL1-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC4_INTERL1-NEXT:    [[TMP8:%.*]] = or i64 [[INDEX]], 1
; VEC4_INTERL1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]]
; VEC4_INTERL1-NEXT:    store float [[TMP7]], float* [[TMP9]], align 4
; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE3]]
; VEC4_INTERL1:       pred.store.continue3:
; VEC4_INTERL1-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
; VEC4_INTERL1-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; VEC4_INTERL1:       pred.store.if4:
; VEC4_INTERL1-NEXT:    [[TMP11:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
; VEC4_INTERL1-NEXT:    [[TMP12:%.*]] = or i64 [[INDEX]], 2
; VEC4_INTERL1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
; VEC4_INTERL1-NEXT:    store float [[TMP11]], float* [[TMP13]], align 4
; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE5]]
; VEC4_INTERL1:       pred.store.continue5:
; VEC4_INTERL1-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
; VEC4_INTERL1-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
; VEC4_INTERL1:       pred.store.if6:
; VEC4_INTERL1-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
; VEC4_INTERL1-NEXT:    [[TMP16:%.*]] = or i64 [[INDEX]], 3
; VEC4_INTERL1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
; VEC4_INTERL1-NEXT:    store float [[TMP15]], float* [[TMP17]], align 4
; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE7]]
; VEC4_INTERL1:       pred.store.continue7:
; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VEC4_INTERL1-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC4_INTERL1-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VEC4_INTERL1:       middle.block:
; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VEC4_INTERL1:       scalar.ph:
; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; VEC4_INTERL1-NEXT:    br label [[FOR_BODY:%.*]]
; VEC4_INTERL1:       for.body:
; VEC4_INTERL1-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; VEC4_INTERL1-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; VEC4_INTERL1-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
; VEC4_INTERL1-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
; VEC4_INTERL1-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
; VEC4_INTERL1-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
; VEC4_INTERL1:       if.pred:
; VEC4_INTERL1-NEXT:    store float [[J]], float* [[VAR0]], align 4
; VEC4_INTERL1-NEXT:    br label [[FOR_INC]]
; VEC4_INTERL1:       for.inc:
; VEC4_INTERL1-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; VEC4_INTERL1-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
; VEC4_INTERL1-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; VEC4_INTERL1-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP13:![0-9]+]]
; VEC4_INTERL1:       for.end:
; VEC4_INTERL1-NEXT:    ret void
;
; VEC4_INTERL2-LABEL: @non_primary_iv_float_scalar(
; VEC4_INTERL2-NEXT:  entry:
; VEC4_INTERL2-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; VEC4_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8
; VEC4_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VEC4_INTERL2:       vector.ph:
; VEC4_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775800
; VEC4_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
; VEC4_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL2:       vector.body:
; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE17:%.*]] ]
; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 4
; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
; VEC4_INTERL2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 4
; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
; VEC4_INTERL2-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x float>, <4 x float>* [[TMP5]], align 4
; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD3]], zeroinitializer
; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP6]], i64 0
; VEC4_INTERL2-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC4_INTERL2:       pred.store.if:
; VEC4_INTERL2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT:    store float [[TMP0]], float* [[TMP9]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE]]
; VEC4_INTERL2:       pred.store.continue:
; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i64 1
; VEC4_INTERL2-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; VEC4_INTERL2:       pred.store.if3:
; VEC4_INTERL2-NEXT:    [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP12:%.*]] = or i64 [[INDEX]], 1
; VEC4_INTERL2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
; VEC4_INTERL2-NEXT:    store float [[TMP11]], float* [[TMP13]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
; VEC4_INTERL2:       pred.store.continue4:
; VEC4_INTERL2-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i64 2
; VEC4_INTERL2-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
; VEC4_INTERL2:       pred.store.if5:
; VEC4_INTERL2-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP16:%.*]] = or i64 [[INDEX]], 2
; VEC4_INTERL2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
; VEC4_INTERL2-NEXT:    store float [[TMP15]], float* [[TMP17]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
; VEC4_INTERL2:       pred.store.continue6:
; VEC4_INTERL2-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP6]], i64 3
; VEC4_INTERL2-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
; VEC4_INTERL2:       pred.store.if7:
; VEC4_INTERL2-NEXT:    [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP20:%.*]] = or i64 [[INDEX]], 3
; VEC4_INTERL2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP20]]
; VEC4_INTERL2-NEXT:    store float [[TMP19]], float* [[TMP21]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE9]]
; VEC4_INTERL2:       pred.store.continue8:
; VEC4_INTERL2-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP7]], i64 0
; VEC4_INTERL2-NEXT:    br i1 [[TMP22]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
; VEC4_INTERL2:       pred.store.if9:
; VEC4_INTERL2-NEXT:    [[TMP23:%.*]] = fadd fast float [[TMP0]], 4.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]]
; VEC4_INTERL2-NEXT:    store float [[TMP23]], float* [[TMP24]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE11]]
; VEC4_INTERL2:       pred.store.continue10:
; VEC4_INTERL2-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP7]], i64 1
; VEC4_INTERL2-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
; VEC4_INTERL2:       pred.store.if11:
; VEC4_INTERL2-NEXT:    [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP27:%.*]] = or i64 [[INDEX]], 5
; VEC4_INTERL2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]]
; VEC4_INTERL2-NEXT:    store float [[TMP26]], float* [[TMP28]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE13]]
; VEC4_INTERL2:       pred.store.continue12:
; VEC4_INTERL2-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP7]], i64 2
; VEC4_INTERL2-NEXT:    br i1 [[TMP29]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
; VEC4_INTERL2:       pred.store.if13:
; VEC4_INTERL2-NEXT:    [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 6
; VEC4_INTERL2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]]
; VEC4_INTERL2-NEXT:    store float [[TMP30]], float* [[TMP32]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE15]]
; VEC4_INTERL2:       pred.store.continue14:
; VEC4_INTERL2-NEXT:    [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i64 3
; VEC4_INTERL2-NEXT:    br i1 [[TMP33]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17]]
; VEC4_INTERL2:       pred.store.if15:
; VEC4_INTERL2-NEXT:    [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00
; VEC4_INTERL2-NEXT:    [[TMP35:%.*]] = or i64 [[INDEX]], 7
; VEC4_INTERL2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP35]]
; VEC4_INTERL2-NEXT:    store float [[TMP34]], float* [[TMP36]], align 4
; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE17]]
; VEC4_INTERL2:       pred.store.continue16:
; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; VEC4_INTERL2-NEXT:    [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC4_INTERL2-NEXT:    br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VEC4_INTERL2:       middle.block:
; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VEC4_INTERL2:       scalar.ph:
; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; VEC4_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
; VEC4_INTERL2:       for.body:
; VEC4_INTERL2-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; VEC4_INTERL2-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; VEC4_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
; VEC4_INTERL2-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
; VEC4_INTERL2-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
; VEC4_INTERL2-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
; VEC4_INTERL2:       if.pred:
; VEC4_INTERL2-NEXT:    store float [[J]], float* [[VAR0]], align 4
; VEC4_INTERL2-NEXT:    br label [[FOR_INC]]
; VEC4_INTERL2:       for.inc:
; VEC4_INTERL2-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; VEC4_INTERL2-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
; VEC4_INTERL2-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; VEC4_INTERL2-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP13:![0-9]+]]
; VEC4_INTERL2:       for.end:
; VEC4_INTERL2-NEXT:    ret void
;
; VEC1_INTERL2-LABEL: @non_primary_iv_float_scalar(
; VEC1_INTERL2-NEXT:  entry:
; VEC1_INTERL2-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; VEC1_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
; VEC1_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VEC1_INTERL2:       vector.ph:
; VEC1_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
; VEC1_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
; VEC1_INTERL2:       vector.body:
; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = load float, float* [[TMP1]], align 4
; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = load float, float* [[TMP2]], align 4
; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fcmp fast oeq float [[TMP3]], 0.000000e+00
; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fcmp fast oeq float [[TMP4]], 0.000000e+00
; VEC1_INTERL2-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC1_INTERL2:       pred.store.if:
; VEC1_INTERL2-NEXT:    store float [[TMP0]], float* [[TMP1]], align 4
; VEC1_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE]]
; VEC1_INTERL2:       pred.store.continue:
; VEC1_INTERL2-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
; VEC1_INTERL2:       pred.store.if3:
; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC1_INTERL2-NEXT:    store float [[TMP7]], float* [[TMP2]], align 4
; VEC1_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE4]]
; VEC1_INTERL2:       pred.store.continue4:
; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC1_INTERL2-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; VEC1_INTERL2:       middle.block:
; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VEC1_INTERL2:       scalar.ph:
; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; VEC1_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
; VEC1_INTERL2:       for.body:
; VEC1_INTERL2-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; VEC1_INTERL2-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
; VEC1_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
; VEC1_INTERL2-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
; VEC1_INTERL2-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
; VEC1_INTERL2-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
; VEC1_INTERL2:       if.pred:
; VEC1_INTERL2-NEXT:    store float [[J]], float* [[VAR0]], align 4
; VEC1_INTERL2-NEXT:    br label [[FOR_INC]]
; VEC1_INTERL2:       for.inc:
; VEC1_INTERL2-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; VEC1_INTERL2-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
; VEC1_INTERL2-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; VEC1_INTERL2-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]]
; VEC1_INTERL2:       for.end:
; VEC1_INTERL2-NEXT:    ret void
;
; VEC2_INTERL1_PRED_STORE-LABEL: @non_primary_iv_float_scalar(
; VEC2_INTERL1_PRED_STORE-NEXT:  entry:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; VEC2_INTERL1_PRED_STORE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
; VEC2_INTERL1_PRED_STORE:       vector.ph:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
; VEC2_INTERL1_PRED_STORE-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[VECTOR_BODY:%.*]]
; VEC2_INTERL1_PRED_STORE:       vector.body:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to <2 x float>*
; VEC2_INTERL1_PRED_STORE-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = fcmp fast oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i64 0
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC2_INTERL1_PRED_STORE:       pred.store.if:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[TMP0]], float* [[TMP5]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[PRED_STORE_CONTINUE]]
; VEC2_INTERL1_PRED_STORE:       pred.store.continue:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; VEC2_INTERL1_PRED_STORE:       pred.store.if2:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = or i64 [[INDEX]], 1
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]]
; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[TMP7]], float* [[TMP9]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[PRED_STORE_CONTINUE3]]
; VEC2_INTERL1_PRED_STORE:       pred.store.continue3:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VEC2_INTERL1_PRED_STORE:       middle.block:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VEC2_INTERL1_PRED_STORE:       for.body:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VEC2_INTERL1_PRED_STORE-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
; VEC2_INTERL1_PRED_STORE:       if.pred:
; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[J]], float* [[VAR0]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[FOR_INC]]
; VEC2_INTERL1_PRED_STORE:       for.inc:
; VEC2_INTERL1_PRED_STORE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; VEC2_INTERL1_PRED_STORE-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
; VEC2_INTERL1_PRED_STORE-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP13:![0-9]+]]
; VEC2_INTERL1_PRED_STORE:       for.end:
; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  ; %i is the primary integer induction, %j the secondary float induction.
  %i = phi i64 [ %i.next, %for.inc ], [ 0, %entry ]
  %j = phi float [ %j.next, %for.inc ], [ 0.0, %entry ]
  %var0 = getelementptr inbounds float, float* %A, i64 %i
  %var1 = load float, float* %var0, align 4
  %var2 = fcmp fast oeq float %var1, 0.0
  br i1 %var2, label %if.pred, label %for.inc

if.pred:
  ; The only use of %j besides its own increment: a store guarded by %var2.
  store float %j, float* %var0, align 4
  br label %for.inc

for.inc:
  %i.next = add nuw nsw i64 %i, 1
  %j.next = fadd fast float %j, 1.0
  %cond = icmp slt i64 %i.next, %N
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}