; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -verify-loop-info -simplifycfg -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s --check-prefix=UNROLL
; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -verify-loop-info < %s | FileCheck %s --check-prefix=UNROLL-NOSIMPLIFY
; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -verify-loop-info -simplifycfg -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s --check-prefix=VEC

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; Test predication of stores.
define i32 @test(i32* nocapture %f) #0 {
; UNROLL-LABEL: @test(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; UNROLL-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[F:%.*]], i64 [[INDUCTION]]
; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDUCTION1]]
; UNROLL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
; UNROLL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
; UNROLL-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], 100
; UNROLL-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], 100
; UNROLL-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL: pred.store.if:
; UNROLL-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP2]], 20
; UNROLL-NEXT: store i32 [[TMP6]], i32* [[TMP0]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL: pred.store.continue:
; UNROLL-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; UNROLL: pred.store.if2:
; UNROLL-NEXT: [[TMP7:%.*]] = add nsw i32 [[TMP3]], 20
; UNROLL-NEXT: store i32 [[TMP7]], i32* [[TMP1]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE3]]
; UNROLL: pred.store.continue3:
; UNROLL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, 128
; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
; UNROLL: for.body:
; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 128, [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
; UNROLL-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; UNROLL-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP9]], 100
; UNROLL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; UNROLL: if.then:
; UNROLL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 20
; UNROLL-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
; UNROLL-NEXT: br label [[FOR_INC]]
; UNROLL: for.inc:
; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
; UNROLL: for.end:
; UNROLL-NEXT: ret i32 0
;
; UNROLL-NOSIMPLIFY-LABEL: @test(
; UNROLL-NOSIMPLIFY-NEXT: entry:
; UNROLL-NOSIMPLIFY-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NOSIMPLIFY: vector.ph:
; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: vector.body:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[F:%.*]], i64 [[INDUCTION]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDUCTION1]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], 100
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], 100
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NOSIMPLIFY: pred.store.if:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP2]], 20
; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP6]], i32* [[TMP0]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NOSIMPLIFY: pred.store.continue:
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
; UNROLL-NOSIMPLIFY: pred.store.if2:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = add nsw i32 [[TMP3]], 20
; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP7]], i32* [[TMP1]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE3]]
; UNROLL-NOSIMPLIFY: pred.store.continue3:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; UNROLL-NOSIMPLIFY: middle.block:
; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, 128
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NOSIMPLIFY: scalar.ph:
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: for.body:
; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP9]], 100
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; UNROLL-NOSIMPLIFY: if.then:
; UNROLL-NOSIMPLIFY-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 20
; UNROLL-NOSIMPLIFY-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC]]
; UNROLL-NOSIMPLIFY: for.inc:
; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
; UNROLL-NOSIMPLIFY: for.end:
; UNROLL-NOSIMPLIFY-NEXT: ret i32 0
;
; VEC-LABEL: @test(
; VEC-NEXT: entry:
; VEC-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC: vector.body:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[F:%.*]], i64 [[TMP0]]
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
; VEC-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
; VEC-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], <i32 100, i32 100>
; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
; VEC-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VEC: pred.store.if:
; VEC-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
; VEC-NEXT: [[TMP7:%.*]] = add nsw i32 [[TMP6]], 20
; VEC-NEXT: store i32 [[TMP7]], i32* [[TMP1]], align 4
; VEC-NEXT: br label [[PRED_STORE_CONTINUE]]
; VEC: pred.store.continue:
; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
; VEC-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.if1:
; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
; VEC-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], 20
; VEC-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
; VEC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[TMP11]]
; VEC-NEXT: store i32 [[TMP10]], i32* [[TMP12]], align 4
; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.continue2:
; VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; VEC-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; VEC-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; VEC: middle.block:
; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 128, 128
; VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
; VEC: for.body:
; VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 128, [[MIDDLE_BLOCK]] ]
; VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
; VEC-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; VEC-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP14]], 100
; VEC-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; VEC: if.then:
; VEC-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], 20
; VEC-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
; VEC-NEXT: br label [[FOR_INC]]
; VEC: for.inc:
; VEC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; VEC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
; VEC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
; VEC: for.end:
; VEC-NEXT: ret i32 0
;
; Input loop: walk f[0..127]; when f[i] > 100, store f[i]+20 back to f[i].
; The store is control-dependent on the compare, so the vectorizer must
; predicate it (pred.store.if blocks above) under each RUN configuration.
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %cmp1 = icmp sgt i32 %0, 100
  br i1 %cmp1, label %if.then, label %for.inc

if.then:
  %add = add nsw i32 %0, 20
  store i32 %add, i32* %arrayidx, align 4
  br label %for.inc

for.inc:
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 128
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 0
}

; Track basic blocks when unrolling conditional blocks. This code used to assert
; because we did not update the phi nodes with the proper predecessor in the
; vectorized loop body.
; PR18724

define void @bug18724(i1 %cond) {
; UNROLL-LABEL: @bug18724(
; UNROLL-NEXT: entry:
; UNROLL-NEXT: [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
; UNROLL-NEXT: call void @llvm.assume(i1 [[TMP0]])
; UNROLL-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 undef, i32 0)
; UNROLL-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], undef
; UNROLL-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; UNROLL-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
; UNROLL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
; UNROLL-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; UNROLL-NEXT: [[IND_END:%.*]] = add i64 undef, [[N_VEC]]
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE4]] ]
; UNROLL-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI5:%.*]], [[PRED_STORE_CONTINUE4]] ]
; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i64 undef, [[INDEX]]
; UNROLL-NEXT: [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
; UNROLL-NEXT: [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 1
; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION]]
; UNROLL-NEXT: [[TMP5:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION1]]
; UNROLL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4
; UNROLL-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
; UNROLL-NEXT: br i1 undef, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE4]]
; UNROLL: pred.store.if:
; UNROLL-NEXT: store i32 2, i32* [[TMP4]], align 4
; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE4]]
; UNROLL: pred.store.continue4:
; UNROLL-NEXT: [[TMP8:%.*]] = add i32 [[VEC_PHI]], 1
; UNROLL-NEXT: [[TMP9:%.*]] = add i32 [[VEC_PHI2]], 1
; UNROLL-NEXT: [[PREDPHI]] = select i1 undef, i32 [[VEC_PHI]], i32 [[TMP8]]
; UNROLL-NEXT: [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP9]]
; UNROLL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; UNROLL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]]
; UNROLL: middle.block:
; UNROLL-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI5]], [[PREDPHI]]
; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; UNROLL-NEXT: [[TMP11:%.*]] = xor i1 [[CMP_N]], true
; UNROLL-NEXT: call void @llvm.assume(i1 [[TMP11]])
; UNROLL-NEXT: br label [[SCALAR_PH]]
; UNROLL: scalar.ph:
; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY:%.*]] ]
; UNROLL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NEXT: br label [[FOR_BODY14:%.*]]
; UNROLL: for.body14:
; UNROLL-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]]
; UNROLL-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4
; UNROLL-NEXT: br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]]
; UNROLL: if.then18:
; UNROLL-NEXT: store i32 2, i32* [[ARRAYIDX16]], align 4
; UNROLL-NEXT: [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1
; UNROLL-NEXT: br label [[FOR_INC23]]
; UNROLL: for.inc23:
; UNROLL-NEXT: [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
; UNROLL-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
; UNROLL-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
; UNROLL-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
; UNROLL-NEXT: call void @llvm.assume(i1 [[CMP13]])
; UNROLL-NEXT: br label [[FOR_BODY14]]
;
; UNROLL-NOSIMPLIFY-LABEL: @bug18724(
; UNROLL-NOSIMPLIFY-NEXT: entry:
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY9:%.*]]
; UNROLL-NOSIMPLIFY: for.body9:
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND:%.*]], label [[FOR_INC26:%.*]], label [[FOR_BODY14_PREHEADER:%.*]]
; UNROLL-NOSIMPLIFY: for.body14.preheader:
; UNROLL-NOSIMPLIFY-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 undef, i32 0)
; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = sub i32 [[SMAX]], undef
; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; UNROLL-NOSIMPLIFY: vector.ph:
; UNROLL-NOSIMPLIFY-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
; UNROLL-NOSIMPLIFY-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
; UNROLL-NOSIMPLIFY-NEXT: [[IND_END:%.*]] = add i64 undef, [[N_VEC]]
; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NOSIMPLIFY: vector.body:
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE4]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI5:%.*]], [[PRED_STORE_CONTINUE4]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[OFFSET_IDX:%.*]] = add i64 undef, [[INDEX]]
; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDUCTION1]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br i1 undef, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; UNROLL-NOSIMPLIFY: pred.store.if:
; UNROLL-NOSIMPLIFY-NEXT: store i32 2, i32* [[TMP3]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]]
; UNROLL-NOSIMPLIFY: pred.store.continue:
; UNROLL-NOSIMPLIFY-NEXT: br i1 undef, label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
; UNROLL-NOSIMPLIFY: pred.store.if3:
; UNROLL-NOSIMPLIFY-NEXT: store i32 2, i32* [[TMP4]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE4]]
; UNROLL-NOSIMPLIFY: pred.store.continue4:
; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = add i32 [[VEC_PHI]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = add i32 [[VEC_PHI2]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI]] = select i1 undef, i32 [[VEC_PHI]], i32 [[TMP7]]
; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP8]]
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]]
; UNROLL-NOSIMPLIFY: middle.block:
; UNROLL-NOSIMPLIFY-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI5]], [[PREDPHI]]
; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP_N]], label [[FOR_INC26_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NOSIMPLIFY: scalar.ph:
; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ undef, [[FOR_BODY14_PREHEADER]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ undef, [[FOR_BODY14_PREHEADER]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY14:%.*]]
; UNROLL-NOSIMPLIFY: for.body14:
; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]]
; UNROLL-NOSIMPLIFY-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4
; UNROLL-NOSIMPLIFY-NEXT: br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]]
; UNROLL-NOSIMPLIFY: if.then18:
; UNROLL-NOSIMPLIFY-NEXT: store i32 2, i32* [[ARRAYIDX16]], align 4
; UNROLL-NOSIMPLIFY-NEXT: [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC23]]
; UNROLL-NOSIMPLIFY: for.inc23:
; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
; UNROLL-NOSIMPLIFY-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
; UNROLL-NOSIMPLIFY-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP13]], label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], [[LOOP4:!llvm.loop !.*]]
; UNROLL-NOSIMPLIFY: for.inc26.loopexit:
; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_2_LCSSA:%.*]] = phi i32 [ [[INEWCHUNKS_2]], [[FOR_INC23]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC26]]
; UNROLL-NOSIMPLIFY: for.inc26:
; UNROLL-NOSIMPLIFY-NEXT: [[INEWCHUNKS_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY9]] ], [ [[INEWCHUNKS_2_LCSSA]], [[FOR_INC26_LOOPEXIT]] ]
; UNROLL-NOSIMPLIFY-NEXT: unreachable
;
; VEC-LABEL: @bug18724(
; VEC-NEXT: entry:
; VEC-NEXT: [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
; VEC-NEXT: call void @llvm.assume(i1 [[TMP0]])
; VEC-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 undef, i32 0)
; VEC-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], undef
; VEC-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
; VEC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
; VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VEC: vector.ph:
; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
; VEC-NEXT: [[IND_END:%.*]] = add i64 undef, [[N_VEC]]
; VEC-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC: vector.body:
; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; VEC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 undef, i32 0>, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE2]] ]
; VEC-NEXT: [[OFFSET_IDX:%.*]] = add i64 undef, [[INDEX]]
; VEC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
; VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[TMP4]]
; VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
; VEC-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>*
; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP7]], align 4
; VEC-NEXT: br i1 undef, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.if:
; VEC-NEXT: store i32 2, i32* [[TMP5]], align 4
; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]]
; VEC: pred.store.continue2:
; VEC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[VEC_PHI]], <i32 1, i32 1>
; VEC-NEXT: [[PREDPHI]] = select <2 x i1> undef, <2 x i32> [[VEC_PHI]], <2 x i32> [[TMP8]]
; VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; VEC-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; VEC: middle.block:
; VEC-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[PREDPHI]])
; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
; VEC-NEXT: [[TMP11:%.*]] = xor i1 [[CMP_N]], true
; VEC-NEXT: call void @llvm.assume(i1 [[TMP11]])
; VEC-NEXT: br label [[SCALAR_PH]]
; VEC: scalar.ph:
; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY:%.*]] ]
; VEC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ undef, [[ENTRY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
; VEC-NEXT: br label [[FOR_BODY14:%.*]]
; VEC: for.body14:
; VEC-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; VEC-NEXT: [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; VEC-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]]
; VEC-NEXT: [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4
; VEC-NEXT: br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]]
; VEC: if.then18:
; VEC-NEXT: store i32 2, i32* [[ARRAYIDX16]], align 4
; VEC-NEXT: [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1
; VEC-NEXT: br label [[FOR_INC23]]
; VEC: for.inc23:
; VEC-NEXT: [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
; VEC-NEXT: [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
; VEC-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
; VEC-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
; VEC-NEXT: call void @llvm.assume(i1 [[CMP13]])
; VEC-NEXT: br label [[FOR_BODY14]]
;
; Reduced input from PR18724: an inner loop with a predicated store to a
; [768 x i32] array and an integer counter (%iNewChunks) that is bumped only
; on the store path, so the vectorized form needs a predicated (select-based)
; reduction phi. Trip counts and several operands are deliberately undef —
; the test only cares that the phi-node predecessors in the vectorized loop
; are wired correctly, not about the computed values.
entry:
  br label %for.body9

for.body9:
  br i1 %cond, label %for.inc26, label %for.body14

for.body14:
  %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc23 ], [ undef, %for.body9 ]
  %iNewChunks.120 = phi i32 [ %iNewChunks.2, %for.inc23 ], [ undef, %for.body9 ]
  %arrayidx16 = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 %indvars.iv3
  %tmp = load i32, i32* %arrayidx16, align 4
  br i1 undef, label %if.then18, label %for.inc23

if.then18:
  store i32 2, i32* %arrayidx16, align 4
  %inc21 = add nsw i32 %iNewChunks.120, 1
  br label %for.inc23

for.inc23:
  %iNewChunks.2 = phi i32 [ %inc21, %if.then18 ], [ %iNewChunks.120, %for.body14 ]
  %indvars.iv.next4 = add nsw i64 %indvars.iv3, 1
  %tmp1 = trunc i64 %indvars.iv3 to i32
  %cmp13 = icmp slt i32 %tmp1, 0
  br i1 %cmp13, label %for.body14, label %for.inc26

for.inc26:
  %iNewChunks.1.lcssa = phi i32 [ undef, %for.body9 ], [ %iNewChunks.2, %for.inc23 ]
  unreachable
}

; In the test below, it's more profitable for the expression feeding the
; conditional store to remain scalar. Since we can only type-shrink vector
; types, we shouldn't try to represent the expression in a smaller type.
432; 433define void @minimal_bit_widths(i1 %c) { 434; UNROLL-LABEL: @minimal_bit_widths( 435; UNROLL-NEXT: entry: 436; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] 437; UNROLL: vector.body: 438; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] 439; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]] 440; UNROLL-NEXT: [[INDUCTION3:%.*]] = add i64 [[OFFSET_IDX]], 0 441; UNROLL-NEXT: [[INDUCTION4:%.*]] = add i64 [[OFFSET_IDX]], -1 442; UNROLL-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE6]] 443; UNROLL: pred.store.if: 444; UNROLL-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 445; UNROLL-NEXT: [[TMP0:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION]] 446; UNROLL-NEXT: [[TMP1:%.*]] = load i8, i8* [[TMP0]], align 1 447; UNROLL-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 448; UNROLL-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 449; UNROLL-NEXT: store i8 [[TMP3]], i8* [[TMP0]], align 1 450; UNROLL-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 1 451; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION2]] 452; UNROLL-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 453; UNROLL-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 454; UNROLL-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8 455; UNROLL-NEXT: store i8 [[TMP7]], i8* [[TMP4]], align 1 456; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE6]] 457; UNROLL: pred.store.continue6: 458; UNROLL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 459; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef 460; UNROLL-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] 461; UNROLL: middle.block: 462; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef 463; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] 464; UNROLL: for.body: 465; UNROLL-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ undef, [[MIDDLE_BLOCK]] ] 466; 
UNROLL-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ undef, [[MIDDLE_BLOCK]] ] 467; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]] 468; UNROLL-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1 469; UNROLL-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]] 470; UNROLL: if.then: 471; UNROLL-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 472; UNROLL-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 473; UNROLL-NEXT: store i8 [[TMP5]], i8* [[TMP2]], align 1 474; UNROLL-NEXT: br label [[FOR_INC]] 475; UNROLL: for.inc: 476; UNROLL-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 477; UNROLL-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 478; UNROLL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 479; UNROLL-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]] 480; UNROLL: for.end: 481; UNROLL-NEXT: ret void 482; 483; UNROLL-NOSIMPLIFY-LABEL: @minimal_bit_widths( 484; UNROLL-NOSIMPLIFY-NEXT: entry: 485; UNROLL-NOSIMPLIFY-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 486; UNROLL-NOSIMPLIFY: vector.ph: 487; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]] 488; UNROLL-NOSIMPLIFY: vector.body: 489; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] 490; UNROLL-NOSIMPLIFY-NEXT: [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]] 491; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION3:%.*]] = add i64 [[OFFSET_IDX]], 0 492; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION4:%.*]] = add i64 [[OFFSET_IDX]], -1 493; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 494; UNROLL-NOSIMPLIFY: pred.store.if: 495; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 496; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION]] 497; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = load i8, i8* [[TMP0]], align 1 498; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 499; 
UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 500; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP3]], i8* [[TMP0]], align 1 501; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]] 502; UNROLL-NOSIMPLIFY: pred.store.continue: 503; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] 504; UNROLL-NOSIMPLIFY: pred.store.if5: 505; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 1 506; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* undef, i64 [[INDUCTION2]] 507; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 1 508; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 509; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8 510; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP7]], i8* [[TMP4]], align 1 511; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE6]] 512; UNROLL-NOSIMPLIFY: pred.store.continue6: 513; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 514; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef 515; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] 516; UNROLL-NOSIMPLIFY: middle.block: 517; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef 518; UNROLL-NOSIMPLIFY-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 519; UNROLL-NOSIMPLIFY: scalar.ph: 520; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 521; UNROLL-NOSIMPLIFY-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY]] ] 522; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_BODY:%.*]] 523; UNROLL-NOSIMPLIFY: for.body: 524; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 525; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 526; 
UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]] 527; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1 528; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]] 529; UNROLL-NOSIMPLIFY: if.then: 530; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 531; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 532; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP5]], i8* [[TMP2]], align 1 533; UNROLL-NOSIMPLIFY-NEXT: br label [[FOR_INC]] 534; UNROLL-NOSIMPLIFY: for.inc: 535; UNROLL-NOSIMPLIFY-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 536; UNROLL-NOSIMPLIFY-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 537; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 538; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP6:!llvm.loop !.*]] 539; UNROLL-NOSIMPLIFY: for.end: 540; UNROLL-NOSIMPLIFY-NEXT: ret void 541; 542; VEC-LABEL: @minimal_bit_widths( 543; VEC-NEXT: entry: 544; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0 545; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer 546; VEC-NEXT: br label [[VECTOR_BODY:%.*]] 547; VEC: vector.body: 548; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] 549; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 550; VEC-NEXT: [[OFFSET_IDX:%.*]] = sub i64 undef, [[INDEX]] 551; VEC-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0 552; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]] 553; VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, i8* [[TMP2]], i32 0 554; VEC-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <2 x i8>* 555; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, <2 x i8>* [[TMP4]], align 1 556; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0 557; VEC-NEXT: br i1 [[TMP5]], label 
[[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 558; VEC: pred.store.if: 559; VEC-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0 560; VEC-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32 561; VEC-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 562; VEC-NEXT: store i8 [[TMP8]], i8* [[TMP2]], align 1 563; VEC-NEXT: br label [[PRED_STORE_CONTINUE]] 564; VEC: pred.store.continue: 565; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1 566; VEC-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] 567; VEC: pred.store.if2: 568; VEC-NEXT: [[TMP10:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1 569; VEC-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32 570; VEC-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8 571; VEC-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 1 572; VEC-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* undef, i64 [[TMP13]] 573; VEC-NEXT: store i8 [[TMP12]], i8* [[TMP14]], align 1 574; VEC-NEXT: br label [[PRED_STORE_CONTINUE3]] 575; VEC: pred.store.continue3: 576; VEC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 577; VEC-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef 578; VEC-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP5:!llvm.loop !.*]] 579; VEC: middle.block: 580; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef 581; VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]] 582; VEC: for.body: 583; VEC-NEXT: [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ undef, [[MIDDLE_BLOCK]] ] 584; VEC-NEXT: [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ undef, [[MIDDLE_BLOCK]] ] 585; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]] 586; VEC-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1 587; VEC-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]] 588; VEC: if.then: 589; VEC-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 590; VEC-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 591; 
VEC-NEXT: store i8 [[TMP5]], i8* [[TMP2]], align 1 592; VEC-NEXT: br label [[FOR_INC]] 593; VEC: for.inc: 594; VEC-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 595; VEC-NEXT: [[TMP7]] = add i64 [[TMP1]], -1 596; VEC-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 597; VEC-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP6:!llvm.loop !.*]] 598; VEC: for.end: 599; VEC-NEXT: ret void 600; 601entry: 602 br label %for.body 603 604for.body: 605 %tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ] 606 %tmp1 = phi i64 [ %tmp7, %for.inc ], [ undef, %entry ] 607 %tmp2 = getelementptr i8, i8* undef, i64 %tmp0 608 %tmp3 = load i8, i8* %tmp2, align 1 609 br i1 %c, label %if.then, label %for.inc 610 611if.then: 612 %tmp4 = zext i8 %tmp3 to i32 613 %tmp5 = trunc i32 %tmp4 to i8 614 store i8 %tmp5, i8* %tmp2, align 1 615 br label %for.inc 616 617for.inc: 618 %tmp6 = add nuw nsw i64 %tmp0, 1 619 %tmp7 = add i64 %tmp1, -1 620 %tmp8 = icmp eq i64 %tmp7, 0 621 br i1 %tmp8, label %for.end, label %for.body 622 623for.end: 624 ret void 625} 626