; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-vectorize -force-vector-width=2 -S %s | FileCheck %s

; Exercises the loop vectorizer, with the vector width forced to 2, on loops
; whose bodies contain phi nodes fed from conditionally executed blocks.

; 32-element i16 arrays read from (@src) and written to (@dst) by the loops.
@src = external global [32 x i16], align 1
@dst = external global [32 x i16], align 1

; The load in the loop does not need predication, because the accessed memory
; is de-referenceable for all loop iterations.
; %blend has a single incoming value and loop.cond is reached unconditionally,
; so the only select expected in the vector body is the one for %res.
define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) {
; CHECK-LABEL: @single_incoming_phi_no_blend_mask(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[INDEX]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = add i16 [[TMP2]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i16>, <2 x i16>* [[TMP6]], align 1
; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> <i16 1, i16 1>, <2 x i16> [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 [[TMP0]]
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i32 0
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16* [[TMP10]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> [[PREDPHI]], <2 x i16>* [[TMP11]], align 2
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 32, 32
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
; CHECK-NEXT:    br label [[LOOP_COND:%.*]]
; CHECK:       loop.cond:
; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ]
; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[BLEND]]
; CHECK-NEXT:    [[LV:%.*]] = load i16, i16* [[SRC_PTR]], align 1
; CHECK-NEXT:    [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]]
; CHECK-NEXT:    br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
; CHECK:       loop.next:
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[RES:%.*]] = phi i16 [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ]
; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 [[IV]]
; CHECK-NEXT:    store i16 [[RES]], i16* [[DST_PTR]], align 2
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 31
; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP2:!llvm.loop !.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %iv.trunc = trunc i64 %iv to i16
  br label %loop.cond

loop.cond:
  ; Single-incoming phi: loop.header is the only predecessor of this block.
  %blend = phi i16 [ %iv.trunc, %loop.header ]
  %src.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 %blend
  %lv = load i16, i16* %src.ptr, align 1
  %cmp.b = icmp sgt i64 %iv, %a
  br i1 %cmp.b, label %loop.next, label %loop.latch

loop.next:
  br label %loop.latch

loop.latch:
  ; 1 when %iv >s %a (edge from loop.next), the loaded value otherwise.
  %res = phi i16 [ %lv, %loop.cond ], [ 1, %loop.next ]
  %dst.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 %iv
  store i16 %res, i16* %dst.ptr
  %iv.next = add nuw nsw i64 %iv, 1
  ; Runs for %iv = 0..31, i.e. 32 iterations.
  %cmp439 = icmp ult i64 %iv, 31
  br i1 %cmp439, label %loop.header, label %exit

exit:
  ret void
}

; The load in the loop does not need predication, because the accessed memory
; is de-referenceable for all loop iterations.
; Here the load sits in loop.cond, which is only entered when %iv >u %a, and
; the phi in loop.latch merges three incoming values.
define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) {
; CHECK-LABEL: @single_incoming_phi_with_blend_mask(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[INDEX]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = add i16 [[TMP2]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i16, i16* [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i16>, <2 x i16>* [[TMP7]], align 1
; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP9:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
; CHECK-NEXT:    [[TMP10:%.*]] = xor <2 x i1> [[TMP8]], <i1 true, i1 true>
; CHECK-NEXT:    [[TMP11:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer
; CHECK-NEXT:    [[TMP12:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP8]], <2 x i1> zeroinitializer
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x i16> [[WIDE_LOAD]], <2 x i16> zeroinitializer
; CHECK-NEXT:    [[PREDPHI1:%.*]] = select <2 x i1> [[TMP12]], <2 x i16> <i16 1, i16 1>, <2 x i16> [[PREDPHI]]
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 [[TMP0]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i16, i16* [[TMP13]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i16* [[TMP14]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> [[PREDPHI1]], <2 x i16>* [[TMP15]], align 2
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 32, 32
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]]
; CHECK-NEXT:    br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]]
; CHECK:       loop.cond:
; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ]
; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[BLEND]]
; CHECK-NEXT:    [[LV:%.*]] = load i16, i16* [[SRC_PTR]], align 1
; CHECK-NEXT:    [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]]
; CHECK-NEXT:    br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
; CHECK:       loop.next:
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[RES:%.*]] = phi i16 [ 0, [[LOOP_HEADER]] ], [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ]
; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 [[IV]]
; CHECK-NEXT:    store i16 [[RES]], i16* [[DST_PTR]], align 2
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 31
; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP5:!llvm.loop !.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %iv.trunc = trunc i64 %iv to i16
  ; The block containing the load is skipped unless %iv >u %a.
  %cmp.a = icmp ugt i64 %iv, %a
  br i1 %cmp.a, label %loop.cond, label %loop.latch

loop.cond:
  ; Single-incoming phi: loop.header is the only predecessor of this block.
  %blend = phi i16 [ %iv.trunc, %loop.header ]
  %src.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 %blend
  %lv = load i16, i16* %src.ptr, align 1
  %cmp.b = icmp sgt i64 %iv, %a
  br i1 %cmp.b, label %loop.next, label %loop.latch

loop.next:
  br label %loop.latch

loop.latch:
  ; 0 when loop.cond was skipped, 1 when both compares were true, the loaded
  ; value otherwise.
  %res = phi i16 [ 0, %loop.header ], [ %lv, %loop.cond ], [ 1, %loop.next ]
  %dst.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 %iv
  store i16 %res, i16* %dst.ptr
  %iv.next = add nuw nsw i64 %iv, 1
  ; Runs for %iv = 0..31, i.e. 32 iterations.
  %cmp439 = icmp ult i64 %iv, 31
  br i1 %cmp439, label %loop.header, label %exit

exit:
  ret void
}

; %blend has two distinct incoming values chosen by %cmp.a. The expected vector
; body blends the two i16 induction vectors with a select on that compare and
; uses the result to address @src. The loaded value itself is unused.
define void @multiple_incoming_phi_with_blend_mask(i64 %a) {
; CHECK-LABEL: @multiple_incoming_phi_with_blend_mask(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true>
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> [[VEC_IND3]], <2 x i16> [[VEC_IND1]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[TMP5]], align 1
; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[TMP7]], align 1
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], <i16 2, i16 2>
; CHECK-NEXT:    [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], <i16 2, i16 2>
; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 32, 32
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
; CHECK-NEXT:    [[IV_TRUNC_2:%.*]] = trunc i64 [[IV]] to i16
; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]]
; CHECK-NEXT:    br i1 [[CMP_A]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
; CHECK:       loop.next:
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ], [ [[IV_TRUNC_2]], [[LOOP_NEXT]] ]
; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[BLEND]]
; CHECK-NEXT:    [[LV:%.*]] = load i16, i16* [[SRC_PTR]], align 1
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 31
; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP7:!llvm.loop !.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  ; Two separate truncates of %iv, so the phi below has distinct operands.
  %iv.trunc = trunc i64 %iv to i16
  %iv.trunc.2 = trunc i64 %iv to i16
  %cmp.a = icmp ugt i64 %iv, %a
  br i1 %cmp.a, label %loop.next, label %loop.latch

loop.next:
  br label %loop.latch

loop.latch:
  %blend = phi i16 [ %iv.trunc, %loop.header ], [ %iv.trunc.2, %loop.next ]
  %src.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 %blend
  %lv = load i16, i16* %src.ptr, align 1
  %iv.next = add nuw nsw i64 %iv, 1
  ; Runs for %iv = 0..31, i.e. 32 iterations.
  %cmp439 = icmp ult i64 %iv, 31
  br i1 %cmp439, label %loop.header, label %exit

exit:
  ret void
}

; The load in the loop needs predication, because the accessed memory is not
; dereferenceable for all iterations of the loop.
; The loop below runs 64 iterations (%iv = 0..63) while the accessed array type
; is [32 x i16]; the expected vector body guards each lane's load with its own
; pred.load.if block.
define void @single_incoming_needs_predication(i64 %a, i64 %b) {
; CHECK-LABEL: @single_incoming_needs_predication(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE4:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE4]] ]
; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE4]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK:       pred.load.if:
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i16> [[VEC_IND1]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[TMP5]], align 1
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i32 0
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
; CHECK:       pred.load.continue:
; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4]]
; CHECK:       pred.load.if3:
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i16> [[VEC_IND1]], i32 1
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[TMP11]], align 1
; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP12]], i32 1
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
; CHECK:       pred.load.continue4:
; CHECK-NEXT:    [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true>
; CHECK-NEXT:    [[TMP17:%.*]] = xor <2 x i1> [[TMP15]], <i1 true, i1 true>
; CHECK-NEXT:    [[TMP18:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP17]], <2 x i1> zeroinitializer
; CHECK-NEXT:    [[TMP19:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP18]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer
; CHECK-NEXT:    [[PREDPHI5:%.*]] = select <2 x i1> [[TMP19]], <2 x i16> <i16 1, i16 1>, <2 x i16> [[PREDPHI]]
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 [[TMP0]]
; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i16, i16* [[TMP20]], i32 0
; CHECK-NEXT:    [[TMP22:%.*]] = bitcast i16* [[TMP21]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> [[PREDPHI5]], <2 x i16>* [[TMP22]], align 2
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], <i16 2, i16 2>
; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
; CHECK-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 64, 64
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]]
; CHECK-NEXT:    br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]]
; CHECK:       loop.cond:
; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ]
; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[BLEND]]
; CHECK-NEXT:    [[LV:%.*]] = load i16, i16* [[SRC_PTR]], align 1
; CHECK-NEXT:    [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]]
; CHECK-NEXT:    br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
; CHECK:       loop.next:
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[RES:%.*]] = phi i16 [ 0, [[LOOP_HEADER]] ], [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ]
; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 [[IV]]
; CHECK-NEXT:    store i16 [[RES]], i16* [[DST_PTR]], align 2
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 63
; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP9:!llvm.loop !.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %iv.trunc = trunc i64 %iv to i16
  ; The block containing the load is skipped unless %iv >u %a.
  %cmp.a = icmp ugt i64 %iv, %a
  br i1 %cmp.a, label %loop.cond, label %loop.latch

loop.cond:
  ; Single-incoming phi: loop.header is the only predecessor of this block.
  %blend = phi i16 [ %iv.trunc, %loop.header ]
  %src.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 %blend
  %lv = load i16, i16* %src.ptr, align 1
  %cmp.b = icmp sgt i64 %iv, %a
  br i1 %cmp.b, label %loop.next, label %loop.latch

loop.next:
  br label %loop.latch

loop.latch:
  ; 0 when loop.cond was skipped, 1 when both compares were true, the loaded
  ; value otherwise.
  %res = phi i16 [ 0, %loop.header ], [ %lv, %loop.cond ], [ 1, %loop.next ]
  %dst.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 %iv
  store i16 %res, i16* %dst.ptr
  %iv.next = add nuw nsw i64 %iv, 1
  ; Runs for %iv = 0..63, i.e. 64 iterations.
  %cmp439 = icmp ult i64 %iv, 63
  br i1 %cmp439, label %loop.header, label %exit

exit:
  ret void
}

; Test case for PR44800.
; Both successors of the conditional branch in loop.header are loop.latch, so
; %p lists loop.header twice with the same incoming value. The expected vector
; body stores the widened induction variable directly, without a select.
define void @duplicated_incoming_blocks_blend(i32 %x, i32* %ptr) {
; CHECK-LABEL: @duplicated_incoming_blocks_blend(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP4]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 1000, 1000
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_I:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[C_0:%.*]] = icmp ugt i32 [[IV]], [[X]]
; CHECK-NEXT:    br i1 [[C_0]], label [[LOOP_LATCH]], label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[P:%.*]] = phi i32 [ [[IV]], [[LOOP_HEADER]] ], [ [[IV]], [[LOOP_HEADER]] ]
; CHECK-NEXT:    [[GEP_PTR:%.*]] = getelementptr i32, i32* [[PTR]], i32 [[P]]
; CHECK-NEXT:    store i32 [[P]], i32* [[GEP_PTR]], align 4
; CHECK-NEXT:    [[ADD_I]] = add nsw i32 [[P]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[ADD_I]], 1000
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP11:!llvm.loop !.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i32 [ 0 , %entry ], [ %add.i, %loop.latch ]
  %c.0 = icmp ugt i32 %iv, %x
  ; Both edges of this branch lead to the same block.
  br i1 %c.0, label %loop.latch, label %loop.latch

loop.latch:
  ; One phi entry per incoming edge; both carry %iv.
  %p = phi i32 [ %iv, %loop.header ], [ %iv, %loop.header ]
  %gep.ptr = getelementptr i32, i32* %ptr, i32 %p
  store i32 %p, i32* %gep.ptr
  %add.i = add nsw i32 %p, 1
  %cmp = icmp slt i32 %add.i, 1000
  br i1 %cmp, label %loop.header, label %exit

exit:
  ret void
}
