1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s 3; RUN: opt -S -loop-vectorize -force-vector-width=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s 4 5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" 6 7define void @bottom_tested(i16* %p, i32 %n) { 8; CHECK-LABEL: @bottom_tested( 9; CHECK-NEXT: entry: 10; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 11; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 12; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 13; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 14; CHECK: vector.ph: 15; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 16; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] 17; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 18; CHECK: vector.body: 19; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 20; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 21; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 22; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]] 23; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0 24; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>* 25; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4 26; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 27; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 28; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 29; CHECK: middle.block: 30; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 31; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 32; CHECK: scalar.ph: 33; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 
0, [[ENTRY:%.*]] ] 34; CHECK-NEXT: br label [[FOR_COND:%.*]] 35; CHECK: for.cond: 36; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] 37; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 38; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 39; CHECK-NEXT: store i16 0, i16* [[B]], align 4 40; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 41; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 42; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]] 43; CHECK: if.end: 44; CHECK-NEXT: ret void 45; 46; TAILFOLD-LABEL: @bottom_tested( 47; TAILFOLD-NEXT: entry: 48; TAILFOLD-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 49; TAILFOLD-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 50; TAILFOLD-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 51; TAILFOLD: vector.ph: 52; TAILFOLD-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1 53; TAILFOLD-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2 54; TAILFOLD-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] 55; TAILFOLD-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1 56; TAILFOLD-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 57; TAILFOLD-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 58; TAILFOLD-NEXT: br label [[VECTOR_BODY:%.*]] 59; TAILFOLD: vector.body: 60; TAILFOLD-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] 61; TAILFOLD-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] 62; TAILFOLD-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 63; TAILFOLD-NEXT: [[TMP4:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64> 64; TAILFOLD-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], 
i32 0 65; TAILFOLD-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 66; TAILFOLD: pred.store.if: 67; TAILFOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 68; TAILFOLD-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]] 69; TAILFOLD-NEXT: store i16 0, i16* [[TMP7]], align 4 70; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE]] 71; TAILFOLD: pred.store.continue: 72; TAILFOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 73; TAILFOLD-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] 74; TAILFOLD: pred.store.if1: 75; TAILFOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 76; TAILFOLD-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP9]] 77; TAILFOLD-NEXT: store i16 0, i16* [[TMP10]], align 4 78; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE2]] 79; TAILFOLD: pred.store.continue2: 80; TAILFOLD-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 81; TAILFOLD-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 82; TAILFOLD-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 83; TAILFOLD-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 84; TAILFOLD: middle.block: 85; TAILFOLD-NEXT: br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]] 86; TAILFOLD: scalar.ph: 87; TAILFOLD-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 88; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 89; TAILFOLD: for.cond: 90; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] 91; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 92; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 93; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 94; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 95; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 96; 
TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]] 97; TAILFOLD: if.end: 98; TAILFOLD-NEXT: ret void 99; 100entry: 101 br label %for.cond 102 103for.cond: 104 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 105 %iprom = sext i32 %i to i64 106 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 107 store i16 0, i16* %b, align 4 108 %inc = add nsw i32 %i, 1 109 %cmp = icmp slt i32 %i, %n 110 br i1 %cmp, label %for.cond, label %if.end 111 112if.end: 113 ret void 114} 115 116define void @early_exit(i16* %p, i32 %n) { 117; CHECK-LABEL: @early_exit( 118; CHECK-NEXT: entry: 119; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 120; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 121; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 122; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 123; CHECK: vector.ph: 124; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 125; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 126; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 127; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 128; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 129; CHECK: vector.body: 130; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 131; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 132; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 133; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 134; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 135; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* 136; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 137; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 138; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 139; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 140; CHECK: middle.block: 141; CHECK-NEXT: br label [[SCALAR_PH]] 142; CHECK: scalar.ph: 143; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 144; CHECK-NEXT: br label [[FOR_COND:%.*]] 145; CHECK: for.cond: 146; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 147; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 148; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 149; CHECK: for.body: 150; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 151; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 152; CHECK-NEXT: store i16 0, i16* [[B]], align 4 153; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 154; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] 155; CHECK: if.end: 156; CHECK-NEXT: ret void 157; 158; TAILFOLD-LABEL: @early_exit( 159; TAILFOLD-NEXT: entry: 160; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 161; TAILFOLD: for.cond: 162; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 163; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 164; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 165; TAILFOLD: for.body: 166; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 167; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 168; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 169; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 170; TAILFOLD-NEXT: br label [[FOR_COND]] 171; TAILFOLD: if.end: 172; TAILFOLD-NEXT: ret void 173; 174entry: 175 br label %for.cond 176 177for.cond: 178 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 179 %cmp = icmp slt i32 %i, %n 180 br i1 %cmp, label %for.body, label %if.end 181 182for.body: 183 %iprom = sext i32 %i to i64 184 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 185 store i16 0, i16* %b, align 4 186 %inc = add 
nsw i32 %i, 1 187 br label %for.cond 188 189if.end: 190 ret void 191} 192 193; Same as early_exit, but with optsize to prevent the use of 194; a scalar epilogue. -- Can't vectorize this in either case. 195define void @optsize(i16* %p, i32 %n) optsize { 196; CHECK-LABEL: @optsize( 197; CHECK-NEXT: entry: 198; CHECK-NEXT: br label [[FOR_COND:%.*]] 199; CHECK: for.cond: 200; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 201; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 202; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 203; CHECK: for.body: 204; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 205; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 206; CHECK-NEXT: store i16 0, i16* [[B]], align 4 207; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 208; CHECK-NEXT: br label [[FOR_COND]] 209; CHECK: if.end: 210; CHECK-NEXT: ret void 211; 212; TAILFOLD-LABEL: @optsize( 213; TAILFOLD-NEXT: entry: 214; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 215; TAILFOLD: for.cond: 216; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 217; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 218; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 219; TAILFOLD: for.body: 220; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 221; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 222; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 223; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 224; TAILFOLD-NEXT: br label [[FOR_COND]] 225; TAILFOLD: if.end: 226; TAILFOLD-NEXT: ret void 227; 228entry: 229 br label %for.cond 230 231for.cond: 232 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 233 %cmp = icmp slt i32 %i, %n 234 br i1 %cmp, label %for.body, label %if.end 235 236for.body: 237 %iprom = sext i32 %i to i64 238 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 239 store i16 0, i16* 
%b, align 4 240 %inc = add nsw i32 %i, 1 241 br label %for.cond 242 243if.end: 244 ret void 245} 246 247 248; multiple exit - no values inside the loop used outside 249define void @multiple_unique_exit(i16* %p, i32 %n) { 250; CHECK-LABEL: @multiple_unique_exit( 251; CHECK-NEXT: entry: 252; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 253; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 254; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 255; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 256; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 257; CHECK: vector.ph: 258; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 259; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 260; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 261; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 262; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 263; CHECK: vector.body: 264; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 265; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 266; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 267; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 268; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 269; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* 270; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 271; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 272; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 273; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 274; CHECK: middle.block: 275; CHECK-NEXT: br label [[SCALAR_PH]] 276; CHECK: scalar.ph: 277; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 278; CHECK-NEXT: br 
label [[FOR_COND:%.*]] 279; CHECK: for.cond: 280; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 281; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 282; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 283; CHECK: for.body: 284; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 285; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 286; CHECK-NEXT: store i16 0, i16* [[B]], align 4 287; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 288; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 289; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP7:![0-9]+]] 290; CHECK: if.end: 291; CHECK-NEXT: ret void 292; 293; TAILFOLD-LABEL: @multiple_unique_exit( 294; TAILFOLD-NEXT: entry: 295; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 296; TAILFOLD: for.cond: 297; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 298; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 299; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 300; TAILFOLD: for.body: 301; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 302; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 303; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 304; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 305; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 306; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 307; TAILFOLD: if.end: 308; TAILFOLD-NEXT: ret void 309; 310entry: 311 br label %for.cond 312 313for.cond: 314 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 315 %cmp = icmp slt i32 %i, %n 316 br i1 %cmp, label %for.body, label %if.end 317 318for.body: 319 %iprom = sext i32 %i to i64 320 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 321 store i16 0, i16* %b, align 4 322 %inc = add nsw i32 %i, 1 323 %cmp2 = icmp slt i32 %i, 2096 324 br i1 %cmp2, 
label %for.cond, label %if.end 325 326if.end: 327 ret void 328} 329 330; multiple exit - with an lcssa phi 331define i32 @multiple_unique_exit2(i16* %p, i32 %n) { 332; CHECK-LABEL: @multiple_unique_exit2( 333; CHECK-NEXT: entry: 334; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 335; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 336; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 337; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 338; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 339; CHECK: vector.ph: 340; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 341; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 342; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 343; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 344; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 345; CHECK: vector.body: 346; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 347; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 348; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 349; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 350; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 351; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* 352; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 353; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 354; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 355; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 356; CHECK: middle.block: 357; CHECK-NEXT: br label [[SCALAR_PH]] 358; CHECK: scalar.ph: 359; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 360; CHECK-NEXT: br label [[FOR_COND:%.*]] 361; CHECK: for.cond: 362; 
CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 363; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 364; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 365; CHECK: for.body: 366; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 367; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 368; CHECK-NEXT: store i16 0, i16* [[B]], align 4 369; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 370; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 371; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]] 372; CHECK: if.end: 373; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] 374; CHECK-NEXT: ret i32 [[I_LCSSA]] 375; 376; TAILFOLD-LABEL: @multiple_unique_exit2( 377; TAILFOLD-NEXT: entry: 378; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 379; TAILFOLD: for.cond: 380; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 381; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 382; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 383; TAILFOLD: for.body: 384; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 385; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 386; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 387; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 388; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 389; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 390; TAILFOLD: if.end: 391; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] 392; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 393; 394entry: 395 br label %for.cond 396 397for.cond: 398 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 399 %cmp = icmp slt i32 %i, %n 400 br i1 %cmp, label %for.body, label %if.end 401 402for.body: 403 %iprom = sext i32 %i to i64 404 %b = 
getelementptr inbounds i16, i16* %p, i64 %iprom 405 store i16 0, i16* %b, align 4 406 %inc = add nsw i32 %i, 1 407 %cmp2 = icmp slt i32 %i, 2096 408 br i1 %cmp2, label %for.cond, label %if.end 409 410if.end: 411 ret i32 %i 412} 413 414; multiple exit w/a non lcssa phi 415define i32 @multiple_unique_exit3(i16* %p, i32 %n) { 416; CHECK-LABEL: @multiple_unique_exit3( 417; CHECK-NEXT: entry: 418; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 419; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 420; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 421; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 422; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 423; CHECK: vector.ph: 424; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 425; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 426; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 427; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 428; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 429; CHECK: vector.body: 430; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 431; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 432; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 433; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 434; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 435; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* 436; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 437; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 438; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 439; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 440; CHECK: middle.block: 441; CHECK-NEXT: br label [[SCALAR_PH]] 442; CHECK: scalar.ph: 443; CHECK-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 444; CHECK-NEXT: br label [[FOR_COND:%.*]] 445; CHECK: for.cond: 446; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 447; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 448; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 449; CHECK: for.body: 450; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 451; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 452; CHECK-NEXT: store i16 0, i16* [[B]], align 4 453; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 454; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 455; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]] 456; CHECK: if.end: 457; CHECK-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] 458; CHECK-NEXT: ret i32 [[EXIT]] 459; 460; TAILFOLD-LABEL: @multiple_unique_exit3( 461; TAILFOLD-NEXT: entry: 462; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 463; TAILFOLD: for.cond: 464; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 465; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 466; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 467; TAILFOLD: for.body: 468; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 469; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 470; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 471; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 472; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 473; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 474; TAILFOLD: if.end: 475; TAILFOLD-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] 476; TAILFOLD-NEXT: ret i32 [[EXIT]] 477; 478entry: 479 br label %for.cond 480 481for.cond: 482 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 483 %cmp = 
icmp slt i32 %i, %n 484 br i1 %cmp, label %for.body, label %if.end 485 486for.body: 487 %iprom = sext i32 %i to i64 488 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 489 store i16 0, i16* %b, align 4 490 %inc = add nsw i32 %i, 1 491 %cmp2 = icmp slt i32 %i, 2096 492 br i1 %cmp2, label %for.cond, label %if.end 493 494if.end: 495 %exit = phi i32 [0, %for.cond], [1, %for.body] 496 ret i32 %exit 497} 498 499; multiple exits w/distinct target blocks 500define i32 @multiple_exit_blocks(i16* %p, i32 %n) { 501; CHECK-LABEL: @multiple_exit_blocks( 502; CHECK-NEXT: entry: 503; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 504; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 505; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 506; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 507; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 508; CHECK: vector.ph: 509; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 510; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 511; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 512; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 513; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 514; CHECK: vector.body: 515; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 516; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 517; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 518; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 519; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 520; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* 521; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 522; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 523; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 524; CHECK-NEXT: br i1 [[TMP9]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 525; CHECK: middle.block: 526; CHECK-NEXT: br label [[SCALAR_PH]] 527; CHECK: scalar.ph: 528; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 529; CHECK-NEXT: br label [[FOR_COND:%.*]] 530; CHECK: for.cond: 531; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 532; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 533; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 534; CHECK: for.body: 535; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 536; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 537; CHECK-NEXT: store i16 0, i16* [[B]], align 4 538; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 539; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 540; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP13:![0-9]+]] 541; CHECK: if.end: 542; CHECK-NEXT: ret i32 0 543; CHECK: if.end2: 544; CHECK-NEXT: ret i32 1 545; 546; TAILFOLD-LABEL: @multiple_exit_blocks( 547; TAILFOLD-NEXT: entry: 548; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 549; TAILFOLD: for.cond: 550; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 551; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 552; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 553; TAILFOLD: for.body: 554; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 555; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 556; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 557; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 558; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 559; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 560; TAILFOLD: if.end: 561; TAILFOLD-NEXT: ret i32 0 562; TAILFOLD: if.end2: 563; TAILFOLD-NEXT: ret i32 1 564; 
565entry: 566 br label %for.cond 567 568for.cond: 569 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 570 %cmp = icmp slt i32 %i, %n 571 br i1 %cmp, label %for.body, label %if.end 572 573for.body: 574 %iprom = sext i32 %i to i64 575 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 576 store i16 0, i16* %b, align 4 577 %inc = add nsw i32 %i, 1 578 %cmp2 = icmp slt i32 %i, 2096 579 br i1 %cmp2, label %for.cond, label %if.end2 580 581if.end: 582 ret i32 0 583 584if.end2: 585 ret i32 1 586} 587 588; LCSSA, common value each exit 589define i32 @multiple_exit_blocks2(i16* %p, i32 %n) { 590; CHECK-LABEL: @multiple_exit_blocks2( 591; CHECK-NEXT: entry: 592; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 593; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 594; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 595; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 596; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 597; CHECK: vector.ph: 598; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 599; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 600; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 601; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 602; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 603; CHECK: vector.body: 604; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 605; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 606; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 607; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 608; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 609; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* 610; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 611; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 612; CHECK-NEXT: 
[[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 613; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 614; CHECK: middle.block: 615; CHECK-NEXT: br label [[SCALAR_PH]] 616; CHECK: scalar.ph: 617; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 618; CHECK-NEXT: br label [[FOR_COND:%.*]] 619; CHECK: for.cond: 620; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 621; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 622; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 623; CHECK: for.body: 624; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 625; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 626; CHECK-NEXT: store i16 0, i16* [[B]], align 4 627; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 628; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 629; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]] 630; CHECK: if.end: 631; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 632; CHECK-NEXT: ret i32 [[I_LCSSA]] 633; CHECK: if.end2: 634; CHECK-NEXT: [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ] 635; CHECK-NEXT: ret i32 [[I_LCSSA1]] 636; 637; TAILFOLD-LABEL: @multiple_exit_blocks2( 638; TAILFOLD-NEXT: entry: 639; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 640; TAILFOLD: for.cond: 641; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 642; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 643; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 644; TAILFOLD: for.body: 645; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 646; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 647; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 648; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 
; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
; TAILFOLD: if.end:
; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; TAILFOLD-NEXT: ret i32 [[I_LCSSA]]
; TAILFOLD: if.end2:
; TAILFOLD-NEXT: [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
; TAILFOLD-NEXT: ret i32 [[I_LCSSA1]]
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end2

if.end:
  ret i32 %i

if.end2:
  ret i32 %i
}

; LCSSA, distinct value each exit: if.end (reached from for.cond) returns %i,
; while if.end2 (reached from for.body) returns %inc.
define i32 @multiple_exit_blocks3(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_blocks3(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i32> [[VEC_IND]], <i32 1, i32 1>
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT: store i16 0, i16* [[B]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: if.end:
; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT: ret i32 [[I_LCSSA]]
; CHECK: if.end2:
; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
; CHECK-NEXT: ret i32 [[INC_LCSSA]]
;
; TAILFOLD-LABEL: @multiple_exit_blocks3(
; TAILFOLD-NEXT: entry:
; TAILFOLD-NEXT: br label [[FOR_COND:%.*]]
; TAILFOLD: for.cond:
; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD: for.body:
; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
; TAILFOLD: if.end:
; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; TAILFOLD-NEXT: ret i32 [[I_LCSSA]]
; TAILFOLD: if.end2:
; TAILFOLD-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
; TAILFOLD-NEXT: ret i32 [[INC_LCSSA]]
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end2

if.end:
  ret i32 %i

if.end2:
  ret i32 %inc
}

; unique exit case but with a switch as two edges between the same pair of
; blocks is an often missed edge case
; Both switch cases (2096 and 2097) branch to if.end. The expected output
; below contains no vector body for either check prefix.
define i32 @multiple_exit_switch(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_switch(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT: store i16 0, i16* [[B]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [
; CHECK-NEXT: i32 2096, label [[IF_END:%.*]]
; CHECK-NEXT: i32 2097, label [[IF_END]]
; CHECK-NEXT: ]
; CHECK: if.end:
; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT: ret i32 [[I_LCSSA]]
;
; TAILFOLD-LABEL: @multiple_exit_switch(
; TAILFOLD-NEXT: entry:
; TAILFOLD-NEXT: br label [[FOR_COND:%.*]]
; TAILFOLD: for.cond:
; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [
; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]]
; TAILFOLD-NEXT: i32 2097, label [[IF_END]]
; TAILFOLD-NEXT: ]
; TAILFOLD: if.end:
; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
; TAILFOLD-NEXT: ret i32 [[I_LCSSA]]
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  switch i32 %i, label %for.cond [
    i32 2096, label %if.end
    i32 2097, label %if.end
  ]

if.end:
  ret i32 %i
}

; multiple exit case but with a switch as multiple exiting edges from
; a single block is a commonly missed edge case
; The two switch cases branch to different exit blocks (if.end and if.end2).
; The expected output below contains no vector body for either check prefix.
define i32 @multiple_exit_switch2(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_switch2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT: store i16 0, i16* [[B]], align 4
; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [
; CHECK-NEXT: i32 2096, label [[IF_END:%.*]]
; CHECK-NEXT: i32 2097, label [[IF_END2:%.*]]
; CHECK-NEXT: ]
; CHECK: if.end:
; CHECK-NEXT: ret i32 0
; CHECK: if.end2:
; CHECK-NEXT: ret i32 1
;
; TAILFOLD-LABEL: @multiple_exit_switch2(
; TAILFOLD-NEXT: entry:
; TAILFOLD-NEXT: br label [[FOR_COND:%.*]]
; TAILFOLD: for.cond:
; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [
; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]]
; TAILFOLD-NEXT: i32 2097, label [[IF_END2:%.*]]
; TAILFOLD-NEXT: ]
; TAILFOLD: if.end:
; TAILFOLD-NEXT: ret i32 0
; TAILFOLD: if.end2:
; TAILFOLD-NEXT: ret i32 1
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  switch i32 %i, label %for.cond [
    i32 2096, label %if.end
    i32 2097, label %if.end2
  ]

if.end:
  ret i32 0

if.end2:
  ret i32 1
}

; for.body and for.second both branch to for.body.backedge, the only block
; that branches back to the header; the loop is left through for.second only.
; The expected output below contains no vector body for either check prefix.
define i32 @multiple_latch1(i16* %p) {
; CHECK-LABEL: @multiple_latch1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
; CHECK: for.second:
; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT: store i16 0, i16* [[B]], align 4
; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
; CHECK: for.body.backedge:
; CHECK-NEXT: br label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
; TAILFOLD-LABEL: @multiple_latch1(
; TAILFOLD-NEXT: entry:
; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]]
; TAILFOLD: for.body:
; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1
; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16
; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
; TAILFOLD: for.second:
; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64
; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
; TAILFOLD: for.body.backedge:
; TAILFOLD-NEXT: br label [[FOR_BODY]]
; TAILFOLD: for.end:
; TAILFOLD-NEXT: ret i32 0
;
entry:
  br label %for.body

for.body:
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge]
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, 16
  br i1 %cmp, label %for.body.backedge, label %for.second

for.second:
  %iprom = sext i32 %i.02 to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %cmps = icmp sgt i32 %inc, 16
  br i1 %cmps, label %for.body.backedge, label %for.end

for.body.backedge:
  br label %for.body

for.end:
  ret i32 0
}


; two back branches - loop simplify will convert this to the same form
; as previous before vectorizer sees it, but show that.
; (The expected output contains a for.body.backedge block that is not present
; in the input below.)
define i32 @multiple_latch2(i16* %p) {
; CHECK-LABEL: @multiple_latch2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
; CHECK: for.body.backedge:
; CHECK-NEXT: br label [[FOR_BODY]]
; CHECK: for.second:
; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT: store i16 0, i16* [[B]], align 4
; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 0
;
; TAILFOLD-LABEL: @multiple_latch2(
; TAILFOLD-NEXT: entry:
; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]]
; TAILFOLD: for.body:
; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1
; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16
; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
; TAILFOLD: for.body.backedge:
; TAILFOLD-NEXT: br label [[FOR_BODY]]
; TAILFOLD: for.second:
; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64
; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
; TAILFOLD: for.end:
; TAILFOLD-NEXT: ret i32 0
;
entry:
  br label %for.body

for.body:
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, 16
  br i1 %cmp, label %for.body, label %for.second

for.second:
  %iprom = sext i32 %i.02 to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %cmps = icmp sgt i32 %inc, 16
  br i1 %cmps, label %for.body, label %for.end

for.end:
  ret i32 0
}


; Check interaction between block predication and early exits. We need the
; condition on the early exit to remain dead (i.e. not be used when forming
; the predicate mask).
define void @scalar_predication(float* %addr) {
; CHECK-LABEL: @scalar_predication(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP0]]
; CHECK-NEXT: store float 1.000000e+01, float* [[TMP7]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.if1:
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP9]]
; CHECK-NEXT: store float 1.000000e+01, float* [[TMP10]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
; CHECK: loop.body:
; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[GEP]], align 4
; CHECK-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP12]], 0.000000e+00
; CHECK-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
; CHECK: then:
; CHECK-NEXT: store float 1.000000e+01, float* [[GEP]], align 4
; CHECK-NEXT: br label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: br label [[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
; TAILFOLD-LABEL: @scalar_predication(
; TAILFOLD-NEXT: entry:
; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]]
; TAILFOLD: loop.header:
; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]]
; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
; TAILFOLD: loop.body:
; TAILFOLD-NEXT: [[TMP0:%.*]] = load float, float* [[GEP]], align 4
; TAILFOLD-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
; TAILFOLD-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
; TAILFOLD: then:
; TAILFOLD-NEXT: store float 1.000000e+01, float* [[GEP]], align 4
; TAILFOLD-NEXT: br label [[LOOP_LATCH]]
; TAILFOLD: loop.latch:
; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TAILFOLD-NEXT: br label [[LOOP_HEADER]]
; TAILFOLD: exit:
; TAILFOLD-NEXT: ret void
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep = getelementptr float, float* %addr, i64 %iv
  %exitcond.not = icmp eq i64 %iv, 200
  br i1 %exitcond.not, label %exit, label %loop.body

loop.body:
  %0 = load float, float* %gep, align 4
  %pred = fcmp oeq float %0, 0.0
  br i1 %pred, label %loop.latch, label %then

then:
  store float 10.0, float* %gep, align 4
  br label %loop.latch

loop.latch:
  %iv.next = add nuw nsw i64 %iv, 1
  br label %loop.header

exit:
  ret void
}

; Integer add reduction in a loop with two exiting blocks (loop.header and
; loop.latch) that share a single exit block. The exit phi takes the constant
; 0 on the loop.header edge and %accum.next on the loop.latch edge. The
; expectations for the default prefix contain a vector body; those for the
; TAILFOLD prefix show the loop unchanged.
define i32 @me_reduction(i32* %addr) {
; CHECK-LABEL: @me_reduction(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
; CHECK-NEXT: [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP5]])
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4
; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
; CHECK-NEXT: ret i32 [[LCSSA]]
;
; TAILFOLD-LABEL: @me_reduction(
; TAILFOLD-NEXT: entry:
; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]]
; TAILFOLD: loop.header:
; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; TAILFOLD: loop.latch:
; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TAILFOLD-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
; TAILFOLD-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]]
; TAILFOLD: exit:
; TAILFOLD-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
; TAILFOLD-NEXT: ret i32 [[LCSSA]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
  %gep = getelementptr i32, i32* %addr, i64 %iv
  %exitcond.not = icmp eq i64 %iv, 200
  br i1 %exitcond.not, label %exit, label %loop.latch

loop.latch:
  %0 = load i32, i32* %gep, align 4
  %accum.next = add i32 %accum, %0
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond2.not = icmp eq i64 %iv, 400
  br i1 %exitcond2.not, label %exit, label %loop.header

exit:
  %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch]
  ret i32 %lcssa
}

; TODO: The current definition of reduction is too strict, we can vectorize
; this. There's an analogous single exit case where we extract the N-1
; value of the reduction that we can also handle. If we fix the latter, the
; multiple exit case probably falls out.
; The only exit edge is from loop.header (loop.latch branches back
; unconditionally), and the value returned is the header phi %accum rather
; than %accum.next. The expected output below is the input loop unchanged for
; both check prefixes.
define i32 @me_reduction2(i32* %addr) {
; CHECK-LABEL: @me_reduction2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: br label [[LOOP_HEADER]]
; CHECK: exit:
; CHECK-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
; CHECK-NEXT: ret i32 [[ACCUM_LCSSA]]
;
; TAILFOLD-LABEL: @me_reduction2(
; TAILFOLD-NEXT: entry:
; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]]
; TAILFOLD: loop.header:
; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; TAILFOLD: loop.latch:
; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TAILFOLD-NEXT: br label [[LOOP_HEADER]]
; TAILFOLD: exit:
; TAILFOLD-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
; TAILFOLD-NEXT: ret i32 [[ACCUM_LCSSA]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
  %gep = getelementptr i32, i32* %addr, i64 %iv
  %exitcond.not = icmp eq i64 %iv, 200
  br i1 %exitcond.not, label %exit, label %loop.latch

loop.latch:
  %0 = load i32, i32* %gep, align 4
  %accum.next = add i32 %accum, %0
  %iv.next = add nuw nsw i64 %iv, 1
  br label %loop.header

exit:
  ret i32 %accum
}
