; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
; RUN: opt -S -loop-vectorize -force-vector-width=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s

; Checks how the loop vectorizer (vector width forced to 2) treats loops of
; different shapes. Every function is verified twice: under the default prefix
; for the plain run, and under the TAILFOLD prefix for the run that passes
; -prefer-predicate-over-epilogue=predicate-dont-vectorize.

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; Single-block loop whose exit test sits at the bottom, after the store.
; Both runs expect a vector body; the tail-folding run expects per-lane
; predicated stores and a middle block that always branches to the exit.
define void @bottom_tested(i16* %p, i32 %n) {
; CHECK-LABEL: @bottom_tested(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
;
; TAILFOLD-LABEL: @bottom_tested(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; TAILFOLD-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TAILFOLD:       vector.ph:
; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1
; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1
; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
; TAILFOLD:       vector.body:
; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
; TAILFOLD-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
; TAILFOLD-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 1
; TAILFOLD-NEXT:    [[TMP3:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; TAILFOLD-NEXT:    [[TMP4:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
; TAILFOLD-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
; TAILFOLD-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; TAILFOLD:       pred.store.if:
; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
; TAILFOLD-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]]
; TAILFOLD-NEXT:    store i16 0, i16* [[TMP7]], align 4
; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
; TAILFOLD:       pred.store.continue:
; TAILFOLD-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
; TAILFOLD-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; TAILFOLD:       pred.store.if1:
; TAILFOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
; TAILFOLD-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP9]]
; TAILFOLD-NEXT:    store i16 0, i16* [[TMP10]], align 4
; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
; TAILFOLD:       pred.store.continue2:
; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; TAILFOLD-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; TAILFOLD-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; TAILFOLD:       middle.block:
; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
; TAILFOLD:       scalar.ph:
; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.cond, label %if.end

if.end:
  ret void
}

; Loop whose only exit test sits in the header, ahead of the store. The default
; run expects a vector body whose middle block always continues into the scalar
; loop; the tail-folding run expects the loop to be left scalar.
define void @early_exit(i16* %p, i32 %n) {
; CHECK-LABEL: @early_exit(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
;
; TAILFOLD-LABEL: @early_exit(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    br label [[FOR_COND]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  br label %for.cond

if.end:
  ret void
}

; Same as early_exit, but marked optsize to prevent the use of a scalar
; epilogue. Neither run is expected to vectorize this loop.
define void @optsize(i16* %p, i32 %n) optsize {
; CHECK-LABEL: @optsize(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    br label [[FOR_COND]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
;
; TAILFOLD-LABEL: @optsize(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    br label [[FOR_COND]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  br label %for.cond

if.end:
  ret void
}


; Multiple exits into one shared exit block; no value defined inside the loop
; is used outside of it.
define void @multiple_unique_exit(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_unique_exit(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
;
; TAILFOLD-LABEL: @multiple_unique_exit(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end

if.end:
  ret void
}

; Multiple exits into one shared exit block, with an LCSSA phi (%i is returned
; after the loop).
define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_unique_exit2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA]]
;
; TAILFOLD-LABEL: @multiple_unique_exit2(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end

if.end:
  ret i32 %i
}

; Multiple exits into one shared exit block, with a non-LCSSA phi that selects
; a constant per exiting edge.
define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_unique_exit3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[EXIT]]
;
; TAILFOLD-LABEL: @multiple_unique_exit3(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
; TAILFOLD-NEXT:    ret i32 [[EXIT]]
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end

if.end:
  %exit = phi i32 [0, %for.cond], [1, %for.body]
  ret i32 %exit
}

; Multiple exits, each branching to its own exit block.
define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_blocks(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    ret i32 0
; CHECK:       if.end2:
; CHECK-NEXT:    ret i32 1
;
; TAILFOLD-LABEL: @multiple_exit_blocks(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret i32 0
; TAILFOLD:       if.end2:
; TAILFOLD-NEXT:    ret i32 1
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end2

if.end:
  ret i32 0

if.end2:
  ret i32 1
}

; Distinct exit blocks that each need an LCSSA phi of the same value (%i).
define i32 @multiple_exit_blocks2(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_blocks2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA]]
; CHECK:       if.end2:
; CHECK-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA1]]
;
; TAILFOLD-LABEL: @multiple_exit_blocks2(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
; TAILFOLD:       if.end2:
; TAILFOLD-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
; TAILFOLD-NEXT:    ret i32 [[I_LCSSA1]]
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end2

if.end:
  ret i32 %i

if.end2:
  ret i32 %i
}

; LCSSA, distinct value each exit
define i32 @multiple_exit_blocks3(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_blocks3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <2 x i32> [[VEC_IND]], <i32 1, i32 1>
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA]]
; CHECK:       if.end2:
; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
;
; TAILFOLD-LABEL: @multiple_exit_blocks3(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:
store i16 0, i16* [[B]], align 4 759; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 760; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 761; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 762; TAILFOLD: if.end: 763; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 764; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 765; TAILFOLD: if.end2: 766; TAILFOLD-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ] 767; TAILFOLD-NEXT: ret i32 [[INC_LCSSA]] 768; 769entry: 770 br label %for.cond 771 772for.cond: 773 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 774 %cmp = icmp slt i32 %i, %n 775 br i1 %cmp, label %for.body, label %if.end 776 777for.body: 778 %iprom = sext i32 %i to i64 779 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 780 store i16 0, i16* %b, align 4 781 %inc = add nsw i32 %i, 1 782 %cmp2 = icmp slt i32 %i, 2096 783 br i1 %cmp2, label %for.cond, label %if.end2 784 785if.end: 786 ret i32 %i 787 788if.end2: 789 ret i32 %inc 790} 791 792; unique exit case but with a switch as two edges between the same pair of 793; blocks is an often missed edge case 794define i32 @multiple_exit_switch(i16* %p, i32 %n) { 795; CHECK-LABEL: @multiple_exit_switch( 796; CHECK-NEXT: entry: 797; CHECK-NEXT: br label [[FOR_COND:%.*]] 798; CHECK: for.cond: 799; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 800; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 801; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 802; CHECK-NEXT: store i16 0, i16* [[B]], align 4 803; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 804; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 805; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 806; CHECK-NEXT: i32 2097, label [[IF_END]] 807; CHECK-NEXT: ] 808; CHECK: if.end: 809; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 810; CHECK-NEXT: ret i32 [[I_LCSSA]] 811; 812; TAILFOLD-LABEL: 
@multiple_exit_switch( 813; TAILFOLD-NEXT: entry: 814; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 815; TAILFOLD: for.cond: 816; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 817; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 818; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 819; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 820; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 821; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 822; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 823; TAILFOLD-NEXT: i32 2097, label [[IF_END]] 824; TAILFOLD-NEXT: ] 825; TAILFOLD: if.end: 826; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 827; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 828; 829entry: 830 br label %for.cond 831 832for.cond: 833 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 834 %iprom = sext i32 %i to i64 835 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 836 store i16 0, i16* %b, align 4 837 %inc = add nsw i32 %i, 1 838 switch i32 %i, label %for.cond [ 839 i32 2096, label %if.end 840 i32 2097, label %if.end 841 ] 842 843if.end: 844 ret i32 %i 845} 846 847; multiple exit case but with a switch as multiple exiting edges from 848; a single block is a commonly missed edge case 849define i32 @multiple_exit_switch2(i16* %p, i32 %n) { 850; CHECK-LABEL: @multiple_exit_switch2( 851; CHECK-NEXT: entry: 852; CHECK-NEXT: br label [[FOR_COND:%.*]] 853; CHECK: for.cond: 854; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 855; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 856; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 857; CHECK-NEXT: store i16 0, i16* [[B]], align 4 858; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 859; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 860; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 861; CHECK-NEXT: i32 2097, label [[IF_END2:%.*]] 862; 
CHECK-NEXT: ] 863; CHECK: if.end: 864; CHECK-NEXT: ret i32 0 865; CHECK: if.end2: 866; CHECK-NEXT: ret i32 1 867; 868; TAILFOLD-LABEL: @multiple_exit_switch2( 869; TAILFOLD-NEXT: entry: 870; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 871; TAILFOLD: for.cond: 872; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 873; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 874; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 875; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 876; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 877; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 878; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 879; TAILFOLD-NEXT: i32 2097, label [[IF_END2:%.*]] 880; TAILFOLD-NEXT: ] 881; TAILFOLD: if.end: 882; TAILFOLD-NEXT: ret i32 0 883; TAILFOLD: if.end2: 884; TAILFOLD-NEXT: ret i32 1 885; 886entry: 887 br label %for.cond 888 889for.cond: 890 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 891 %iprom = sext i32 %i to i64 892 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 893 store i16 0, i16* %b, align 4 894 %inc = add nsw i32 %i, 1 895 switch i32 %i, label %for.cond [ 896 i32 2096, label %if.end 897 i32 2097, label %if.end2 898 ] 899 900if.end: 901 ret i32 0 902 903if.end2: 904 ret i32 1 905} 906; for.body and for.second both branch to the single latch for.body.backedge; the checks below expect the loop to be left unvectorized under both RUN lines. 907define i32 @multiple_latch1(i16* %p) { 908; CHECK-LABEL: @multiple_latch1( 909; CHECK-NEXT: entry: 910; CHECK-NEXT: br label [[FOR_BODY:%.*]] 911; CHECK: for.body: 912; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 913; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 914; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 915; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 916; CHECK: for.second: 917; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 918; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 919; CHECK-NEXT: store i16 0, i16* [[B]], 
align 4 920; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 921; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 922; CHECK: for.body.backedge: 923; CHECK-NEXT: br label [[FOR_BODY]] 924; CHECK: for.end: 925; CHECK-NEXT: ret i32 0 926; 927; TAILFOLD-LABEL: @multiple_latch1( 928; TAILFOLD-NEXT: entry: 929; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 930; TAILFOLD: for.body: 931; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 932; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 933; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 934; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 935; TAILFOLD: for.second: 936; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 937; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 938; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 939; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 940; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 941; TAILFOLD: for.body.backedge: 942; TAILFOLD-NEXT: br label [[FOR_BODY]] 943; TAILFOLD: for.end: 944; TAILFOLD-NEXT: ret i32 0 945; 946entry: 947 br label %for.body 948 949for.body: 950 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge] 951 %inc = add nsw i32 %i.02, 1 952 %cmp = icmp slt i32 %inc, 16 953 br i1 %cmp, label %for.body.backedge, label %for.second 954 955for.second: 956 %iprom = sext i32 %i.02 to i64 957 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 958 store i16 0, i16* %b, align 4 959 %cmps = icmp sgt i32 %inc, 16 960 br i1 %cmps, label %for.body.backedge, label %for.end 961 962for.body.backedge: 963 br label %for.body 964 965for.end: 966 ret i32 0 967} 968 969 970; two back branches - loop simplify will convert this to the same form 971; as the previous test before the vectorizer sees it, but show that. 
972define i32 @multiple_latch2(i16* %p) { 973; CHECK-LABEL: @multiple_latch2( 974; CHECK-NEXT: entry: 975; CHECK-NEXT: br label [[FOR_BODY:%.*]] 976; CHECK: for.body: 977; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 978; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 979; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 980; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 981; CHECK: for.body.backedge: 982; CHECK-NEXT: br label [[FOR_BODY]] 983; CHECK: for.second: 984; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 985; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 986; CHECK-NEXT: store i16 0, i16* [[B]], align 4 987; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 988; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 989; CHECK: for.end: 990; CHECK-NEXT: ret i32 0 991; 992; TAILFOLD-LABEL: @multiple_latch2( 993; TAILFOLD-NEXT: entry: 994; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 995; TAILFOLD: for.body: 996; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 997; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 998; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 999; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 1000; TAILFOLD: for.body.backedge: 1001; TAILFOLD-NEXT: br label [[FOR_BODY]] 1002; TAILFOLD: for.second: 1003; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 1004; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 1005; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 1006; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 1007; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 1008; TAILFOLD: for.end: 1009; TAILFOLD-NEXT: ret i32 0 1010; 1011entry: 1012 br label %for.body 1013 1014for.body: 1015 %i.02 = phi i32 [ 0, 
%entry ], [ %inc, %for.body ], [%inc, %for.second] 1016 %inc = add nsw i32 %i.02, 1 1017 %cmp = icmp slt i32 %inc, 16 1018 br i1 %cmp, label %for.body, label %for.second 1019 1020for.second: 1021 %iprom = sext i32 %i.02 to i64 1022 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 1023 store i16 0, i16* %b, align 4 1024 %cmps = icmp sgt i32 %inc, 16 1025 br i1 %cmps, label %for.body, label %for.end 1026 1027for.end: 1028 ret i32 0 1029} 1030 1031 1032; Check interaction between block predication and early exits. We need the 1033; condition on the early exit to remain dead (i.e. not be used when forming 1034; the predicate mask). 1035define void @scalar_predication(float* %addr) { 1036; CHECK-LABEL: @scalar_predication( 1037; CHECK-NEXT: entry: 1038; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1039; CHECK: vector.ph: 1040; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1041; CHECK: vector.body: 1042; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] 1043; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] 1044; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1045; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]] 1046; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0 1047; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>* 1048; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 1049; CHECK-NEXT: [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer 1050; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true> 1051; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 1052; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 1053; CHECK: pred.store.if: 1054; CHECK-NEXT: [[TMP7:%.*]] = getelementptr float, float* [[ADDR]], i64 
[[TMP0]] 1055; CHECK-NEXT: store float 1.000000e+01, float* [[TMP7]], align 4 1056; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] 1057; CHECK: pred.store.continue: 1058; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 1059; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] 1060; CHECK: pred.store.if1: 1061; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 1062; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP9]] 1063; CHECK-NEXT: store float 1.000000e+01, float* [[TMP10]], align 4 1064; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] 1065; CHECK: pred.store.continue2: 1066; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1067; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1068; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 1069; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 1070; CHECK: middle.block: 1071; CHECK-NEXT: br label [[SCALAR_PH]] 1072; CHECK: scalar.ph: 1073; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1074; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1075; CHECK: loop.header: 1076; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1077; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]] 1078; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1079; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]] 1080; CHECK: loop.body: 1081; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[GEP]], align 4 1082; CHECK-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP12]], 0.000000e+00 1083; CHECK-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 1084; CHECK: then: 1085; CHECK-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 1086; CHECK-NEXT: br label [[LOOP_LATCH]] 1087; CHECK: loop.latch: 1088; 
CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1089; CHECK-NEXT: br label [[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]] 1090; CHECK: exit: 1091; CHECK-NEXT: ret void 1092; 1093; TAILFOLD-LABEL: @scalar_predication( 1094; TAILFOLD-NEXT: entry: 1095; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1096; TAILFOLD: loop.header: 1097; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1098; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]] 1099; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1100; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]] 1101; TAILFOLD: loop.body: 1102; TAILFOLD-NEXT: [[TMP0:%.*]] = load float, float* [[GEP]], align 4 1103; TAILFOLD-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00 1104; TAILFOLD-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 1105; TAILFOLD: then: 1106; TAILFOLD-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 1107; TAILFOLD-NEXT: br label [[LOOP_LATCH]] 1108; TAILFOLD: loop.latch: 1109; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1110; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 1111; TAILFOLD: exit: 1112; TAILFOLD-NEXT: ret void 1113; 1114entry: 1115 br label %loop.header 1116 1117loop.header: 1118 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1119 %gep = getelementptr float, float* %addr, i64 %iv 1120 %exitcond.not = icmp eq i64 %iv, 200 1121 br i1 %exitcond.not, label %exit, label %loop.body 1122 1123loop.body: 1124 %0 = load float, float* %gep, align 4 1125 %pred = fcmp oeq float %0, 0.0 1126 br i1 %pred, label %loop.latch, label %then 1127 1128then: 1129 store float 10.0, float* %gep, align 4 1130 br label %loop.latch 1131 1132loop.latch: 1133 %iv.next = add nuw nsw i64 %iv, 1 1134 br label %loop.header 1135 1136exit: 1137 ret void 1138} 1139 1140define i32 @me_reduction(i32* %addr) { 1141; CHECK-LABEL: @me_reduction( 1142; CHECK-NEXT: entry: 1143; 
CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1144; CHECK: vector.ph: 1145; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1146; CHECK: vector.body: 1147; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1148; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1149; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 1150; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1151; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]] 1152; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0 1153; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>* 1154; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4 1155; CHECK-NEXT: [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]] 1156; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1157; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1158; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 1159; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 1160; CHECK: middle.block: 1161; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP5]]) 1162; CHECK-NEXT: br label [[SCALAR_PH]] 1163; CHECK: scalar.ph: 1164; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1165; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 1166; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1167; CHECK: loop.header: 1168; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1169; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1170; CHECK-NEXT: [[GEP:%.*]] = 
getelementptr i32, i32* [[ADDR]], i64 [[IV]] 1171; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1172; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1173; CHECK: loop.latch: 1174; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4 1175; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]] 1176; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1177; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 1178; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]] 1179; CHECK: exit: 1180; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] 1181; CHECK-NEXT: ret i32 [[LCSSA]] 1182; 1183; TAILFOLD-LABEL: @me_reduction( 1184; TAILFOLD-NEXT: entry: 1185; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1186; TAILFOLD: loop.header: 1187; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1188; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1189; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1190; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1191; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1192; TAILFOLD: loop.latch: 1193; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1194; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1195; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1196; TAILFOLD-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 1197; TAILFOLD-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]] 1198; TAILFOLD: exit: 1199; TAILFOLD-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] 1200; TAILFOLD-NEXT: ret i32 [[LCSSA]] 1201; 1202entry: 1203 br label %loop.header 1204 1205loop.header: 1206 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 
1207 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1208 %gep = getelementptr i32, i32* %addr, i64 %iv 1209 %exitcond.not = icmp eq i64 %iv, 200 1210 br i1 %exitcond.not, label %exit, label %loop.latch 1211 1212loop.latch: 1213 %0 = load i32, i32* %gep, align 4 1214 %accum.next = add i32 %accum, %0 1215 %iv.next = add nuw nsw i64 %iv, 1 1216 %exitcond2.not = icmp eq i64 %iv, 400 1217 br i1 %exitcond2.not, label %exit, label %loop.header 1218 1219exit: 1220 %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch] 1221 ret i32 %lcssa 1222} 1223 1224; TODO: The current definition of reduction is too strict; we can vectorize 1225; this. There's an analogous single exit case where we extract the N-1 1226; value of the reduction that we can also handle. If we fix the latter, the 1227; multiple exit case probably falls out. 1228define i32 @me_reduction2(i32* %addr) { 1229; CHECK-LABEL: @me_reduction2( 1230; CHECK-NEXT: entry: 1231; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1232; CHECK: loop.header: 1233; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1234; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1235; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1236; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1237; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1238; CHECK: loop.latch: 1239; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1240; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1241; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1242; CHECK-NEXT: br label [[LOOP_HEADER]] 1243; CHECK: exit: 1244; CHECK-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1245; CHECK-NEXT: ret i32 [[ACCUM_LCSSA]] 1246; 1247; TAILFOLD-LABEL: @me_reduction2( 1248; TAILFOLD-NEXT: entry: 1249; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1250; TAILFOLD: loop.header: 
1251; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1252; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1253; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1254; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1255; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1256; TAILFOLD: loop.latch: 1257; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1258; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1259; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1260; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 1261; TAILFOLD: exit: 1262; TAILFOLD-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1263; TAILFOLD-NEXT: ret i32 [[ACCUM_LCSSA]] 1264; 1265entry: 1266 br label %loop.header 1267 1268loop.header: 1269 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1270 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1271 %gep = getelementptr i32, i32* %addr, i64 %iv 1272 %exitcond.not = icmp eq i64 %iv, 200 1273 br i1 %exitcond.not, label %exit, label %loop.latch 1274 1275loop.latch: 1276 %0 = load i32, i32* %gep, align 4 1277 %accum.next = add i32 %accum, %0 1278 %iv.next = add nuw nsw i64 %iv, 1 1279 br label %loop.header 1280 1281exit: 1282 ret i32 %accum 1283} 1284 1285