1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s 3; RUN: opt -S -loop-vectorize -force-vector-width=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s 4 5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" 6 7define void @bottom_tested(i16* %p, i32 %n) { 8; CHECK-LABEL: @bottom_tested( 9; CHECK-NEXT: entry: 10; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 11; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 12; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 13; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 14; CHECK: vector.ph: 15; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 16; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] 17; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 18; CHECK: vector.body: 19; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 20; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 21; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 22; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]] 23; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0 24; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>* 25; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4 26; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 27; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 28; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 29; CHECK: middle.block: 30; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 31; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 32; CHECK: scalar.ph: 33; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 
0, [[ENTRY:%.*]] ] 34; CHECK-NEXT: br label [[FOR_COND:%.*]] 35; CHECK: for.cond: 36; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] 37; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 38; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 39; CHECK-NEXT: store i16 0, i16* [[B]], align 4 40; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 41; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 42; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]] 43; CHECK: if.end: 44; CHECK-NEXT: ret void 45; 46; TAILFOLD-LABEL: @bottom_tested( 47; TAILFOLD-NEXT: entry: 48; TAILFOLD-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 49; TAILFOLD-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 50; TAILFOLD-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 51; TAILFOLD: vector.ph: 52; TAILFOLD-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1 53; TAILFOLD-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2 54; TAILFOLD-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] 55; TAILFOLD-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1 56; TAILFOLD-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 57; TAILFOLD-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 58; TAILFOLD-NEXT: br label [[VECTOR_BODY:%.*]] 59; TAILFOLD: vector.body: 60; TAILFOLD-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] 61; TAILFOLD-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] 62; TAILFOLD-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 63; TAILFOLD-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64> 64; TAILFOLD-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP1]], 
i32 0 65; TAILFOLD-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 66; TAILFOLD: pred.store.if: 67; TAILFOLD-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0 68; TAILFOLD-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] 69; TAILFOLD-NEXT: store i16 0, i16* [[TMP5]], align 4 70; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE]] 71; TAILFOLD: pred.store.continue: 72; TAILFOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 73; TAILFOLD-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] 74; TAILFOLD: pred.store.if1: 75; TAILFOLD-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 76; TAILFOLD-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP7]] 77; TAILFOLD-NEXT: store i16 0, i16* [[TMP8]], align 4 78; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE2]] 79; TAILFOLD: pred.store.continue2: 80; TAILFOLD-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 81; TAILFOLD-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 82; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 83; TAILFOLD-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 84; TAILFOLD: middle.block: 85; TAILFOLD-NEXT: br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]] 86; TAILFOLD: scalar.ph: 87; TAILFOLD-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 88; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 89; TAILFOLD: for.cond: 90; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] 91; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 92; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 93; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 94; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 95; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 96; TAILFOLD-NEXT: 
br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]] 97; TAILFOLD: if.end: 98; TAILFOLD-NEXT: ret void 99; 100entry: 101 br label %for.cond 102 103for.cond: 104 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 105 %iprom = sext i32 %i to i64 106 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 107 store i16 0, i16* %b, align 4 108 %inc = add nsw i32 %i, 1 109 %cmp = icmp slt i32 %i, %n 110 br i1 %cmp, label %for.cond, label %if.end 111 112if.end: 113 ret void 114} 115 116define void @early_exit(i16* %p, i32 %n) { 117; CHECK-LABEL: @early_exit( 118; CHECK-NEXT: entry: 119; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 120; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 121; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 122; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 123; CHECK: vector.ph: 124; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 125; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 126; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 127; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 128; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 129; CHECK: vector.body: 130; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 131; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 132; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 133; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] 134; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 135; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* 136; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 137; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 138; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 139; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], 
!llvm.loop [[LOOP4:![0-9]+]] 140; CHECK: middle.block: 141; CHECK-NEXT: br label [[SCALAR_PH]] 142; CHECK: scalar.ph: 143; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 144; CHECK-NEXT: br label [[FOR_COND:%.*]] 145; CHECK: for.cond: 146; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 147; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 148; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 149; CHECK: for.body: 150; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 151; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 152; CHECK-NEXT: store i16 0, i16* [[B]], align 4 153; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 154; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] 155; CHECK: if.end: 156; CHECK-NEXT: ret void 157; 158; TAILFOLD-LABEL: @early_exit( 159; TAILFOLD-NEXT: entry: 160; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 161; TAILFOLD: for.cond: 162; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 163; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 164; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 165; TAILFOLD: for.body: 166; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 167; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 168; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 169; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 170; TAILFOLD-NEXT: br label [[FOR_COND]] 171; TAILFOLD: if.end: 172; TAILFOLD-NEXT: ret void 173; 174entry: 175 br label %for.cond 176 177for.cond: 178 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 179 %cmp = icmp slt i32 %i, %n 180 br i1 %cmp, label %for.body, label %if.end 181 182for.body: 183 %iprom = sext i32 %i to i64 184 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 185 store i16 0, i16* %b, align 4 186 %inc = add nsw i32 %i, 1 187 
br label %for.cond 188 189if.end: 190 ret void 191} 192 193define i32 @early_exit_with_live_out(i32* %ptr) { 194; CHECK-LABEL: @early_exit_with_live_out( 195; CHECK-NEXT: entry: 196; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 197; CHECK: vector.ph: 198; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 199; CHECK: vector.body: 200; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 201; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 202; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP0]] 203; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 0 204; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* 205; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4 206; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* 207; CHECK-NEXT: store <2 x i32> <i32 10, i32 10>, <2 x i32>* [[TMP4]], align 4 208; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 209; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 210; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 211; CHECK: middle.block: 212; CHECK-NEXT: br label [[SCALAR_PH]] 213; CHECK: scalar.ph: 214; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 998, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 215; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 216; CHECK: loop.header: 217; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 218; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[IV]] 219; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[GEP]], align 4 220; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 221; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 222; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 223; CHECK: loop.latch: 224; CHECK-NEXT: store i32 10, i32* [[GEP]], align 4 225; CHECK-NEXT: br label 
[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]] 226; CHECK: exit: 227; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_HEADER]] ] 228; CHECK-NEXT: ret i32 [[RES_LCSSA]] 229; 230; TAILFOLD-LABEL: @early_exit_with_live_out( 231; TAILFOLD-NEXT: entry: 232; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 233; TAILFOLD: loop.header: 234; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 235; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[IV]] 236; TAILFOLD-NEXT: [[L:%.*]] = load i32, i32* [[GEP]], align 4 237; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 238; TAILFOLD-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 239; TAILFOLD-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 240; TAILFOLD: loop.latch: 241; TAILFOLD-NEXT: store i32 10, i32* [[GEP]], align 4 242; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 243; TAILFOLD: exit: 244; TAILFOLD-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_HEADER]] ] 245; TAILFOLD-NEXT: ret i32 [[RES_LCSSA]] 246; 247entry: 248 br label %loop.header 249 250loop.header: 251 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 252 %gep = getelementptr i32, i32* %ptr, i64 %iv 253 %l = load i32, i32* %gep 254 %iv.next = add nuw nsw i64 %iv, 1 255 %ec = icmp eq i64 %iv.next, 1000 256 br i1 %ec, label %exit, label %loop.latch 257 258loop.latch: 259 store i32 10, i32* %gep 260 br label %loop.header 261 262exit: 263 %res.lcssa = phi i32 [ %l, %loop.header ] 264 ret i32 %res.lcssa 265} 266 267; Same as early_exit, but with optsize to prevent the use of 268; a scalar epilogue. -- Can't vectorize this in either case. 
; @optsize: top-tested store loop (same body as @early_exit) in a function
; carrying the optsize attribute. Both the CHECK and the TAILFOLD assertions
; below match only the original scalar blocks (entry, for.cond, for.body,
; if.end); neither prefix expects a vector.ph / vector.body block here.
269define void @optsize(i16* %p, i32 %n) optsize { 270; CHECK-LABEL: @optsize( 271; CHECK-NEXT: entry: 272; CHECK-NEXT: br label [[FOR_COND:%.*]] 273; CHECK: for.cond: 274; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 275; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 276; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 277; CHECK: for.body: 278; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 279; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 280; CHECK-NEXT: store i16 0, i16* [[B]], align 4 281; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 282; CHECK-NEXT: br label [[FOR_COND]] 283; CHECK: if.end: 284; CHECK-NEXT: ret void 285; 286; TAILFOLD-LABEL: @optsize( 287; TAILFOLD-NEXT: entry: 288; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 289; TAILFOLD: for.cond: 290; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 291; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 292; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 293; TAILFOLD: for.body: 294; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 295; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 296; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 297; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 298; TAILFOLD-NEXT: br label [[FOR_COND]] 299; TAILFOLD: if.end: 300; TAILFOLD-NEXT: ret void 301; 302entry: 303 br label %for.cond 304 305for.cond: 306 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 307 %cmp = icmp slt i32 %i, %n 308 br i1 %cmp, label %for.body, label %if.end 309 310for.body: 311 %iprom = sext i32 %i to i64 312 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 313 store i16 0, i16* %b, align 4 314 %inc = add nsw i32 %i, 1 315 br label %for.cond 316 317if.end: 318 ret void 319} 320 321 322; multiple exit - no values inside the loop used outside 323define void 
@multiple_unique_exit(i16* %p, i32 %n) { 324; CHECK-LABEL: @multiple_unique_exit( 325; CHECK-NEXT: entry: 326; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 327; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 328; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 329; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 330; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 331; CHECK: vector.ph: 332; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 333; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 334; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 335; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 336; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 337; CHECK: vector.body: 338; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 339; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 340; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 341; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] 342; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 343; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* 344; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 345; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 346; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 347; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 348; CHECK: middle.block: 349; CHECK-NEXT: br label [[SCALAR_PH]] 350; CHECK: scalar.ph: 351; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 352; CHECK-NEXT: br label [[FOR_COND:%.*]] 353; CHECK: for.cond: 354; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 355; CHECK-NEXT: [[CMP:%.*]] 
= icmp slt i32 [[I]], [[N]] 356; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 357; CHECK: for.body: 358; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 359; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 360; CHECK-NEXT: store i16 0, i16* [[B]], align 4 361; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 362; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 363; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]] 364; CHECK: if.end: 365; CHECK-NEXT: ret void 366; 367; TAILFOLD-LABEL: @multiple_unique_exit( 368; TAILFOLD-NEXT: entry: 369; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 370; TAILFOLD: for.cond: 371; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 372; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 373; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 374; TAILFOLD: for.body: 375; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 376; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 377; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 378; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 379; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 380; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 381; TAILFOLD: if.end: 382; TAILFOLD-NEXT: ret void 383; 384entry: 385 br label %for.cond 386 387for.cond: 388 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 389 %cmp = icmp slt i32 %i, %n 390 br i1 %cmp, label %for.body, label %if.end 391 392for.body: 393 %iprom = sext i32 %i to i64 394 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 395 store i16 0, i16* %b, align 4 396 %inc = add nsw i32 %i, 1 397 %cmp2 = icmp slt i32 %i, 2096 398 br i1 %cmp2, label %for.cond, label %if.end 399 400if.end: 401 ret void 402} 403 404; multiple exit - with an lcssa phi 405define i32 @multiple_unique_exit2(i16* %p, i32 %n) { 406; CHECK-LABEL: 
@multiple_unique_exit2( 407; CHECK-NEXT: entry: 408; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 409; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 410; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 411; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 412; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 413; CHECK: vector.ph: 414; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 415; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 416; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 417; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 418; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 419; CHECK: vector.body: 420; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 421; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 422; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 423; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] 424; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 425; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* 426; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 427; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 428; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 429; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 430; CHECK: middle.block: 431; CHECK-NEXT: br label [[SCALAR_PH]] 432; CHECK: scalar.ph: 433; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 434; CHECK-NEXT: br label [[FOR_COND:%.*]] 435; CHECK: for.cond: 436; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 437; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 438; CHECK-NEXT: br i1 
[[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 439; CHECK: for.body: 440; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 441; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 442; CHECK-NEXT: store i16 0, i16* [[B]], align 4 443; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 444; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 445; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]] 446; CHECK: if.end: 447; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] 448; CHECK-NEXT: ret i32 [[I_LCSSA]] 449; 450; TAILFOLD-LABEL: @multiple_unique_exit2( 451; TAILFOLD-NEXT: entry: 452; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 453; TAILFOLD: for.cond: 454; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 455; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 456; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 457; TAILFOLD: for.body: 458; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 459; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 460; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 461; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 462; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 463; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 464; TAILFOLD: if.end: 465; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] 466; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 467; 468entry: 469 br label %for.cond 470 471for.cond: 472 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 473 %cmp = icmp slt i32 %i, %n 474 br i1 %cmp, label %for.body, label %if.end 475 476for.body: 477 %iprom = sext i32 %i to i64 478 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 479 store i16 0, i16* %b, align 4 480 %inc = add nsw i32 %i, 1 481 %cmp2 = icmp slt i32 %i, 2096 482 br i1 %cmp2, label %for.cond, label 
%if.end 483 484if.end: 485 ret i32 %i 486} 487 488; multiple exit w/a non lcssa phi 489define i32 @multiple_unique_exit3(i16* %p, i32 %n) { 490; CHECK-LABEL: @multiple_unique_exit3( 491; CHECK-NEXT: entry: 492; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 493; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 494; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 495; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 496; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 497; CHECK: vector.ph: 498; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 499; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 500; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 501; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 502; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 503; CHECK: vector.body: 504; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 505; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 506; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 507; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] 508; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 509; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* 510; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 511; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 512; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 513; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 514; CHECK: middle.block: 515; CHECK-NEXT: br label [[SCALAR_PH]] 516; CHECK: scalar.ph: 517; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 518; CHECK-NEXT: br label [[FOR_COND:%.*]] 519; CHECK: for.cond: 520; CHECK-NEXT: [[I:%.*]] = phi i32 
[ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 521; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 522; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 523; CHECK: for.body: 524; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 525; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 526; CHECK-NEXT: store i16 0, i16* [[B]], align 4 527; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 528; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 529; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP13:![0-9]+]] 530; CHECK: if.end: 531; CHECK-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] 532; CHECK-NEXT: ret i32 [[EXIT]] 533; 534; TAILFOLD-LABEL: @multiple_unique_exit3( 535; TAILFOLD-NEXT: entry: 536; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 537; TAILFOLD: for.cond: 538; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 539; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 540; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 541; TAILFOLD: for.body: 542; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 543; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 544; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 545; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 546; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 547; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 548; TAILFOLD: if.end: 549; TAILFOLD-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] 550; TAILFOLD-NEXT: ret i32 [[EXIT]] 551; 552entry: 553 br label %for.cond 554 555for.cond: 556 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 557 %cmp = icmp slt i32 %i, %n 558 br i1 %cmp, label %for.body, label %if.end 559 560for.body: 561 %iprom = sext i32 %i to i64 562 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 563 store i16 0, 
i16* %b, align 4 564 %inc = add nsw i32 %i, 1 565 %cmp2 = icmp slt i32 %i, 2096 566 br i1 %cmp2, label %for.cond, label %if.end 567 568if.end: 569 %exit = phi i32 [0, %for.cond], [1, %for.body] 570 ret i32 %exit 571} 572 573; multiple exits w/distinct target blocks 574define i32 @multiple_exit_blocks(i16* %p, i32 %n) { 575; CHECK-LABEL: @multiple_exit_blocks( 576; CHECK-NEXT: entry: 577; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 578; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 579; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 580; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 581; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 582; CHECK: vector.ph: 583; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 584; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 585; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 586; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 587; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 588; CHECK: vector.body: 589; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 590; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 591; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 592; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] 593; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 594; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* 595; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 596; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 597; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 598; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 599; CHECK: middle.block: 600; CHECK-NEXT: br label [[SCALAR_PH]] 601; CHECK: scalar.ph: 602; CHECK-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 603; CHECK-NEXT: br label [[FOR_COND:%.*]] 604; CHECK: for.cond: 605; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 606; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 607; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 608; CHECK: for.body: 609; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 610; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 611; CHECK-NEXT: store i16 0, i16* [[B]], align 4 612; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 613; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 614; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]] 615; CHECK: if.end: 616; CHECK-NEXT: ret i32 0 617; CHECK: if.end2: 618; CHECK-NEXT: ret i32 1 619; 620; TAILFOLD-LABEL: @multiple_exit_blocks( 621; TAILFOLD-NEXT: entry: 622; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 623; TAILFOLD: for.cond: 624; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 625; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 626; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 627; TAILFOLD: for.body: 628; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 629; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 630; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 631; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 632; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 633; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 634; TAILFOLD: if.end: 635; TAILFOLD-NEXT: ret i32 0 636; TAILFOLD: if.end2: 637; TAILFOLD-NEXT: ret i32 1 638; 639entry: 640 br label %for.cond 641 642for.cond: 643 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 644 %cmp = icmp slt i32 %i, %n 645 br i1 %cmp, label %for.body, label %if.end 646 
647for.body: 648 %iprom = sext i32 %i to i64 649 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 650 store i16 0, i16* %b, align 4 651 %inc = add nsw i32 %i, 1 652 %cmp2 = icmp slt i32 %i, 2096 653 br i1 %cmp2, label %for.cond, label %if.end2 654 655if.end: 656 ret i32 0 657 658if.end2: 659 ret i32 1 660} 661 662; LCSSA, common value each exit 663define i32 @multiple_exit_blocks2(i16* %p, i32 %n) { 664; CHECK-LABEL: @multiple_exit_blocks2( 665; CHECK-NEXT: entry: 666; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 667; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 668; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 669; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 670; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 671; CHECK: vector.ph: 672; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 673; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 674; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 675; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 676; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 677; CHECK: vector.body: 678; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 679; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 680; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 681; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] 682; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 683; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* 684; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 685; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 686; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 687; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 688; CHECK: middle.block: 
689; CHECK-NEXT: br label [[SCALAR_PH]] 690; CHECK: scalar.ph: 691; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 692; CHECK-NEXT: br label [[FOR_COND:%.*]] 693; CHECK: for.cond: 694; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 695; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 696; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 697; CHECK: for.body: 698; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 699; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 700; CHECK-NEXT: store i16 0, i16* [[B]], align 4 701; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 702; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 703; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]] 704; CHECK: if.end: 705; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 706; CHECK-NEXT: ret i32 [[I_LCSSA]] 707; CHECK: if.end2: 708; CHECK-NEXT: [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ] 709; CHECK-NEXT: ret i32 [[I_LCSSA1]] 710; 711; TAILFOLD-LABEL: @multiple_exit_blocks2( 712; TAILFOLD-NEXT: entry: 713; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 714; TAILFOLD: for.cond: 715; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 716; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 717; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 718; TAILFOLD: for.body: 719; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 720; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 721; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 722; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 723; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 724; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 725; TAILFOLD: if.end: 726; TAILFOLD-NEXT: [[I_LCSSA:%.*]] 
= phi i32 [ [[I]], [[FOR_COND]] ] 727; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 728; TAILFOLD: if.end2: 729; TAILFOLD-NEXT: [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ] 730; TAILFOLD-NEXT: ret i32 [[I_LCSSA1]] 731; 732entry: 733 br label %for.cond 734 735for.cond: 736 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 737 %cmp = icmp slt i32 %i, %n 738 br i1 %cmp, label %for.body, label %if.end 739 740for.body: 741 %iprom = sext i32 %i to i64 742 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 743 store i16 0, i16* %b, align 4 744 %inc = add nsw i32 %i, 1 745 %cmp2 = icmp slt i32 %i, 2096 746 br i1 %cmp2, label %for.cond, label %if.end2 747 748if.end: 749 ret i32 %i 750 751if.end2: 752 ret i32 %i 753} 754 755; LCSSA, distinct value each exit 756define i32 @multiple_exit_blocks3(i16* %p, i32 %n) { 757; CHECK-LABEL: @multiple_exit_blocks3( 758; CHECK-NEXT: entry: 759; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 760; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 761; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 762; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 763; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 764; CHECK: vector.ph: 765; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 766; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 767; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 768; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 769; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 770; CHECK: vector.body: 771; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 772; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 773; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 774; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] 775; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 776; 
CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* 777; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 778; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 779; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 780; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 781; CHECK: middle.block: 782; CHECK-NEXT: br label [[SCALAR_PH]] 783; CHECK: scalar.ph: 784; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 785; CHECK-NEXT: br label [[FOR_COND:%.*]] 786; CHECK: for.cond: 787; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 788; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 789; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 790; CHECK: for.body: 791; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 792; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 793; CHECK-NEXT: store i16 0, i16* [[B]], align 4 794; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 795; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 796; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP19:![0-9]+]] 797; CHECK: if.end: 798; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 799; CHECK-NEXT: ret i32 [[I_LCSSA]] 800; CHECK: if.end2: 801; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ] 802; CHECK-NEXT: ret i32 [[INC_LCSSA]] 803; 804; TAILFOLD-LABEL: @multiple_exit_blocks3( 805; TAILFOLD-NEXT: entry: 806; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 807; TAILFOLD: for.cond: 808; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 809; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 810; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 811; TAILFOLD: for.body: 812; TAILFOLD-NEXT: [[IPROM:%.*]] 
= sext i32 [[I]] to i64 813; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 814; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 815; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 816; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 817; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 818; TAILFOLD: if.end: 819; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 820; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 821; TAILFOLD: if.end2: 822; TAILFOLD-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ] 823; TAILFOLD-NEXT: ret i32 [[INC_LCSSA]] 824; 825entry: 826 br label %for.cond 827 828for.cond: 829 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 830 %cmp = icmp slt i32 %i, %n 831 br i1 %cmp, label %for.body, label %if.end 832 833for.body: 834 %iprom = sext i32 %i to i64 835 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 836 store i16 0, i16* %b, align 4 837 %inc = add nsw i32 %i, 1 838 %cmp2 = icmp slt i32 %i, 2096 839 br i1 %cmp2, label %for.cond, label %if.end2 840 841if.end: 842 ret i32 %i 843 844if.end2: 845 ret i32 %inc 846} 847 848; unique exit case but with a switch as two edges between the same pair of 849; blocks is an often missed edge case 850define i32 @multiple_exit_switch(i16* %p, i32 %n) { 851; CHECK-LABEL: @multiple_exit_switch( 852; CHECK-NEXT: entry: 853; CHECK-NEXT: br label [[FOR_COND:%.*]] 854; CHECK: for.cond: 855; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 856; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 857; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 858; CHECK-NEXT: store i16 0, i16* [[B]], align 4 859; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 860; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 861; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 862; CHECK-NEXT: i32 2097, label [[IF_END]] 863; CHECK-NEXT: ] 864; CHECK: if.end: 865; CHECK-NEXT: [[I_LCSSA:%.*]] = phi 
i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 866; CHECK-NEXT: ret i32 [[I_LCSSA]] 867; 868; TAILFOLD-LABEL: @multiple_exit_switch( 869; TAILFOLD-NEXT: entry: 870; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 871; TAILFOLD: for.cond: 872; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 873; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 874; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 875; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 876; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 877; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 878; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 879; TAILFOLD-NEXT: i32 2097, label [[IF_END]] 880; TAILFOLD-NEXT: ] 881; TAILFOLD: if.end: 882; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 883; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 884; 885entry: 886 br label %for.cond 887 888for.cond: 889 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 890 %iprom = sext i32 %i to i64 891 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 892 store i16 0, i16* %b, align 4 893 %inc = add nsw i32 %i, 1 894 switch i32 %i, label %for.cond [ 895 i32 2096, label %if.end 896 i32 2097, label %if.end 897 ] 898 899if.end: 900 ret i32 %i 901} 902 903; multiple exit case but with a switch as multiple exiting edges from 904; a single block is a commonly missed edge case 905define i32 @multiple_exit_switch2(i16* %p, i32 %n) { 906; CHECK-LABEL: @multiple_exit_switch2( 907; CHECK-NEXT: entry: 908; CHECK-NEXT: br label [[FOR_COND:%.*]] 909; CHECK: for.cond: 910; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 911; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 912; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 913; CHECK-NEXT: store i16 0, i16* [[B]], align 4 914; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 915; CHECK-NEXT: switch i32 [[I]], label 
[[FOR_COND]] [ 916; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 917; CHECK-NEXT: i32 2097, label [[IF_END2:%.*]] 918; CHECK-NEXT: ] 919; CHECK: if.end: 920; CHECK-NEXT: ret i32 0 921; CHECK: if.end2: 922; CHECK-NEXT: ret i32 1 923; 924; TAILFOLD-LABEL: @multiple_exit_switch2( 925; TAILFOLD-NEXT: entry: 926; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 927; TAILFOLD: for.cond: 928; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 929; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 930; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 931; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 932; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 933; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 934; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 935; TAILFOLD-NEXT: i32 2097, label [[IF_END2:%.*]] 936; TAILFOLD-NEXT: ] 937; TAILFOLD: if.end: 938; TAILFOLD-NEXT: ret i32 0 939; TAILFOLD: if.end2: 940; TAILFOLD-NEXT: ret i32 1 941; 942entry: 943 br label %for.cond 944 945for.cond: 946 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 947 %iprom = sext i32 %i to i64 948 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 949 store i16 0, i16* %b, align 4 950 %inc = add nsw i32 %i, 1 951 switch i32 %i, label %for.cond [ 952 i32 2096, label %if.end 953 i32 2097, label %if.end2 954 ] 955 956if.end: 957 ret i32 0 958 959if.end2: 960 ret i32 1 961} 962 963define i32 @multiple_latch1(i16* %p) { 964; CHECK-LABEL: @multiple_latch1( 965; CHECK-NEXT: entry: 966; CHECK-NEXT: br label [[FOR_BODY:%.*]] 967; CHECK: for.body: 968; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 969; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 970; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 971; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 972; CHECK: for.second: 973; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 974; CHECK-NEXT: 
[[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 975; CHECK-NEXT: store i16 0, i16* [[B]], align 4 976; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 977; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 978; CHECK: for.body.backedge: 979; CHECK-NEXT: br label [[FOR_BODY]] 980; CHECK: for.end: 981; CHECK-NEXT: ret i32 0 982; 983; TAILFOLD-LABEL: @multiple_latch1( 984; TAILFOLD-NEXT: entry: 985; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 986; TAILFOLD: for.body: 987; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 988; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 989; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 990; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 991; TAILFOLD: for.second: 992; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 993; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 994; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 995; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 996; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 997; TAILFOLD: for.body.backedge: 998; TAILFOLD-NEXT: br label [[FOR_BODY]] 999; TAILFOLD: for.end: 1000; TAILFOLD-NEXT: ret i32 0 1001; 1002entry: 1003 br label %for.body 1004 1005for.body: 1006 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge] 1007 %inc = add nsw i32 %i.02, 1 1008 %cmp = icmp slt i32 %inc, 16 1009 br i1 %cmp, label %for.body.backedge, label %for.second 1010 1011for.second: 1012 %iprom = sext i32 %i.02 to i64 1013 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 1014 store i16 0, i16* %b, align 4 1015 %cmps = icmp sgt i32 %inc, 16 1016 br i1 %cmps, label %for.body.backedge, label %for.end 1017 1018for.body.backedge: 1019 br label %for.body 1020 1021for.end: 1022 ret i32 0 1023} 1024 1025 1026; two back branches - loop simplify will convert this to 
the same form 1027; as previous before vectorizer sees it, but show that. 1028define i32 @multiple_latch2(i16* %p) { 1029; CHECK-LABEL: @multiple_latch2( 1030; CHECK-NEXT: entry: 1031; CHECK-NEXT: br label [[FOR_BODY:%.*]] 1032; CHECK: for.body: 1033; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 1034; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 1035; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 1036; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 1037; CHECK: for.body.backedge: 1038; CHECK-NEXT: br label [[FOR_BODY]] 1039; CHECK: for.second: 1040; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 1041; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 1042; CHECK-NEXT: store i16 0, i16* [[B]], align 4 1043; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 1044; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 1045; CHECK: for.end: 1046; CHECK-NEXT: ret i32 0 1047; 1048; TAILFOLD-LABEL: @multiple_latch2( 1049; TAILFOLD-NEXT: entry: 1050; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 1051; TAILFOLD: for.body: 1052; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 1053; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 1054; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 1055; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 1056; TAILFOLD: for.body.backedge: 1057; TAILFOLD-NEXT: br label [[FOR_BODY]] 1058; TAILFOLD: for.second: 1059; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 1060; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 1061; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 1062; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 1063; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 1064; TAILFOLD: for.end: 1065; 
TAILFOLD-NEXT: ret i32 0 1066; 1067entry: 1068 br label %for.body 1069 1070for.body: 1071 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second] 1072 %inc = add nsw i32 %i.02, 1 1073 %cmp = icmp slt i32 %inc, 16 1074 br i1 %cmp, label %for.body, label %for.second 1075 1076for.second: 1077 %iprom = sext i32 %i.02 to i64 1078 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 1079 store i16 0, i16* %b, align 4 1080 %cmps = icmp sgt i32 %inc, 16 1081 br i1 %cmps, label %for.body, label %for.end 1082 1083for.end: 1084 ret i32 0 1085} 1086 1087 1088; Check interaction between block predication and early exits. We need the 1089; condition on the early exit to remain dead (i.e. not be used when forming 1090; the predicate mask). 1091define void @scalar_predication(float* %addr) { 1092; CHECK-LABEL: @scalar_predication( 1093; CHECK-NEXT: entry: 1094; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1095; CHECK: vector.ph: 1096; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1097; CHECK: vector.body: 1098; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] 1099; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1100; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]] 1101; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0 1102; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>* 1103; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 1104; CHECK-NEXT: [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer 1105; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true> 1106; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 1107; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 1108; CHECK: pred.store.if: 1109; CHECK-NEXT: [[TMP7:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP0]] 1110; 
CHECK-NEXT: store float 1.000000e+01, float* [[TMP7]], align 4 1111; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] 1112; CHECK: pred.store.continue: 1113; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 1114; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] 1115; CHECK: pred.store.if1: 1116; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 1117; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP9]] 1118; CHECK-NEXT: store float 1.000000e+01, float* [[TMP10]], align 4 1119; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] 1120; CHECK: pred.store.continue2: 1121; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1122; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 1123; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 1124; CHECK: middle.block: 1125; CHECK-NEXT: br label [[SCALAR_PH]] 1126; CHECK: scalar.ph: 1127; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1128; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1129; CHECK: loop.header: 1130; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1131; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]] 1132; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1133; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]] 1134; CHECK: loop.body: 1135; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[GEP]], align 4 1136; CHECK-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP12]], 0.000000e+00 1137; CHECK-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 1138; CHECK: then: 1139; CHECK-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 1140; CHECK-NEXT: br label [[LOOP_LATCH]] 1141; CHECK: loop.latch: 1142; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1143; CHECK-NEXT: br label [[LOOP_HEADER]], 
!llvm.loop [[LOOP21:![0-9]+]] 1144; CHECK: exit: 1145; CHECK-NEXT: ret void 1146; 1147; TAILFOLD-LABEL: @scalar_predication( 1148; TAILFOLD-NEXT: entry: 1149; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1150; TAILFOLD: loop.header: 1151; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1152; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]] 1153; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1154; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]] 1155; TAILFOLD: loop.body: 1156; TAILFOLD-NEXT: [[TMP0:%.*]] = load float, float* [[GEP]], align 4 1157; TAILFOLD-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00 1158; TAILFOLD-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 1159; TAILFOLD: then: 1160; TAILFOLD-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 1161; TAILFOLD-NEXT: br label [[LOOP_LATCH]] 1162; TAILFOLD: loop.latch: 1163; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1164; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 1165; TAILFOLD: exit: 1166; TAILFOLD-NEXT: ret void 1167; 1168entry: 1169 br label %loop.header 1170 1171loop.header: 1172 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1173 %gep = getelementptr float, float* %addr, i64 %iv 1174 %exitcond.not = icmp eq i64 %iv, 200 1175 br i1 %exitcond.not, label %exit, label %loop.body 1176 1177loop.body: 1178 %0 = load float, float* %gep, align 4 1179 %pred = fcmp oeq float %0, 0.0 1180 br i1 %pred, label %loop.latch, label %then 1181 1182then: 1183 store float 10.0, float* %gep, align 4 1184 br label %loop.latch 1185 1186loop.latch: 1187 %iv.next = add nuw nsw i64 %iv, 1 1188 br label %loop.header 1189 1190exit: 1191 ret void 1192} 1193 1194define i32 @me_reduction(i32* %addr) { 1195; CHECK-LABEL: @me_reduction( 1196; CHECK-NEXT: entry: 1197; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1198; CHECK: 
vector.ph: 1199; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1200; CHECK: vector.body: 1201; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1202; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] 1203; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1204; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]] 1205; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 0 1206; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>* 1207; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4 1208; CHECK-NEXT: [[TMP4]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]] 1209; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1210; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 1211; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 1212; CHECK: middle.block: 1213; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP4]]) 1214; CHECK-NEXT: br label [[SCALAR_PH]] 1215; CHECK: scalar.ph: 1216; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1217; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] 1218; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1219; CHECK: loop.header: 1220; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1221; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1222; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]] 1223; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1224; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1225; CHECK: loop.latch: 1226; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[GEP]], align 4 1227; 
CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP7]] 1228; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1229; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 1230; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP23:![0-9]+]] 1231; CHECK: exit: 1232; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] 1233; CHECK-NEXT: ret i32 [[LCSSA]] 1234; 1235; TAILFOLD-LABEL: @me_reduction( 1236; TAILFOLD-NEXT: entry: 1237; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1238; TAILFOLD: loop.header: 1239; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1240; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1241; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1242; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1243; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1244; TAILFOLD: loop.latch: 1245; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1246; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1247; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1248; TAILFOLD-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 1249; TAILFOLD-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]] 1250; TAILFOLD: exit: 1251; TAILFOLD-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] 1252; TAILFOLD-NEXT: ret i32 [[LCSSA]] 1253; 1254entry: 1255 br label %loop.header 1256 1257loop.header: 1258 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1259 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1260 %gep = getelementptr i32, i32* %addr, i64 %iv 1261 %exitcond.not = icmp eq i64 %iv, 200 1262 br i1 %exitcond.not, label %exit, label %loop.latch 1263 1264loop.latch: 1265 %0 = load i32, i32* %gep, align 4 1266 %accum.next 
= add i32 %accum, %0 1267 %iv.next = add nuw nsw i64 %iv, 1 1268 %exitcond2.not = icmp eq i64 %iv, 400 1269 br i1 %exitcond2.not, label %exit, label %loop.header 1270 1271exit: 1272 %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch] 1273 ret i32 %lcssa 1274} 1275 1276; TODO: The current definition of reduction is too strict, we can vectorize 1277; this. There's an analogous single exit case where we extract the N-1 1278; value of the reduction that we can also handle. If we fix the latter, the 1279; multiple exit case probably falls out. 1280define i32 @me_reduction2(i32* %addr) { 1281; CHECK-LABEL: @me_reduction2( 1282; CHECK-NEXT: entry: 1283; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1284; CHECK: loop.header: 1285; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1286; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1287; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1288; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1289; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1290; CHECK: loop.latch: 1291; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1292; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1293; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1294; CHECK-NEXT: br label [[LOOP_HEADER]] 1295; CHECK: exit: 1296; CHECK-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1297; CHECK-NEXT: ret i32 [[ACCUM_LCSSA]] 1298; 1299; TAILFOLD-LABEL: @me_reduction2( 1300; TAILFOLD-NEXT: entry: 1301; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1302; TAILFOLD: loop.header: 1303; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1304; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1305; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 
1306; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1307; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1308; TAILFOLD: loop.latch: 1309; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1310; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1311; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1312; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 1313; TAILFOLD: exit: 1314; TAILFOLD-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1315; TAILFOLD-NEXT: ret i32 [[ACCUM_LCSSA]] 1316; 1317entry: 1318 br label %loop.header 1319 1320loop.header: 1321 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1322 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1323 %gep = getelementptr i32, i32* %addr, i64 %iv 1324 %exitcond.not = icmp eq i64 %iv, 200 1325 br i1 %exitcond.not, label %exit, label %loop.latch 1326 1327loop.latch: 1328 %0 = load i32, i32* %gep, align 4 1329 %accum.next = add i32 %accum, %0 1330 %iv.next = add nuw nsw i64 %iv, 1 1331 br label %loop.header 1332 1333exit: 1334 ret i32 %accum 1335} 1336 1337