; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
; RUN: opt -S -loop-vectorize -force-vector-width=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s

; Loop-vectorizer tests for loops with various exit structures. Every function
; is run twice with the vector width forced to 2: once with default settings
; and once preferring a predicated (tail-folded) vector loop over a scalar
; epilogue, with the "dont-vectorize" fallback when folding is not possible.

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; Single-block loop: the store and the exit compare both live in for.cond,
; which branches back to itself. The default run expects a vector loop whose
; middle block compares the trip count against the vectorized count and may
; skip the scalar loop entirely. The tail-folding run expects per-lane
; predicated stores and a middle block that always branches to the exit.
define void @bottom_tested(i16* %p, i32 %n) {
; CHECK-LABEL: @bottom_tested(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
;
; TAILFOLD-LABEL: @bottom_tested(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; TAILFOLD-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; TAILFOLD:       vector.ph:
; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1
; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1
; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
; TAILFOLD:       vector.body:
; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
; TAILFOLD-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; TAILFOLD-NEXT:    [[TMP2:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
; TAILFOLD-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; TAILFOLD-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; TAILFOLD:       pred.store.if:
; TAILFOLD-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
; TAILFOLD-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]]
; TAILFOLD-NEXT:    store i16 0, i16* [[TMP5]], align 4
; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
; TAILFOLD:       pred.store.continue:
; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; TAILFOLD-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; TAILFOLD:       pred.store.if1:
; TAILFOLD-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
; TAILFOLD-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP7]]
; TAILFOLD-NEXT:    store i16 0, i16* [[TMP8]], align 4
; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
; TAILFOLD:       pred.store.continue2:
; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; TAILFOLD-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; TAILFOLD-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; TAILFOLD:       middle.block:
; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
; TAILFOLD:       scalar.ph:
; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.cond, label %if.end

if.end:
  ret void
}

; The exit compare sits in for.cond, ahead of the store in for.body, so the
; loop leaves from its header rather than from the block that branches back.
; The default run expects a vector loop that always hands over to the scalar
; loop: the vectorized count subtracts a full 2 when the remainder is 0, and
; the middle block branches unconditionally to scalar.ph. The tail-folding run
; expects the original scalar loop with no vector blocks.
define void @early_exit(i16* %p, i32 %n) {
; CHECK-LABEL: @early_exit(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
;
; TAILFOLD-LABEL: @early_exit(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    br label [[FOR_COND]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  br label %for.cond

if.end:
  ret void
}

; Header-exiting loop with a constant bound of 1000 whose loaded value %l is
; returned through a phi in the exit block. The default run expects the vector
; loop to stop at index 998 and the scalar loop to resume from there, so the
; returned value still comes from the scalar loop.header. The tail-folding run
; expects the original scalar loop with no vector blocks.
define i32 @early_exit_with_live_out(i32* %ptr) {
; CHECK-LABEL: @early_exit_with_live_out(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
; CHECK-NEXT:    store <2 x i32> <i32 10, i32 10>, <2 x i32>* [[TMP4]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 998, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[IV]]
; CHECK-NEXT:    [[L:%.*]] = load i32, i32* [[GEP]], align 4
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    store i32 10, i32* [[GEP]], align 4
; CHECK-NEXT:    br label [[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    [[RES_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_HEADER]] ]
; CHECK-NEXT:    ret i32 [[RES_LCSSA]]
;
; TAILFOLD-LABEL: @early_exit_with_live_out(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
; TAILFOLD:       loop.header:
; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[IV]]
; TAILFOLD-NEXT:    [[L:%.*]] = load i32, i32* [[GEP]], align 4
; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TAILFOLD-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
; TAILFOLD-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; TAILFOLD:       loop.latch:
; TAILFOLD-NEXT:    store i32 10, i32* [[GEP]], align 4
; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
; TAILFOLD:       exit:
; TAILFOLD-NEXT:    [[RES_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_HEADER]] ]
; TAILFOLD-NEXT:    ret i32 [[RES_LCSSA]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep = getelementptr i32, i32* %ptr, i64 %iv
  %l = load i32, i32* %gep
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 1000
  br i1 %ec, label %exit, label %loop.latch

loop.latch:
  store i32 10, i32* %gep
  br label %loop.header

exit:
  %res.lcssa = phi i32 [ %l, %loop.header ]
  ret i32 %res.lcssa
}

; Same as early_exit, but with optsize to prevent the use of
; a scalar epilogue. -- Can't vectorize this in either case.
269define void @optsize(i16* %p, i32 %n) optsize { 270; CHECK-LABEL: @optsize( 271; CHECK-NEXT: entry: 272; CHECK-NEXT: br label [[FOR_COND:%.*]] 273; CHECK: for.cond: 274; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 275; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 276; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 277; CHECK: for.body: 278; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 279; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 280; CHECK-NEXT: store i16 0, i16* [[B]], align 4 281; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 282; CHECK-NEXT: br label [[FOR_COND]] 283; CHECK: if.end: 284; CHECK-NEXT: ret void 285; 286; TAILFOLD-LABEL: @optsize( 287; TAILFOLD-NEXT: entry: 288; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 289; TAILFOLD: for.cond: 290; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 291; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 292; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 293; TAILFOLD: for.body: 294; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 295; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 296; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 297; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 298; TAILFOLD-NEXT: br label [[FOR_COND]] 299; TAILFOLD: if.end: 300; TAILFOLD-NEXT: ret void 301; 302entry: 303 br label %for.cond 304 305for.cond: 306 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 307 %cmp = icmp slt i32 %i, %n 308 br i1 %cmp, label %for.body, label %if.end 309 310for.body: 311 %iprom = sext i32 %i to i64 312 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 313 store i16 0, i16* %b, align 4 314 %inc = add nsw i32 %i, 1 315 br label %for.cond 316 317if.end: 318 ret void 319} 320 321 322; multiple exit - no values inside the loop used outside 323define void 
@multiple_unique_exit(i16* %p, i32 %n) { 324; CHECK-LABEL: @multiple_unique_exit( 325; CHECK-NEXT: entry: 326; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 327; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 328; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 329; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 330; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 331; CHECK: vector.ph: 332; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 333; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 334; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 335; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 336; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 337; CHECK: vector.body: 338; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 339; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 340; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 341; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] 342; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 343; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* 344; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 345; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 346; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 347; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 348; CHECK: middle.block: 349; CHECK-NEXT: br label [[SCALAR_PH]] 350; CHECK: scalar.ph: 351; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 352; CHECK-NEXT: br label [[FOR_COND:%.*]] 353; CHECK: for.cond: 354; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 355; CHECK-NEXT: [[CMP:%.*]] 
= icmp slt i32 [[I]], [[N]] 356; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 357; CHECK: for.body: 358; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 359; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 360; CHECK-NEXT: store i16 0, i16* [[B]], align 4 361; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 362; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 363; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]] 364; CHECK: if.end: 365; CHECK-NEXT: ret void 366; 367; TAILFOLD-LABEL: @multiple_unique_exit( 368; TAILFOLD-NEXT: entry: 369; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 370; TAILFOLD: for.cond: 371; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 372; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 373; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 374; TAILFOLD: for.body: 375; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 376; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 377; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 378; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 379; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 380; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 381; TAILFOLD: if.end: 382; TAILFOLD-NEXT: ret void 383; 384entry: 385 br label %for.cond 386 387for.cond: 388 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 389 %cmp = icmp slt i32 %i, %n 390 br i1 %cmp, label %for.body, label %if.end 391 392for.body: 393 %iprom = sext i32 %i to i64 394 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 395 store i16 0, i16* %b, align 4 396 %inc = add nsw i32 %i, 1 397 %cmp2 = icmp slt i32 %i, 2096 398 br i1 %cmp2, label %for.cond, label %if.end 399 400if.end: 401 ret void 402} 403 404; multiple exit - with an lcssa phi 405define i32 @multiple_unique_exit2(i16* %p, i32 %n) { 406; CHECK-LABEL: 
@multiple_unique_exit2( 407; CHECK-NEXT: entry: 408; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 409; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 410; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 411; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 412; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 413; CHECK: vector.ph: 414; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 415; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 416; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 417; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 418; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 419; CHECK: vector.body: 420; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 421; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 422; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 423; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] 424; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 425; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* 426; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 427; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 428; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 429; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 430; CHECK: middle.block: 431; CHECK-NEXT: br label [[SCALAR_PH]] 432; CHECK: scalar.ph: 433; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 434; CHECK-NEXT: br label [[FOR_COND:%.*]] 435; CHECK: for.cond: 436; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 437; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 438; CHECK-NEXT: br i1 
[[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 439; CHECK: for.body: 440; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 441; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 442; CHECK-NEXT: store i16 0, i16* [[B]], align 4 443; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 444; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 445; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]] 446; CHECK: if.end: 447; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] 448; CHECK-NEXT: ret i32 [[I_LCSSA]] 449; 450; TAILFOLD-LABEL: @multiple_unique_exit2( 451; TAILFOLD-NEXT: entry: 452; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 453; TAILFOLD: for.cond: 454; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 455; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 456; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 457; TAILFOLD: for.body: 458; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 459; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 460; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 461; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 462; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 463; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 464; TAILFOLD: if.end: 465; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] 466; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 467; 468entry: 469 br label %for.cond 470 471for.cond: 472 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 473 %cmp = icmp slt i32 %i, %n 474 br i1 %cmp, label %for.body, label %if.end 475 476for.body: 477 %iprom = sext i32 %i to i64 478 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 479 store i16 0, i16* %b, align 4 480 %inc = add nsw i32 %i, 1 481 %cmp2 = icmp slt i32 %i, 2096 482 br i1 %cmp2, label %for.cond, label 
%if.end 483 484if.end: 485 ret i32 %i 486} 487 488; multiple exit w/a non lcssa phi 489define i32 @multiple_unique_exit3(i16* %p, i32 %n) { 490; CHECK-LABEL: @multiple_unique_exit3( 491; CHECK-NEXT: entry: 492; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 493; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 494; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 495; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 496; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 497; CHECK: vector.ph: 498; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 499; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 500; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 501; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 502; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 503; CHECK: vector.body: 504; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 505; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 506; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 507; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] 508; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 509; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* 510; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 511; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 512; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 513; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 514; CHECK: middle.block: 515; CHECK-NEXT: br label [[SCALAR_PH]] 516; CHECK: scalar.ph: 517; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 518; CHECK-NEXT: br label [[FOR_COND:%.*]] 519; CHECK: for.cond: 520; CHECK-NEXT: [[I:%.*]] = phi i32 
[ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 521; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 522; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 523; CHECK: for.body: 524; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 525; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 526; CHECK-NEXT: store i16 0, i16* [[B]], align 4 527; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 528; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 529; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP13:![0-9]+]] 530; CHECK: if.end: 531; CHECK-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] 532; CHECK-NEXT: ret i32 [[EXIT]] 533; 534; TAILFOLD-LABEL: @multiple_unique_exit3( 535; TAILFOLD-NEXT: entry: 536; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 537; TAILFOLD: for.cond: 538; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 539; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 540; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 541; TAILFOLD: for.body: 542; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 543; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 544; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 545; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 546; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 547; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 548; TAILFOLD: if.end: 549; TAILFOLD-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] 550; TAILFOLD-NEXT: ret i32 [[EXIT]] 551; 552entry: 553 br label %for.cond 554 555for.cond: 556 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 557 %cmp = icmp slt i32 %i, %n 558 br i1 %cmp, label %for.body, label %if.end 559 560for.body: 561 %iprom = sext i32 %i to i64 562 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 563 store i16 0, 
i16* %b, align 4 564 %inc = add nsw i32 %i, 1 565 %cmp2 = icmp slt i32 %i, 2096 566 br i1 %cmp2, label %for.cond, label %if.end 567 568if.end: 569 %exit = phi i32 [0, %for.cond], [1, %for.body] 570 ret i32 %exit 571} 572 573; multiple exits w/distinct target blocks 574define i32 @multiple_exit_blocks(i16* %p, i32 %n) { 575; CHECK-LABEL: @multiple_exit_blocks( 576; CHECK-NEXT: entry: 577; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 578; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 579; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 580; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 581; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 582; CHECK: vector.ph: 583; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 584; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 585; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 586; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 587; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 588; CHECK: vector.body: 589; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 590; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 591; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 592; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]] 593; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 594; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>* 595; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4 596; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 597; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 598; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 599; CHECK: middle.block: 600; CHECK-NEXT: br label [[SCALAR_PH]] 601; CHECK: scalar.ph: 602; CHECK-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 603; CHECK-NEXT: br label [[FOR_COND:%.*]] 604; CHECK: for.cond: 605; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 606; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 607; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 608; CHECK: for.body: 609; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 610; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 611; CHECK-NEXT: store i16 0, i16* [[B]], align 4 612; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 613; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 614; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]] 615; CHECK: if.end: 616; CHECK-NEXT: ret i32 0 617; CHECK: if.end2: 618; CHECK-NEXT: ret i32 1 619; 620; TAILFOLD-LABEL: @multiple_exit_blocks( 621; TAILFOLD-NEXT: entry: 622; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 623; TAILFOLD: for.cond: 624; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 625; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 626; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 627; TAILFOLD: for.body: 628; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 629; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 630; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 631; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 632; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 633; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 634; TAILFOLD: if.end: 635; TAILFOLD-NEXT: ret i32 0 636; TAILFOLD: if.end2: 637; TAILFOLD-NEXT: ret i32 1 638; 639entry: 640 br label %for.cond 641 642for.cond: 643 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 644 %cmp = icmp slt i32 %i, %n 645 br i1 %cmp, label %for.body, label %if.end 646 
for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  br i1 %cmp2, label %for.cond, label %if.end2

if.end:
  ret i32 0

if.end2:
  ret i32 1
}

; LCSSA, common value each exit
; The loop has two exiting blocks, each with its own exit block: for.cond
; leaves to if.end once `%i slt %n` is false, and for.body leaves to if.end2
; once `%i slt 2096` is false. Both exit blocks return %i.
; The default-prefix expectations show a VF=2 vector body whose middle block
; branches unconditionally to the scalar preheader; the tail-folding
; expectations show the loop left in scalar form.
define i32 @multiple_exit_blocks2(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_blocks2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA]]
; CHECK:       if.end2:
; CHECK-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA1]]
;
; TAILFOLD-LABEL: @multiple_exit_blocks2(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
; TAILFOLD:       if.end2:
; TAILFOLD-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
; TAILFOLD-NEXT:    ret i32 [[I_LCSSA1]]
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  ; first exiting edge: for.cond -> if.end
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  ; second exiting edge: for.body -> if.end2
  br i1 %cmp2, label %for.cond, label %if.end2

if.end:
  ret i32 %i

if.end2:
  ret i32 %i
}

; LCSSA, distinct value each exit
; Same loop shape as @multiple_exit_blocks2, but the two exit blocks return
; different loop values: if.end returns %i and if.end2 returns %inc.
; The default-prefix expectations show a VF=2 vector body that also carries a
; <2 x i32> induction phi; the tail-folding expectations show the loop left in
; scalar form.
define i32 @multiple_exit_blocks3(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_blocks3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>*
; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <2 x i32> [[VEC_IND]], <i32 1, i32 1>
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA]]
; CHECK:       if.end2:
; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
;
; TAILFOLD-LABEL: @multiple_exit_blocks3(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
; TAILFOLD:       if.end2:
; TAILFOLD-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
; TAILFOLD-NEXT:    ret i32 [[INC_LCSSA]]
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  ; first exiting edge: for.cond -> if.end (returns %i)
  br i1 %cmp, label %for.body, label %if.end

for.body:
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  %cmp2 = icmp slt i32 %i, 2096
  ; second exiting edge: for.body -> if.end2 (returns %inc)
  br i1 %cmp2, label %for.cond, label %if.end2

if.end:
  ret i32 %i

if.end2:
  ret i32 %inc
}

; Unique exit block case, but the exit is a switch: two edges between the same
; pair of blocks is an often-missed edge case.
; Both switch cases (2096 and 2097) target %if.end; the default destination is
; the loop header itself. The expectations for both RUN lines show the loop
; unchanged, i.e. not vectorized.
define i32 @multiple_exit_switch(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_switch(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
; CHECK-NEXT:    i32 2097, label [[IF_END]]
; CHECK-NEXT:    ]
; CHECK:       if.end:
; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
; CHECK-NEXT:    ret i32 [[I_LCSSA]]
;
; TAILFOLD-LABEL: @multiple_exit_switch(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
; TAILFOLD-NEXT:    i32 2097, label [[IF_END]]
; TAILFOLD-NEXT:    ]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  ; default edge is the backedge; both listed cases leave to the same block
  switch i32 %i, label %for.cond [
  i32 2096, label %if.end
  i32 2097, label %if.end
  ]

if.end:
  ret i32 %i
}

; Multiple exit block case, but with a switch: multiple exiting edges from
; a single block is a commonly missed edge case.
; Case 2096 leaves to %if.end (returns 0) and case 2097 leaves to %if.end2
; (returns 1). The expectations for both RUN lines show the loop unchanged,
; i.e. not vectorized.
define i32 @multiple_exit_switch2(i16* %p, i32 %n) {
; CHECK-LABEL: @multiple_exit_switch2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
; CHECK-NEXT:    i32 2097, label [[IF_END2:%.*]]
; CHECK-NEXT:    ]
; CHECK:       if.end:
; CHECK-NEXT:    ret i32 0
; CHECK:       if.end2:
; CHECK-NEXT:    ret i32 1
;
; TAILFOLD-LABEL: @multiple_exit_switch2(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
; TAILFOLD:       for.cond:
; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
; TAILFOLD-NEXT:    i32 2097, label [[IF_END2:%.*]]
; TAILFOLD-NEXT:    ]
; TAILFOLD:       if.end:
; TAILFOLD-NEXT:    ret i32 0
; TAILFOLD:       if.end2:
; TAILFOLD-NEXT:    ret i32 1
;
entry:
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
  %iprom = sext i32 %i to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %inc = add nsw i32 %i, 1
  ; default edge is the backedge; each listed case leaves to a different block
  switch i32 %i, label %for.cond [
  i32 2096, label %if.end
  i32 2097, label %if.end2
  ]

if.end:
  ret i32 0

if.end2:
  ret i32 1
}

; Loop whose two in-loop blocks (for.body and for.second) both branch to a
; shared for.body.backedge block, which is the only block branching back to
; the header. The store lives in for.second, which is reached only when
; `%inc slt 16` is false. The expectations for both RUN lines show the loop
; unchanged, i.e. not vectorized.
define i32 @multiple_latch1(i16* %p) {
; CHECK-LABEL: @multiple_latch1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
; CHECK:       for.second:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
; CHECK:       for.body.backedge:
; CHECK-NEXT:    br label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret i32 0
;
; TAILFOLD-LABEL: @multiple_latch1(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
; TAILFOLD:       for.second:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
; TAILFOLD:       for.body.backedge:
; TAILFOLD-NEXT:    br label [[FOR_BODY]]
; TAILFOLD:       for.end:
; TAILFOLD-NEXT:    ret i32 0
;
entry:
  br label %for.body

for.body:
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge]
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, 16
  br i1 %cmp, label %for.body.backedge, label %for.second

for.second:
  %iprom = sext i32 %i.02 to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %cmps = icmp sgt i32 %inc, 16
  ; the only exiting edge: for.second -> for.end
  br i1 %cmps, label %for.body.backedge, label %for.end

for.body.backedge:
  br label %for.body

for.end:
  ret i32 0
}


; Two back branches - loop simplify will convert this to the same form as the
; previous test before the vectorizer sees it, but show that anyway.
; Here for.body and for.second each branch directly back to for.body; the
; expected output for both RUN lines contains a merged for.body.backedge
; block and is otherwise not vectorized.
define i32 @multiple_latch2(i16* %p) {
; CHECK-LABEL: @multiple_latch2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
; CHECK:       for.body.backedge:
; CHECK-NEXT:    br label [[FOR_BODY]]
; CHECK:       for.second:
; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
; CHECK:       for.end:
; CHECK-NEXT:    ret i32 0
;
; TAILFOLD-LABEL: @multiple_latch2(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
; TAILFOLD:       for.body:
; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
; TAILFOLD:       for.body.backedge:
; TAILFOLD-NEXT:    br label [[FOR_BODY]]
; TAILFOLD:       for.second:
; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
; TAILFOLD:       for.end:
; TAILFOLD-NEXT:    ret i32 0
;
entry:
  br label %for.body

for.body:
  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, 16
  ; first back branch: for.body -> for.body
  br i1 %cmp, label %for.body, label %for.second

for.second:
  %iprom = sext i32 %i.02 to i64
  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  store i16 0, i16* %b, align 4
  %cmps = icmp sgt i32 %inc, 16
  ; second back branch: for.second -> for.body; otherwise leave to for.end
  br i1 %cmps, label %for.body, label %for.end

for.end:
  ret i32 0
}


; Check interaction between block predication and early exits. We need the
; condition on the early exit to remain dead (i.e. not be used when forming
; the predicate mask).
; The loop exits only from loop.header (when %iv == 200); loop.latch branches
; back unconditionally. Inside the loop, 10.0 is stored to %gep only when the
; `fcmp oeq` of the loaded value against 0.0 is false.
; The default-prefix expectations show a VF=2 vector body with per-lane
; predicated stores whose mask is the inverted fcmp result alone; the
; tail-folding expectations show the loop left in scalar form.
define void @scalar_predication(float* %addr) {
; CHECK-LABEL: @scalar_predication(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK:       pred.store.if:
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP0]]
; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP7]], align 4
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
; CHECK:       pred.store.continue:
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; CHECK:       pred.store.if1:
; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP9]]
; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP10]], align 4
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
; CHECK:       pred.store.continue2:
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]]
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
; CHECK:       loop.body:
; CHECK-NEXT:    [[TMP12:%.*]] = load float, float* [[GEP]], align 4
; CHECK-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP12]], 0.000000e+00
; CHECK-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
; CHECK:       then:
; CHECK-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    br label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
; TAILFOLD-LABEL: @scalar_predication(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
; TAILFOLD:       loop.header:
; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]]
; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
; TAILFOLD:       loop.body:
; TAILFOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[GEP]], align 4
; TAILFOLD-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
; TAILFOLD-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
; TAILFOLD:       then:
; TAILFOLD-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
; TAILFOLD-NEXT:    br label [[LOOP_LATCH]]
; TAILFOLD:       loop.latch:
; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
; TAILFOLD:       exit:
; TAILFOLD-NEXT:    ret void
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %gep = getelementptr float, float* %addr, i64 %iv
  %exitcond.not = icmp eq i64 %iv, 200
  ; early exit taken from the header, before the predicated body runs
  br i1 %exitcond.not, label %exit, label %loop.body

loop.body:
  %0 = load float, float* %gep, align 4
  %pred = fcmp oeq float %0, 0.0
  ; skip the store when the loaded value compares ordered-equal to 0.0
  br i1 %pred, label %loop.latch, label %then

then:
  store float 10.0, float* %gep, align 4
  br label %loop.latch

loop.latch:
  %iv.next = add nuw nsw i64 %iv, 1
  br label %loop.header

exit:
  ret void
}

; Integer add reduction in a loop with two exiting blocks that share one exit
; block: loop.header leaves when %iv == 200 and loop.latch leaves when
; %iv == 400. The exit phi takes the constant 0 on the header edge and
; %accum.next on the latch edge.
; The default-prefix expectations show a VF=2 vector body with the partial sums
; combined by llvm.vector.reduce.add in the middle block; the tail-folding
; expectations show the loop left in scalar form.
define i32 @me_reduction(i32* %addr) {
; CHECK-LABEL: @me_reduction(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
; CHECK-NEXT:    [[TMP4]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP4]])
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]]
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[GEP]], align 4
; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP7]]
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
; CHECK-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
; CHECK-NEXT:    ret i32 [[LCSSA]]
;
; TAILFOLD-LABEL: @me_reduction(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
; TAILFOLD:       loop.header:
; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; TAILFOLD:       loop.latch:
; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TAILFOLD-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
; TAILFOLD-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]]
; TAILFOLD:       exit:
; TAILFOLD-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
; TAILFOLD-NEXT:    ret i32 [[LCSSA]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
  %gep = getelementptr i32, i32* %addr, i64 %iv
  %exitcond.not = icmp eq i64 %iv, 200
  ; first exiting edge: loop.header -> exit
  br i1 %exitcond.not, label %exit, label %loop.latch

loop.latch:
  %0 = load i32, i32* %gep, align 4
  %accum.next = add i32 %accum, %0
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond2.not = icmp eq i64 %iv, 400
  ; second exiting edge: loop.latch -> exit
  br i1 %exitcond2.not, label %exit, label %loop.header

exit:
  %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch]
  ret i32 %lcssa
}

; TODO: The current definition of reduction is too strict, we can vectorize
; this. There's an analogous single exit case where we extract the N-1
; value of the reduction that we can also handle. If we fix the latter, the
; multiple exit case probably falls out.
; Integer add reduction where the only exiting edge is in loop.header (taken
; when %iv == 200); loop.latch branches back unconditionally. The value
; returned is the header phi %accum rather than the updated %accum.next.
; The expectations for both RUN lines show the loop unchanged, i.e. it is
; currently not vectorized.
define i32 @me_reduction2(i32* %addr) {
; CHECK-LABEL: @me_reduction2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    br label [[LOOP_HEADER]]
; CHECK:       exit:
; CHECK-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
; CHECK-NEXT:    ret i32 [[ACCUM_LCSSA]]
;
; TAILFOLD-LABEL: @me_reduction2(
; TAILFOLD-NEXT:  entry:
; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
; TAILFOLD:       loop.header:
; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; TAILFOLD:       loop.latch:
; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
; TAILFOLD:       exit:
; TAILFOLD-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
; TAILFOLD-NEXT:    ret i32 [[ACCUM_LCSSA]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
  %gep = getelementptr i32, i32* %addr, i64 %iv
  %exitcond.not = icmp eq i64 %iv, 200
  ; the only exiting edge: loop.header -> exit
  br i1 %exitcond.not, label %exit, label %loop.latch

loop.latch:
  %0 = load i32, i32* %gep, align 4
  %accum.next = add i32 %accum, %0
  %iv.next = add nuw nsw i64 %iv, 1
  br label %loop.header

exit:
  ; returns the header phi, i.e. the sum as it stood before this iteration's add
  ret i32 %accum
}