; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s -check-prefix=EPILOG
; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -disable-output
; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s -check-prefix=PROLOG
; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-runtime-epilog=false -unroll-count=2 -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -disable-output

; Runtime loop unrolling of loops that have more than one exit
; (-unroll-runtime-multi-exit=true). Every function is run with the remainder
; placed after the unrolled loop (checked under the EPILOG prefix) and before
; it (checked under the PROLOG prefix).
;
; the second and fourth RUNs generate an epilog/prolog remainder block for all the test
; cases below (it does not generate a loop).
; Their output is not checked: they only have to pass -verify-dom-info and
; -verify-loop-info, so they run with -disable-output instead of writing
; bitcode to stdout.

; test with three exiting and three exit blocks.
; none of the exit blocks have successors
define void @test1(i64 %trip, i1 %cond) {
; EPILOG: test1(
; EPILOG-NEXT: entry:
; EPILOG-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
; EPILOG-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
; EPILOG-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
; EPILOG-NEXT: br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
; EPILOG: entry.new:
; EPILOG-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TRIP]], [[XTRAITER]]
; EPILOG-NEXT: br label [[LOOP_HEADER:%.*]]
; EPILOG: loop_latch.epil:
; EPILOG-NEXT: %epil.iter.sub = add i64 %epil.iter, -1
; EPILOG-NEXT: %epil.iter.cmp = icmp eq i64 %epil.iter.sub, 0
; EPILOG-NEXT: br i1 %epil.iter.cmp, label %exit2.loopexit.epilog-lcssa, label %loop_header.epil
; EPILOG: loop_latch.7:
; EPILOG-NEXT: %niter.nsub.7 = add i64 %niter, -8
; EPILOG-NEXT: %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
; EPILOG-NEXT: br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header

; PROLOG: test1(
; PROLOG-NEXT: entry:
; PROLOG-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
; PROLOG-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
; PROLOG-NEXT: [[TMP1:%.*]] = icmp eq i64 [[XTRAITER]], 0
; PROLOG-NEXT: br i1 [[TMP1]], label %loop_header.prol.loopexit, label %loop_header.prol.preheader
; PROLOG: loop_header.prol:
; PROLOG-NEXT: %iv.prol = phi i64 [ 0, %loop_header.prol.preheader ], [ %iv_next.prol, %loop_latch.prol ]
; PROLOG-NEXT: %prol.iter = phi i64 [ [[XTRAITER]], %loop_header.prol.preheader ], [ %prol.iter.sub, %loop_latch.prol ]
; PROLOG-NEXT: br i1 %cond, label %loop_latch.prol, label %loop_exiting_bb1.prol
; PROLOG: loop_latch.prol:
; PROLOG-NEXT: %iv_next.prol = add i64 %iv.prol, 1
; PROLOG-NEXT: %prol.iter.sub = add i64 %prol.iter, -1
; PROLOG-NEXT: %prol.iter.cmp = icmp eq i64 %prol.iter.sub, 0
; PROLOG-NEXT: br i1 %prol.iter.cmp, label %loop_header.prol.loopexit.unr-lcssa, label %loop_header.prol
; PROLOG: loop_latch.7:
; PROLOG-NEXT: %iv_next.7 = add i64 %iv, 8
; PROLOG-NEXT: %cmp.7 = icmp eq i64 %iv_next.7, %trip
; PROLOG-NEXT: br i1 %cmp.7, label %exit2.loopexit.unr-lcssa, label %loop_header
entry:
  br label %loop_header

loop_header:
  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
  br i1 %cond, label %loop_latch, label %loop_exiting_bb1

; First non-latch exiting block; the constant-false condition selects %exit1.
loop_exiting_bb1:
  br i1 false, label %loop_exiting_bb2, label %exit1

; Second non-latch exiting block; the constant-false condition selects %exit3.
loop_exiting_bb2:
  br i1 false, label %loop_latch, label %exit3

exit3:
  ret void

; Latch: the only exit controlled by the trip count %trip.
loop_latch:
  %iv_next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv_next, %trip
  br i1 %cmp, label %loop_header, label %exit2.loopexit

exit1:
  ret void

exit2.loopexit:
  ret void
}


; test with three exiting and two exit blocks.
; The non-latch exit block has 2 unique predecessors.
; There are 2 values passed to the exit blocks that are calculated at every iteration.
; %sum.02 and %add. Both of these are incoming values for phi from every exiting
; unrolled block.
define i32 @test2(i32* nocapture %a, i64 %n) {
; Both %header and %for.exiting_block leave the loop to %for.exit2, so the
; checks below expect its phi to collect one incoming value per unrolled copy
; of each exiting block.
; EPILOG: test2(
; EPILOG: for.exit2.loopexit:
; EPILOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ],
; EPILOG-NEXT: br label %for.exit2
; EPILOG: for.exit2.loopexit2:
; EPILOG-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
; EPILOG-NEXT: br label %for.exit2
; EPILOG: for.exit2:
; EPILOG-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
; EPILOG-NEXT: ret i32 %retval
; EPILOG: %niter.nsub.7 = add i64 %niter, -8

; PROLOG: test2(
; PROLOG: for.exit2.loopexit:
; PROLOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ],
; PROLOG-NEXT: br label %for.exit2
; PROLOG: for.exit2.loopexit1:
; PROLOG-NEXT: %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ]
; PROLOG-NEXT: br label %for.exit2
; PROLOG: for.exit2:
; PROLOG-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph2, %for.exit2.loopexit1 ]
; PROLOG-NEXT: ret i32 %retval
; PROLOG: %indvars.iv.next.7 = add i64 %indvars.iv, 8

entry:
  br label %header

header:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  br i1 false, label %for.exit2, label %for.exiting_block

; Exits to %for.exit2 on a loop-invariant comparison of %n.
for.exiting_block:
  %cmp = icmp eq i64 %n, 42
  br i1 %cmp, label %for.exit2, label %for.body

; Latch: accumulates %a[%indvars.iv] into the running sum.
for.body:
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, %sum.02
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.end, label %header

for.end:                                          ; preds = %for.body
  %sum.0.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %sum.0.lcssa

for.exit2:
  %retval = phi i32 [ %sum.02, %header ], [ 42, %for.exiting_block ]
  ret i32 %retval
}

; test with two exiting and three exit blocks.
; the non-latch exiting block has a switch.
define void @test3(i64 %trip, i64 %add) {
; EPILOG: test3(
; EPILOG-NEXT: entry:
; EPILOG-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
; EPILOG-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
; EPILOG-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
; EPILOG-NEXT: br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
; EPILOG: entry.new:
; EPILOG-NEXT: %unroll_iter = sub i64 [[TRIP]], [[XTRAITER]]
; EPILOG-NEXT: br label [[LOOP_HEADER:%.*]]
; EPILOG: loop_header:
; EPILOG-NEXT: %sum = phi i64 [ 0, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
; EPILOG-NEXT: %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
; EPILOG: loop_exiting_bb1.7:
; EPILOG-NEXT: switch i64 %sum.next.6, label %loop_latch.7
; EPILOG: loop_latch.7:
; EPILOG-NEXT: %sum.next.7 = add i64 %sum.next.6, %add
; EPILOG-NEXT: %niter.nsub.7 = add i64 %niter, -8
; EPILOG-NEXT: %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
; EPILOG-NEXT: br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header

; PROLOG: test3(
; PROLOG-NEXT: entry:
; PROLOG-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
; PROLOG-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
; PROLOG-NEXT: [[TMP1:%.*]] = icmp eq i64 [[XTRAITER]], 0
; PROLOG-NEXT: br i1 [[TMP1]], label %loop_header.prol.loopexit, label %loop_header.prol.preheader
; PROLOG: loop_header:
; PROLOG-NEXT: %iv = phi i64 [ %iv.unr, %entry.new ], [ %iv_next.7, %loop_latch.7 ]
; PROLOG-NEXT: %sum = phi i64 [ %sum.unr, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
; PROLOG: loop_exiting_bb1.7:
; PROLOG-NEXT: switch i64 %sum.next.6, label %loop_latch.7
; PROLOG: loop_latch.7:
; PROLOG-NEXT: %iv_next.7 = add nsw i64 %iv, 8
; PROLOG-NEXT: %sum.next.7 = add i64 %sum.next.6, %add
; PROLOG-NEXT: %cmp.7 = icmp eq i64 %iv_next.7, %trip
; PROLOG-NEXT: br i1 %cmp.7, label %exit2.loopexit.unr-lcssa, label %loop_header
entry:
  br label %loop_header

loop_header:
  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
  %sum = phi i64 [ 0, %entry ], [ %sum.next, %loop_latch ]
  br i1 undef, label %loop_latch, label %loop_exiting_bb1

; Non-latch exiting block: the switch has two distinct exit targets and falls
; through to the latch by default.
loop_exiting_bb1:
  switch i64 %sum, label %loop_latch [
    i64 24, label %exit1
    i64 42, label %exit3
  ]

exit3:
  ret void

loop_latch:
  %iv_next = add nuw nsw i64 %iv, 1
  %sum.next = add i64 %sum, %add
  %cmp = icmp ne i64 %iv_next, %trip
  br i1 %cmp, label %loop_header, label %exit2.loopexit

exit1:
  ret void

exit2.loopexit:
  ret void
}

; FIXME: Support multiple exiting blocks to the same latch exit block.
define i32 @test4(i32* nocapture %a, i64 %n, i1 %cond) {
; Both %header and the latch %for.body exit to %for.end. The checks below
; require that no runtime-unrolling artifacts (.unr/.epil/.prol names) appear,
; i.e. that this loop is left alone.
; EPILOG: test4(
; EPILOG-NOT: .unr
; EPILOG-NOT: .epil

; PROLOG: test4(
; PROLOG-NOT: .unr
; PROLOG-NOT: .prol
entry:
  br label %header

header:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  br i1 %cond, label %for.end, label %for.exiting_block

for.exiting_block:
  %cmp = icmp eq i64 %n, 42
  br i1 %cmp, label %for.exit2, label %for.body

for.body:                                         ; preds = %for.exiting_block
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, %sum.02
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.end, label %header

for.end:                                          ; preds = %for.body, %header
  %sum.0.lcssa = phi i32 [ 0, %header ], [ %add, %for.body ]
  ret i32 %sum.0.lcssa

for.exit2:
  ret i32 42
}

; FIXME: Support multiple exiting blocks to the unique exit block.
define void @unique_exit(i32 %arg) {
; %LoopExit is the loop's only exit block and is reached from both %header and
; %latch. The checks below require that no runtime-unrolling artifacts
; (.unr/.epil/.prol names) appear.
; EPILOG: unique_exit(
; EPILOG-NOT: .unr
; EPILOG-NOT: .epil

; PROLOG: unique_exit(
; PROLOG-NOT: .unr
; PROLOG-NOT: .prol
entry:
  %tmp = icmp sgt i32 undef, %arg
  br i1 %tmp, label %preheader, label %returnblock

preheader:                                        ; preds = %entry
  br label %header

LoopExit:                                         ; preds = %header, %latch
  %tmp2.ph = phi i32 [ %tmp4, %header ], [ -1, %latch ]
  br label %returnblock

returnblock:                                      ; preds = %LoopExit, %entry
  %tmp2 = phi i32 [ -1, %entry ], [ %tmp2.ph, %LoopExit ]
  ret void

header:                                           ; preds = %preheader, %latch
  %tmp4 = phi i32 [ %inc, %latch ], [ %arg, %preheader ]
  %inc = add nsw i32 %tmp4, 1
  br i1 true, label %LoopExit, label %latch

latch:                                            ; preds = %header
  %cmp = icmp slt i32 %inc, undef
  br i1 %cmp, label %header, label %LoopExit
}

; two exiting and two exit blocks.
; the non-latch exiting block has duplicate edges to the non-latch exit block.
define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
; Two switch cases in %loop_exiting target %exit1, so every unrolled copy
; appears twice in the exit phi checked below.
; EPILOG: test5(
; EPILOG: exit1.loopexit:
; EPILOG-NEXT: %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ],
; EPILOG-NEXT: br label %exit1
; EPILOG: exit1.loopexit2:
; EPILOG-NEXT: %ivy.epil = add i64 %iv.epil, %add
; EPILOG-NEXT: br label %exit1
; EPILOG: exit1:
; EPILOG-NEXT: %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.epil, %exit1.loopexit2 ]
; EPILOG-NEXT: ret i64 %result
; EPILOG: loop_latch.7:
; EPILOG: %niter.nsub.7 = add i64 %niter, -8

; PROLOG: test5(
; PROLOG: exit1.loopexit:
; PROLOG-NEXT: %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ],
; PROLOG-NEXT: br label %exit1
; PROLOG: exit1.loopexit1:
; PROLOG-NEXT: %ivy.prol = add i64 %iv.prol, %add
; PROLOG-NEXT: br label %exit1
; PROLOG: exit1:
; PROLOG-NEXT: %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.prol, %exit1.loopexit1 ]
; PROLOG-NEXT: ret i64 %result
; PROLOG: loop_latch.7:
; PROLOG: %iv_next.7 = add nsw i64 %iv, 8
entry:
  br label %loop_header

loop_header:
  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
  %sum = phi i64 [ 0, %entry ], [ %sum.next, %loop_latch ]
  br i1 %cond, label %loop_latch, label %loop_exiting

; Non-latch exiting block: both non-default cases go to the same exit block.
loop_exiting:
  %ivy = add i64 %iv, %add
  switch i64 %sum, label %loop_latch [
    i64 24, label %exit1
    i64 42, label %exit1
  ]

loop_latch:
  %iv_next = add nuw nsw i64 %iv, 1
  %sum.next = add i64 %sum, %add
  %cmp = icmp ne i64 %iv_next, %trip
  br i1 %cmp, label %loop_header, label %latchexit

; One phi entry per incoming switch edge, hence %loop_exiting is listed twice.
exit1:
  %result = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ]
  ret i64 %result

latchexit:
  ret i64 %sum.next
}

; test when
; exit blocks have successors.
define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
; The exit block %for.exit2 does not return; it branches on to %exit_true or
; %exit_false.
; EPILOG: test6(
; EPILOG: for.exit2.loopexit:
; EPILOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ],
; EPILOG-NEXT: br label %for.exit2
; EPILOG: for.exit2.loopexit2:
; EPILOG-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
; EPILOG-NEXT: br label %for.exit2
; EPILOG: for.exit2:
; EPILOG-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
; EPILOG-NEXT: br i1 %cond, label %exit_true, label %exit_false
; EPILOG: latch.7:
; EPILOG: %niter.nsub.7 = add i64 %niter, -8

; PROLOG: test6(
; PROLOG: for.exit2.loopexit:
; PROLOG-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ],
; PROLOG-NEXT: br label %for.exit2
; PROLOG: for.exit2.loopexit1:
; PROLOG-NEXT: %retval.ph2 = phi i32 [ 42, %for.exiting_block.prol ], [ %sum.02.prol, %header.prol ]
; PROLOG-NEXT: br label %for.exit2
; PROLOG: for.exit2:
; PROLOG-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph2, %for.exit2.loopexit1 ]
; PROLOG-NEXT: br i1 %cond, label %exit_true, label %exit_false
; PROLOG: latch.7:
; PROLOG: %indvars.iv.next.7 = add i64 %indvars.iv, 8
entry:
  br label %header

header:
  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
  %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
  br i1 false, label %for.exit2, label %for.exiting_block

for.exiting_block:
  %cmp = icmp eq i64 %n, 42
  br i1 %cmp, label %for.exit2, label %latch

latch:
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %load = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %load, %sum.02
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %latch_exit, label %header

latch_exit:
  %sum.0.lcssa = phi i32 [ %add, %latch ]
  ret i32 %sum.0.lcssa

; NOTE(review): the checks expect the branch directly after the phi, so
; -instcombine presumably sinks %addx into %exit_false, its only user - confirm.
for.exit2:
  %retval = phi i32 [ %sum.02, %header ], [ 42, %for.exiting_block ]
  %addx = add i32 %retval, %x
  br i1 %cond, label %exit_true, label %exit_false

exit_true:
  ret i32 %retval

exit_false:
  ret i32 %addx
}