; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -S| FileCheck %s
; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine

; Runtime loop unrolling (epilog form) of loops with more than one exit.
;
; Only the first invocation above is piped to FileCheck; its expectations use
; a remainder mask of 7 and a ".7" suffix on the last unrolled copy.
; The second invocation (unroll count 2) has no output checks and only runs
; with the dominator-tree and loop-info verifiers enabled.
;
; The second RUN generates an epilog remainder block for all the test
; cases below (it does not generate a loop).

; Test with three exiting and three exit blocks.
; None of the exit blocks have successors.
;
; Exiting blocks and their exits:
;   loop_exiting_bb1 -> exit1
;   loop_exiting_bb2 -> exit3
;   loop_latch       -> exit2.loopexit
define void @test1(i64 %trip, i1 %cond) {
; Expected preheader: compute the remainder count and skip the unrolled loop
; when the trip count is too small.
; CHECK-LABEL: test1
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
; CHECK-NEXT: br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
; CHECK: entry.new:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TRIP]], [[XTRAITER]]
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; Expected epilog latch: counts the remaining iterations down to zero.
; CHECK-LABEL: loop_latch.epil:
; CHECK-NEXT: %epil.iter.sub = add i64 %epil.iter, -1
; CHECK-NEXT: %epil.iter.cmp = icmp eq i64 %epil.iter.sub, 0
; CHECK-NEXT: br i1 %epil.iter.cmp, label %exit2.loopexit.epilog-lcssa, label %loop_header.epil
; Expected latch of the last unrolled copy: the counter drops by 8 per trip.
; CHECK-LABEL: loop_latch.7:
; CHECK-NEXT: %niter.nsub.7 = add i64 %niter, -8
; CHECK-NEXT: %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
; CHECK-NEXT: br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header
entry:
  br label %loop_header

loop_header:
  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
  br i1 %cond, label %loop_latch, label %loop_exiting_bb1

loop_exiting_bb1:
  ; Constant-false condition: the edge to %exit1 is the one selected.
  br i1 false, label %loop_exiting_bb2, label %exit1

loop_exiting_bb2:
  ; Constant-false condition: the edge to %exit3 is the one selected.
  br i1 false, label %loop_latch, label %exit3

exit3:
  ret void

loop_latch:
  ; Latch exit: leave the loop once %iv_next reaches %trip.
  %iv_next = add i64 %iv, 1
  %cmp = icmp ne i64 %iv_next, %trip
  br i1 %cmp, label %loop_header, label %exit2.loopexit

exit1:
  ret void

exit2.loopexit:
  ret void
}


; Test with three exiting and two exit blocks.
; The non-latch exit block has 2 unique predecessors.
; There are 2 values passed to the exit blocks that are calculated at every iteration:
; %sum.02 and %add. Both of these are incoming values for phi from every exiting
; unrolled block.
;
; Exiting blocks and their exits:
;   header            -> for.exit2
;   for.exiting_block -> for.exit2
;   for.body (latch)  -> for.end
define i32 @test2(i32* nocapture %a, i64 %n) {
; The expectations look at the two split copies of for.exit2 (one fed by the
; unrolled loop, one by the epilog) and the merged phi in for.exit2 itself.
; The first phi pattern deliberately stops part-way through the operand list.
; CHECK-LABEL: test2
; CHECK-LABEL: for.exit2.loopexit:
; CHECK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ], [ %add.1, %for.body.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %for.body.2 ], [ 42, %for.exiting_block.3 ],
; CHECK-NEXT: br label %for.exit2
; CHECK-LABEL: for.exit2.loopexit2:
; CHECK-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
; CHECK-NEXT: br label %for.exit2
; CHECK-LABEL: for.exit2:
; CHECK-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
; CHECK-NEXT: ret i32 %retval
; CHECK: %niter.nsub.7 = add i64 %niter, -8
entry:
  br label %header

header:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  ; Constant-false condition: control always continues to %for.exiting_block.
  br i1 false, label %for.exit2, label %for.exiting_block

for.exiting_block:
  ; Loop-invariant exit test: depends only on the argument %n.
  %cmp = icmp eq i64 %n, 42
  br i1 %cmp, label %for.exit2, label %for.body

for.body:
  ; Accumulate a[indvars.iv] into the running sum, then test the latch exit.
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, %sum.02
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.end, label %header

for.end:                                          ; preds = %for.body
  %sum.0.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %sum.0.lcssa

for.exit2:
  ; Shared non-latch exit: receives %sum.02 from the header edge and the
  ; constant 42 from the for.exiting_block edge.
  %retval = phi i32 [ %sum.02, %header ], [ 42, %for.exiting_block ]
  ret i32 %retval
}

; Test with two exiting and three exit blocks.
; The non-latch exiting block has a switch.
;
; Exiting blocks and their exits:
;   loop_exiting_bb1 -> exit1 (case 24), exit3 (case 42)
;   loop_latch       -> exit2.loopexit
define void @test3(i64 %trip, i64 %add) {
; CHECK-LABEL: test3
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
; CHECK-NEXT: br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]]
; CHECK: entry.new:
; CHECK-NEXT: %unroll_iter = sub i64 [[TRIP]], [[XTRAITER]]
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK-LABEL: loop_header:
; CHECK-NEXT: %sum = phi i64 [ 0, %entry.new ], [ %sum.next.7, %loop_latch.7 ]
; CHECK-NEXT: %niter = phi i64 [ %unroll_iter, %entry.new ], [ %niter.nsub.7, %loop_latch.7 ]
; The switch in the last unrolled copy is expected to test the sum produced
; by the previous copy and to default to that copy's latch.
; CHECK-LABEL: loop_exiting_bb1.7:
; CHECK-NEXT: switch i64 %sum.next.6, label %loop_latch.7
; CHECK-LABEL: loop_latch.7:
; CHECK-NEXT: %sum.next.7 = add i64 %sum.next.6, %add
; CHECK-NEXT: %niter.nsub.7 = add i64 %niter, -8
; CHECK-NEXT: %niter.ncmp.7 = icmp eq i64 %niter.nsub.7, 0
; CHECK-NEXT: br i1 %niter.ncmp.7, label %exit2.loopexit.unr-lcssa.loopexit, label %loop_header
entry:
  br label %loop_header

loop_header:
  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
  %sum = phi i64 [ 0, %entry ], [ %sum.next, %loop_latch ]
  ; The branch condition is undef; both successors stay inside the loop.
  br i1 undef, label %loop_latch, label %loop_exiting_bb1

loop_exiting_bb1:
  ; Two distinct exit blocks are reachable from this one exiting block.
  switch i64 %sum, label %loop_latch [
    i64 24, label %exit1
    i64 42, label %exit3
  ]

exit3:
  ret void

loop_latch:
  %iv_next = add nuw nsw i64 %iv, 1
  %sum.next = add i64 %sum, %add
  %cmp = icmp ne i64 %iv_next, %trip
  br i1 %cmp, label %loop_header, label %exit2.loopexit

exit1:
  ret void

exit2.loopexit:
  ret void
}

; FIXME: Support multiple exiting blocks to the same latch exit block.
; Both the header and the latch (for.body) exit to for.end, so the latch exit
; block has more than one exiting predecessor. The expectations below require
; that no ".unr" or ".epil" names appear in the output for this function.
define i32 @test4(i32* nocapture %a, i64 %n, i1 %cond) {
; CHECK-LABEL: test4
; CHECK-NOT: .unr
; CHECK-NOT: .epil
entry:
  br label %header

header:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  ; Non-latch exit that targets the same block as the latch exit (for.end).
  br i1 %cond, label %for.end, label %for.exiting_block

for.exiting_block:
  ; Loop-invariant exit test: depends only on the argument %n.
  %cmp = icmp eq i64 %n, 42
  br i1 %cmp, label %for.exit2, label %for.body

for.body:                                         ; preds = %for.exiting_block
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, %sum.02
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.end, label %header

for.end:                                          ; preds = %for.body, %header
  ; 0 arrives on the header edge, the accumulated %add on the latch edge.
  %sum.0.lcssa = phi i32 [ 0, %header ], [ %add, %for.body ]
  ret i32 %sum.0.lcssa

for.exit2:
  ret i32 42
}

; Two exiting and two exit blocks.
; The non-latch exiting block has duplicate edges to the non-latch exit block.
; Exiting blocks and their exits:
;   loop_exiting -> exit1 (reached by both switch cases, 24 and 42)
;   loop_latch   -> latchexit
define i64 @test5(i64 %trip, i64 %add, i1 %cond) {
; The duplicate switch edges are expected to show up as duplicate phi entries
; for every unrolled copy. The first phi pattern deliberately stops part-way
; through the operand list.
; CHECK-LABEL: test5
; CHECK-LABEL: exit1.loopexit:
; CHECK-NEXT: %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.2, %loop_exiting.2 ],
; CHECK-NEXT: br label %exit1
; CHECK-LABEL: exit1.loopexit2:
; CHECK-NEXT: %ivy.epil = add i64 %iv.epil, %add
; CHECK-NEXT: br label %exit1
; CHECK-LABEL: exit1:
; CHECK-NEXT: %result = phi i64 [ %result.ph, %exit1.loopexit ], [ %ivy.epil, %exit1.loopexit2 ]
; CHECK-NEXT: ret i64 %result
; CHECK-LABEL: loop_latch.7:
; CHECK: %niter.nsub.7 = add i64 %niter, -8
entry:
  br label %loop_header

loop_header:
  %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
  %sum = phi i64 [ 0, %entry ], [ %sum.next, %loop_latch ]
  br i1 %cond, label %loop_latch, label %loop_exiting

loop_exiting:
  ; %ivy is the value carried out of the loop through %exit1.
  %ivy = add i64 %iv, %add
  ; Both cases target %exit1, giving two edges between the same pair of blocks.
  switch i64 %sum, label %loop_latch [
    i64 24, label %exit1
    i64 42, label %exit1
  ]

loop_latch:
  %iv_next = add nuw nsw i64 %iv, 1
  %sum.next = add i64 %sum, %add
  %cmp = icmp ne i64 %iv_next, %trip
  br i1 %cmp, label %loop_header, label %latchexit

exit1:
  ; One phi entry per incoming edge, hence %loop_exiting is listed twice.
  %result = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ]
  ret i64 %result

latchexit:
  ret i64 %sum.next
}

; Test when exit blocks have successors.
; The non-latch exit block for.exit2 is not a return block here: it branches
; on %cond to exit_true or exit_false.
;
; Exiting blocks and their exits:
;   header            -> for.exit2
;   for.exiting_block -> for.exit2
;   latch             -> latch_exit
define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) {
; The first phi pattern deliberately stops part-way through the operand list.
; for.exit2 is expected to keep its conditional branch after the merged phi.
; CHECK-LABEL: test6
; CHECK-LABEL: for.exit2.loopexit:
; CHECK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ], [ %add.1, %latch.1 ], [ 42, %for.exiting_block.2 ], [ %add.2, %latch.2 ],
; CHECK-NEXT: br label %for.exit2
; CHECK-LABEL: for.exit2.loopexit2:
; CHECK-NEXT: %retval.ph3 = phi i32 [ 42, %for.exiting_block.epil ], [ %sum.02.epil, %header.epil ]
; CHECK-NEXT: br label %for.exit2
; CHECK-LABEL: for.exit2:
; CHECK-NEXT: %retval = phi i32 [ %retval.ph, %for.exit2.loopexit ], [ %retval.ph3, %for.exit2.loopexit2 ]
; CHECK-NEXT: br i1 %cond, label %exit_true, label %exit_false
; CHECK-LABEL: latch.7:
; CHECK: %niter.nsub.7 = add i64 %niter, -8
entry:
  br label %header

header:
  %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ]
  %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ]
  ; Constant-false condition: control always continues to %for.exiting_block.
  br i1 false, label %for.exit2, label %for.exiting_block

for.exiting_block:
  ; Loop-invariant exit test: depends only on the argument %n.
  %cmp = icmp eq i64 %n, 42
  br i1 %cmp, label %for.exit2, label %latch

latch:
  ; Accumulate a[indvars.iv] into the running sum, then test the latch exit.
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %load = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %load, %sum.02
  %indvars.iv.next = add i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %latch_exit, label %header

latch_exit:
  %sum.0.lcssa = phi i32 [ %add, %latch ]
  ret i32 %sum.0.lcssa

for.exit2:
  ; Exit block with successors: %retval is returned by exit_true and
  ; %addx (= %retval + %x) by exit_false.
  %retval = phi i32 [ %sum.02, %header ], [ 42, %for.exiting_block ]
  %addx = add i32 %retval, %x
  br i1 %cond, label %exit_true, label %exit_false

exit_true:
  ret i32 %retval

exit_false:
  ret i32 %addx
}