; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require<scalar-evolution>,loop-mssa(loop-predication)' -verify-memoryssa < %s 2>&1 | FileCheck %s

; These tests exercise the profitability check of loop predication: the opt
; invocation above passes -loop-predication-skip-profitability-checks=false.
; Every loop has two exiting blocks: Header (exits to %exit) and Latch.

; The latch block exits to a speculation block that deoptimizes. The
; profitability check accounts for this, since deopt is very rarely taken, so
; this loop is not predicated using that coarse latch check.
; LatchExitProbability: 0x04000000 / 0x80000000 = 3.12%
; ExitingBlockProbability: 0x7ffa572a / 0x80000000 = 99.98%
define i64 @donot_predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) !prof !21 {
; CHECK-LABEL: @donot_predicate(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       Header:
; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK:       Latch:
; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[DEOPT:%.*]]
; CHECK:       deopt:
; CHECK-NEXT:    [[COUNTED_SPECULATION_FAILED:%.*]] = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
; CHECK-NEXT:    ret i64 [[COUNTED_SPECULATION_FAILED]]
; CHECK:       exit:
; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
; CHECK-NEXT:    ret i64 [[RESULT_LE]]
;
entry:
  %length.ext = zext i32 %length to i64
  %n.pre = load i64, i64* %n_addr, align 4
  br label %Header

Header:                                           ; preds = %entry, %Latch
  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  %within.bounds = icmp ult i64 %j2, %length.ext
  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
  %innercmp = icmp eq i64 %j2, %n.pre
  %j.next = add nuw nsw i64 %j2, 1
  br i1 %innercmp, label %Latch, label %exit, !prof !0

Latch:                                            ; preds = %Header
  %speculate_trip_count = icmp ult i64 %j.next, 1048576
  ; No branch weights here; the not-taken successor is the deoptimizing block.
  br i1 %speculate_trip_count, label %Header, label %deopt

deopt:                                            ; preds = %Latch
  %counted_speculation_failed = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
  ret i64 %counted_speculation_failed

exit:                                             ; preds = %Header
  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
  %result.le = load i64, i64* %result.in3.lcssa, align 8
  ret i64 %result.le
}
; Header branch of @donot_predicate: weight 18 for %Latch, 104200 for %exit.
!0 = !{!"branch_weights", i32 18, i32 104200}

; Predicate the loop since there is no branch profile information and BPI
; concluded that all exiting blocks have the same probability of exiting the
; loop. The expected output replaces the guard condition with a loop-invariant
; condition computed in the entry block.
define i64 @predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) !prof !21 {
; CHECK-LABEL: @predicate(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i64 1048576, [[LENGTH_EXT]]
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 0, [[LENGTH_EXT]]
; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       Header:
; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]]
; CHECK:       Latch:
; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]]
; CHECK:       exitLatch:
; CHECK-NEXT:    ret i64 1
; CHECK:       exit:
; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
; CHECK-NEXT:    ret i64 [[RESULT_LE]]
;
entry:
  %length.ext = zext i32 %length to i64
  %n.pre = load i64, i64* %n_addr, align 4
  br label %Header

Header:                                           ; preds = %entry, %Latch
  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  %within.bounds = icmp ult i64 %j2, %length.ext
  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
  %innercmp = icmp eq i64 %j2, %n.pre
  %j.next = add nuw nsw i64 %j2, 1
  ; Neither loop branch in this function carries !prof metadata.
  br i1 %innercmp, label %Latch, label %exit

Latch:                                            ; preds = %Header
  %speculate_trip_count = icmp ult i64 %j.next, 1048576
  br i1 %speculate_trip_count, label %Header, label %exitLatch

exitLatch:                                        ; preds = %Latch
  ret i64 1

exit:                                             ; preds = %Header
  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
  %result.le = load i64, i64* %result.in3.lcssa, align 8
  ret i64 %result.le
}

; Same as the test above, but with profiling data saying that the most probable
; exit from the loop is the header exiting block (not the latch block). So do
; not predicate.
; LatchExitProbability: 0x000020e1 / 0x80000000 = 0.00%
; ExitingBlockProbability: 0x7ffcbb86 / 0x80000000 = 99.99%
define i64 @donot_predicate_prof(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) !prof !21 {
; CHECK-LABEL: @donot_predicate_prof(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
; CHECK-NEXT:    br label [[HEADER:%.*]]
; CHECK:       Header:
; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK:       Latch:
; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF3:![0-9]+]]
; CHECK:       exitLatch:
; CHECK-NEXT:    ret i64 1
; CHECK:       exit:
; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
; CHECK-NEXT:    ret i64 [[RESULT_LE]]
;
entry:
  %length.ext = zext i32 %length to i64
  %n.pre = load i64, i64* %n_addr, align 4
  br label %Header

Header:                                           ; preds = %entry, %Latch
  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ]
  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ]
  %within.bounds = icmp ult i64 %j2, %length.ext
  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
  %innercmp = icmp eq i64 %j2, %n.pre
  %j.next = add nuw nsw i64 %j2, 1
  br i1 %innercmp, label %Latch, label %exit, !prof !1

Latch:                                            ; preds = %Header
  %speculate_trip_count = icmp ult i64 %j.next, 1048576
  br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2

exitLatch:                                        ; preds = %Latch
  ret i64 1

exit:                                             ; preds = %Header
  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
  %result.le = load i64, i64* %result.in3.lcssa, align 8
  ret i64 %result.le
}
declare i64 @llvm.experimental.deoptimize.i64(...)
declare void @llvm.experimental.guard(i1, ...)

; Header branch of @donot_predicate_prof: weight 104 for %Latch, 1042861 for %exit.
!1 = !{!"branch_weights", i32 104, i32 1042861}
; Latch branch of @donot_predicate_prof: weight 255129 for %Header, 1 for %exitLatch.
!2 = !{!"branch_weights", i32 255129, i32 1}
; Function entry count attached to all three test functions.
!21 = !{!"function_entry_count", i64 20000}