1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -loop-predication-skip-profitability-checks=false -passes='require<scalar-evolution>,loop-mssa(loop-predication)' -verify-memoryssa < %s 2>&1 | FileCheck %s
3
4; The latch block exits to a deopt (speculation-failure) block. We account for
5; this since deopt is very rarely taken, so we do not predicate this loop using
6; that coarse latch check: the guard on %within.bounds is expected to stay in the loop.
7; LatchExitProbability: 0x04000000 / 0x80000000 = 3.12%
8; ExitingBlockProbability: 0x7ffa572a / 0x80000000 = 99.98%
9define i64 @donot_predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) !prof !21 {
10; CHECK-LABEL: @donot_predicate(
11; CHECK-NEXT:  entry:
12; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
13; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
14; CHECK-NEXT:    br label [[HEADER:%.*]]
15; CHECK:       Header:
16; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
17; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
18; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
19; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
20; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
21; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
22; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF1:![0-9]+]]
23; CHECK:       Latch:
24; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
25; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[DEOPT:%.*]]
26; CHECK:       deopt:
27; CHECK-NEXT:    [[COUNTED_SPECULATION_FAILED:%.*]] = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
28; CHECK-NEXT:    ret i64 [[COUNTED_SPECULATION_FAILED]]
29; CHECK:       exit:
30; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
31; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
32; CHECK-NEXT:    ret i64 [[RESULT_LE]]
33;
34entry:
35  %length.ext = zext i32 %length to i64 ; widen the i32 length so it can be compared with the i64 induction variable
36  %n.pre = load i64, i64* %n_addr, align 4 ; loaded once in entry, before the loop
37  br label %Header
38
39Header:                                          ; preds = %entry, %Latch
40  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ] ; %arg2 on the first iteration, %arg afterwards
41  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ] ; induction variable: starts at 0, stepped by %j.next
42  %within.bounds = icmp ult i64 %j2, %length.ext ; loop-variant range check on the induction variable
43  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] ; expected to stay unchanged (no predication)
44  %innercmp = icmp eq i64 %j2, %n.pre
45  %j.next = add nuw nsw i64 %j2, 1
46  br i1 %innercmp, label %Latch, label %exit, !prof !0 ; weights: 18 to %Latch, 104200 to %exit
47
48Latch:                                           ; preds = %Header
49  %speculate_trip_count = icmp ult i64 %j.next, 1048576 ; coarse latch check against a fixed bound of 1048576
50  br i1 %speculate_trip_count, label %Header, label %deopt ; no branch weights; failing the bound leads to %deopt
51
52deopt:                                            ; preds = %Latch
53  %counted_speculation_failed = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ]
54  ret i64 %counted_speculation_failed
55
56exit:                                             ; preds = %Header
57  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
58  %result.le = load i64, i64* %result.in3.lcssa, align 8
59  ret i64 %result.le
60}
61!0 = !{!"branch_weights", i32 18, i32 104200} ; Header branch of @donot_predicate: to %Latch, to %exit
62
63; Predicate the loop: there is no branch-weight profile data on either exiting
64; branch, and BPI concluded all exiting blocks have the same probability of exiting the loop.
65define i64 @predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) !prof !21 {
66; CHECK-LABEL: @predicate(
67; CHECK-NEXT:  entry:
68; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
69; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
70; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i64 1048576, [[LENGTH_EXT]]
71; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 0, [[LENGTH_EXT]]
72; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]]
73; CHECK-NEXT:    br label [[HEADER:%.*]]
74; CHECK:       Header:
75; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
76; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
77; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ]
78; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
79; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
80; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]]
81; CHECK:       Latch:
82; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
83; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]]
84; CHECK:       exitLatch:
85; CHECK-NEXT:    ret i64 1
86; CHECK:       exit:
87; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
88; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
89; CHECK-NEXT:    ret i64 [[RESULT_LE]]
90;
91entry:
92  %length.ext = zext i32 %length to i64 ; widen the i32 length so it can be compared with the i64 induction variable
93  %n.pre = load i64, i64* %n_addr, align 4 ; loaded once in entry, before the loop
94  br label %Header
95
96Header:                                          ; preds = %entry, %Latch
97  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ] ; %arg2 on the first iteration, %arg afterwards
98  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ] ; induction variable: starts at 0, stepped by %j.next
99  %within.bounds = icmp ult i64 %j2, %length.ext ; loop-variant range check on the induction variable
100  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] ; expected to be rewritten to use a loop-invariant condition computed in entry: (0 u< length) and (1048576 u<= length)
101  %innercmp = icmp eq i64 %j2, %n.pre
102  %j.next = add nuw nsw i64 %j2, 1
103  br i1 %innercmp, label %Latch, label %exit ; no branch weights
104
105Latch:                                           ; preds = %Header
106  %speculate_trip_count = icmp ult i64 %j.next, 1048576 ; latch check against a fixed bound of 1048576
107  br i1 %speculate_trip_count, label %Header, label %exitLatch ; no branch weights; the latch exit is a plain return, not a deopt
108
109exitLatch:                                            ; preds = %Latch
110  ret i64 1
111
112exit:                                             ; preds = %Header
113  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
114  %result.le = load i64, i64* %result.in3.lcssa, align 8
115  ret i64 %result.le
116}
117
118; Same as the test above, but with profile data saying that the most probable exit from
119; the loop is the header exiting block (not the latch block). So do not predicate.
120; LatchExitProbability: 0x000020e1 / 0x80000000 = 0.00%
121; ExitingBlockProbability: 0x7ffcbb86 / 0x80000000 = 99.99%
122define i64 @donot_predicate_prof(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) !prof !21 {
123; CHECK-LABEL: @donot_predicate_prof(
124; CHECK-NEXT:  entry:
125; CHECK-NEXT:    [[LENGTH_EXT:%.*]] = zext i32 [[LENGTH:%.*]] to i64
126; CHECK-NEXT:    [[N_PRE:%.*]] = load i64, i64* [[N_ADDR:%.*]], align 4
127; CHECK-NEXT:    br label [[HEADER:%.*]]
128; CHECK:       Header:
129; CHECK-NEXT:    [[RESULT_IN3:%.*]] = phi i64* [ [[ARG2:%.*]], [[ENTRY:%.*]] ], [ [[ARG:%.*]], [[LATCH:%.*]] ]
130; CHECK-NEXT:    [[J2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[J_NEXT:%.*]], [[LATCH]] ]
131; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i64 [[J2]], [[LENGTH_EXT]]
132; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_BOUNDS]], i32 9) [ "deopt"() ]
133; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp eq i64 [[J2]], [[N_PRE]]
134; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J2]], 1
135; CHECK-NEXT:    br i1 [[INNERCMP]], label [[LATCH]], label [[EXIT:%.*]], !prof [[PROF2:![0-9]+]]
136; CHECK:       Latch:
137; CHECK-NEXT:    [[SPECULATE_TRIP_COUNT:%.*]] = icmp ult i64 [[J_NEXT]], 1048576
138; CHECK-NEXT:    br i1 [[SPECULATE_TRIP_COUNT]], label [[HEADER]], label [[EXITLATCH:%.*]], !prof [[PROF3:![0-9]+]]
139; CHECK:       exitLatch:
140; CHECK-NEXT:    ret i64 1
141; CHECK:       exit:
142; CHECK-NEXT:    [[RESULT_IN3_LCSSA:%.*]] = phi i64* [ [[RESULT_IN3]], [[HEADER]] ]
143; CHECK-NEXT:    [[RESULT_LE:%.*]] = load i64, i64* [[RESULT_IN3_LCSSA]], align 8
144; CHECK-NEXT:    ret i64 [[RESULT_LE]]
145;
146entry:
147  %length.ext = zext i32 %length to i64 ; widen the i32 length so it can be compared with the i64 induction variable
148  %n.pre = load i64, i64* %n_addr, align 4 ; loaded once in entry, before the loop
149  br label %Header
150
151Header:                                          ; preds = %entry, %Latch
152  %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ] ; %arg2 on the first iteration, %arg afterwards
153  %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ] ; induction variable: starts at 0, stepped by %j.next
154  %within.bounds = icmp ult i64 %j2, %length.ext ; loop-variant range check on the induction variable
155  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] ; expected to stay unchanged (no predication)
156  %innercmp = icmp eq i64 %j2, %n.pre
157  %j.next = add nuw nsw i64 %j2, 1
158  br i1 %innercmp, label %Latch, label %exit, !prof !1 ; weights: 104 to %Latch, 1042861 to %exit
159
160Latch:                                           ; preds = %Header
161  %speculate_trip_count = icmp ult i64 %j.next, 1048576 ; latch check against a fixed bound of 1048576
162  br i1 %speculate_trip_count, label %Header, label %exitLatch, !prof !2 ; weights: 255129 to %Header, 1 to %exitLatch
163
164exitLatch:                                            ; preds = %Latch
165  ret i64 1
166
167exit:                                             ; preds = %Header
168  %result.in3.lcssa = phi i64* [ %result.in3, %Header ]
169  %result.le = load i64, i64* %result.in3.lcssa, align 8
170  ret i64 %result.le
171}
172declare i64 @llvm.experimental.deoptimize.i64(...) ; called from the deopt block of @donot_predicate
173declare void @llvm.experimental.guard(i1, ...) ; range-check guard called in the Header block of every test function
174
175!1 = !{!"branch_weights", i32 104, i32 1042861} ; Header branch of @donot_predicate_prof: to %Latch, to %exit
176!2 = !{!"branch_weights", i32 255129, i32 1} ; Latch branch of @donot_predicate_prof: to %Header, to %exitLatch
177!21 = !{!"function_entry_count", i64 20000} ; entry count attached to all three test functions
178