1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s | FileCheck %s
3
4;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5;; Test base heuristic 1:
6;; highly-biased selects assumed to be highly predictable, converted to branches
7;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8
9; If a select is obviously predictable, turn it into a branch.
; The select carries !prof !15 (branch_weights 1:100). The assertions expect
; the condition to be frozen and the select to be replaced by a conditional
; branch whose two paths meet at a phi in select.end.
10define i32 @weighted_select1(i32 %a, i32 %b, i1 %cmp) {
11; CHECK-LABEL: @weighted_select1(
12; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
13; CHECK-NEXT:    br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16:![0-9]+]]
14; CHECK:       select.false:
15; CHECK-NEXT:    br label [[SELECT_END]]
16; CHECK:       select.end:
17; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
18; CHECK-NEXT:    ret i32 [[SEL]]
19;
20  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
21  ret i32 %sel
22}
23
24; If a select is obviously predictable (reversed profile weights),
25; turn it into a branch.
; Same shape as weighted_select1, but the select uses !prof !16
; (branch_weights 100:1), i.e. the weights are swapped. The expected output
; is the same freeze + branch + phi structure.
26define i32 @weighted_select2(i32 %a, i32 %b, i1 %cmp) {
27; CHECK-LABEL: @weighted_select2(
28; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
29; CHECK-NEXT:    br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF17:![0-9]+]]
30; CHECK:       select.false:
31; CHECK-NEXT:    br label [[SELECT_END]]
32; CHECK:       select.end:
33; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
34; CHECK-NEXT:    ret i32 [[SEL]]
35;
36  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !16
37  ret i32 %sel
38}
39
40; Not obviously predictable select.
; The select uses !prof !17 (branch_weights 1:99) and the assertions expect it
; to be left as a select. NOTE(review): presumably 1:99 falls just short of
; the predictability threshold that 1:100 (weighted_select1) clears -- confirm
; against the pass's threshold.
41define i32 @weighted_select3(i32 %a, i32 %b, i1 %cmp) {
42; CHECK-LABEL: @weighted_select3(
43; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF18:![0-9]+]]
44; CHECK-NEXT:    ret i32 [[SEL]]
45;
46  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !17
47  ret i32 %sel
48}
49
50; Unpredictable select should not form a branch.
; The select is tagged with !unpredictable (operand !20, an empty node) and
; the assertions expect it to survive unchanged. The metadata ID printed in
; the output is matched with a FileCheck pattern rather than a literal number,
; like every other metadata reference in this file, so that renumbering of the
; module's metadata does not break the test.
51define i32 @unpred_select(i32 %a, i32 %b, i1 %cmp) {
52; CHECK-LABEL: @unpred_select(
53; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !unpredictable [[META19:![0-9]+]]
54; CHECK-NEXT:    ret i32 [[SEL]]
55;
56  %sel = select i1 %cmp, i32 %a, i32 %b, !unpredictable !20
57  ret i32 %sel
58}
59
60; Predictable select in function with optsize attribute should not form branch.
; The select and its weights (!prof !15) are identical to weighted_select1;
; only the optsize function attribute differs, and the assertions expect the
; select to be kept.
61define i32 @weighted_select_optsize(i32 %a, i32 %b, i1 %cmp) optsize {
62; CHECK-LABEL: @weighted_select_optsize(
63; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF16]]
64; CHECK-NEXT:    ret i32 [[SEL]]
65;
66  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
67  ret i32 %sel
68}
69
; Predictable select (!prof !15) in a function whose profile is !14
; (function_entry_count 0). The assertions expect the select to be kept.
; NOTE(review): presumably the zero entry count makes the function cold so it
; is optimized for size under profile-guided size optimization (the "pgso" in
; the name) -- confirm against the pass.
70define i32 @weighted_select_pgso(i32 %a, i32 %b, i1 %cmp) !prof !14 {
71; CHECK-LABEL: @weighted_select_pgso(
72; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !prof [[PROF16]]
73; CHECK-NEXT:    ret i32 [[SEL]]
74;
75  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
76  ret i32 %sel
77}
78
79; If two selects in a row are predictable, turn them into branches.
; Both selects carry !prof !15 and the second one's condition is computed
; from the first one's result, so they cannot share a branch. The assertions
; expect two successive freeze + branch + phi sequences: select.end holds the
; first phi and the second compare, select.end1 holds the second phi.
80define i32 @weighted_selects(i32 %a, i32 %b) !prof !19 {
81; CHECK-LABEL: @weighted_selects(
82; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0
83; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP]]
84; CHECK-NEXT:    br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]]
85; CHECK:       select.false:
86; CHECK-NEXT:    br label [[SELECT_END]]
87; CHECK:       select.end:
88; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[A]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
89; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[SEL]], 0
90; CHECK-NEXT:    [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP1]]
91; CHECK-NEXT:    br i1 [[SEL1_FROZEN]], label [[SELECT_END1:%.*]], label [[SELECT_FALSE2:%.*]], !prof [[PROF16]]
92; CHECK:       select.false2:
93; CHECK-NEXT:    br label [[SELECT_END1]]
94; CHECK:       select.end1:
95; CHECK-NEXT:    [[SEL1:%.*]] = phi i32 [ [[B]], [[SELECT_END]] ], [ [[A]], [[SELECT_FALSE2]] ]
96; CHECK-NEXT:    ret i32 [[SEL1]]
97;
98  %cmp = icmp ne i32 %a, 0
99  %sel = select i1 %cmp, i32 %a, i32 %b, !prof !15
100  %cmp1 = icmp ne i32 %sel, 0
101  %sel1 = select i1 %cmp1, i32 %b, i32 %a, !prof !15
102  ret i32 %sel1
103}
104
105; If select group predictable, turn it into a branch.
; Both selects share the condition %cmp and !prof !15, so the assertions
; expect a single branch with two phis. %c1 and %b1 are each used by only one
; select arm and are expected to be sunk into select.true.sink and
; select.false.sink respectively; %a1 is used by both selects and stays ahead
; of the branch. The llvm.dbg.value that sits between the two selects is
; expected to follow the phis in select.end.
106define i32 @weighted_select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) !prof !19 {
107; CHECK-LABEL: @weighted_select_group(
108; CHECK-NEXT:    [[A1:%.*]] = add i32 [[A:%.*]], 1
109; CHECK-NEXT:    [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
110; CHECK-NEXT:    br i1 [[SEL1_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]], !prof [[PROF16]]
111; CHECK:       select.true.sink:
112; CHECK-NEXT:    [[C1:%.*]] = add i32 [[C:%.*]], 1
113; CHECK-NEXT:    br label [[SELECT_END:%.*]]
114; CHECK:       select.false.sink:
115; CHECK-NEXT:    [[B1:%.*]] = add i32 [[B:%.*]], 1
116; CHECK-NEXT:    br label [[SELECT_END]]
117; CHECK:       select.end:
118; CHECK-NEXT:    [[SEL1:%.*]] = phi i32 [ [[A1]], [[SELECT_TRUE_SINK]] ], [ [[B1]], [[SELECT_FALSE_SINK]] ]
119; CHECK-NEXT:    [[SEL2:%.*]] = phi i32 [ [[C1]], [[SELECT_TRUE_SINK]] ], [ [[A1]], [[SELECT_FALSE_SINK]] ]
120; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[SEL1]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]]
121; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]]
122; CHECK-NEXT:    ret i32 [[ADD]]
123;
124  %a1 = add i32 %a, 1
125  %b1 = add i32 %b, 1
126  %c1 = add i32 %c, 1
127  %sel1 = select i1 %cmp, i32 %a1, i32 %b1, !prof !15
128  call void @llvm.dbg.value(metadata i32 %sel1, metadata !24, metadata !DIExpression()), !dbg !DILocation(scope: !23)
129  %sel2 = select i1 %cmp, i32 %c1, i32 %a1, !prof !15
130  %add = add i32 %sel1, %sel2
131  ret i32 %add
132}
133
134; Predictable select group with intra-group dependence converted to branch
; %sel2 uses %sel1 as its false operand. The assertions expect that, on the
; select.false edge, the phi for %sel2 receives %b directly, i.e. the value
; %sel1 itself takes on that edge.
135define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) {
136; CHECK-LABEL: @select_group_intra_group(
137; CHECK-NEXT:    [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
138; CHECK-NEXT:    br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]]
139; CHECK:       select.false:
140; CHECK-NEXT:    br label [[SELECT_END]]
141; CHECK:       select.end:
142; CHECK-NEXT:    [[SEL1:%.*]] = phi i32 [ [[A:%.*]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ]
143; CHECK-NEXT:    [[SEL2:%.*]] = phi i32 [ [[C:%.*]], [[TMP0]] ], [ [[B]], [[SELECT_FALSE]] ]
144; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[SEL1]], [[SEL2]]
145; CHECK-NEXT:    ret i32 [[SUB]]
146;
147  %sel1 = select i1 %cmp, i32 %a, i32 %b,!prof !15
148  %sel2 = select i1 %cmp, i32 %c, i32 %sel1, !prof !15
149  %sub = sub i32 %sel1, %sel2
150  ret i32 %sub
151}
152
153;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
154;; Test base heuristic 2:
155;; look for expensive instructions in the one-use slice of the cold path
156;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
157
158; Select with cold one-use load value operand should form branch and
159; sink load
; The load is the true operand and !prof !17 (branch_weights 1:99) makes that
; the cold side. The assertions expect the load to move into
; select.true.sink, with the false path branching straight to select.end.
160define i32 @expensive_val_operand1(ptr nocapture %a, i32 %y, i1 %cmp) {
161; CHECK-LABEL: @expensive_val_operand1(
162; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
163; CHECK-NEXT:    br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
164; CHECK:       select.true.sink:
165; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
166; CHECK-NEXT:    br label [[SELECT_END]]
167; CHECK:       select.end:
168; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[LOAD]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
169; CHECK-NEXT:    ret i32 [[SEL]]
170;
171  %load = load i32, ptr %a, align 8
172  %sel = select i1 %cmp, i32 %load, i32 %y, !prof !17
173  ret i32 %sel
174}
175
176; Expensive hot value operand and cheap cold value operand.
; Here the load is the false operand, which !prof !17 (branch_weights 1:99)
; makes the hot side, while the cold true operand is the plain argument %x.
; The assertions expect the select to be kept and the load to stay in place.
177define i32 @expensive_val_operand2(ptr nocapture %a, i32 %x, i1 %cmp) {
178; CHECK-LABEL: @expensive_val_operand2(
179; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
180; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[X:%.*]], i32 [[LOAD]], !prof [[PROF18]]
181; CHECK-NEXT:    ret i32 [[SEL]]
182;
183  %load = load i32, ptr %a, align 8
184  %sel = select i1 %cmp, i32 %x, i32 %load, !prof !17
185  ret i32 %sel
186}
187
188; Cold value operand with load in its one-use dependence slice should result
189; in a branch with the dependence slice sunk.
; The cold true operand %x is an add whose only input chain contains the
; load. The assertions expect both the load and the add to be moved into
; select.true.sink.
190define i32 @expensive_val_operand3(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) {
191; CHECK-LABEL: @expensive_val_operand3(
192; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]]
193; CHECK-NEXT:    br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]]
194; CHECK:       select.true.sink:
195; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8
196; CHECK-NEXT:    [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]]
197; CHECK-NEXT:    br label [[SELECT_END]]
198; CHECK:       select.end:
199; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[X]], [[SELECT_TRUE_SINK]] ], [ [[Y:%.*]], [[TMP0:%.*]] ]
200; CHECK-NEXT:    ret i32 [[SEL]]
201;
202  %load = load i32, ptr %a, align 8
203  %x = add i32 %load, %b
204  %sel = select i1 %cmp, i32 %x, i32 %y, !prof !17
205  ret i32 %sel
206}
207
208; Multiple uses of the load value operand.
; %load feeds both the select and the following add, and the select has no
; !prof metadata. The assertions expect no branch: the load, select and add
; are all left as written.
209define i32 @expensive_val_operand4(i32 %a, ptr nocapture %b, i32 %x, i1 %cmp) {
210; CHECK-LABEL: @expensive_val_operand4(
211; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4
212; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[X:%.*]], i32 [[LOAD]]
213; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[SEL]], [[LOAD]]
214; CHECK-NEXT:    ret i32 [[ADD]]
215;
216  %load = load i32, ptr %b, align 4
217  %sel = select i1 %cmp, i32 %x, i32 %load
218  %add = add i32 %sel, %load
219  ret i32 %add
220}
221
222;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
223;; Test loop heuristic: loop-level critical-path analysis
224;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
225
226;; Use of cmov in this test would put a load and a fsub on the critical path.
227;; Loop-level analysis should decide to form a branch.
;; The select uses !prof !18 (branch_weights 50:50) and its result %x2 is the
;; loop-carried value. The assertions expect the fsub to be sunk into
;; select.true.sink and the loop latch to move into select.end.
228;;
229;;double cmov_on_critical_path(int n, double x, ptr a) {
230;;  for (int i = 0; i < n; i++) {
231;;    double r = a[i];
232;;    if (x > r)
233;;      // 50% of iterations
234;;      x -= r;
235;;  }
236;;  return x;
237;;}
238define double @cmov_on_critical_path(i32 %n, double %x, ptr nocapture %a) {
239; CHECK-LABEL: @cmov_on_critical_path(
240; CHECK-NEXT:  entry:
241; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
242; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
243; CHECK:       for.cond.cleanup:
244; CHECK-NEXT:    ret double [[X:%.*]]
245; CHECK:       for.body.preheader:
246; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
247; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
248; CHECK:       for.body:
249; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[SELECT_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
250; CHECK-NEXT:    [[X1:%.*]] = phi double [ [[X2:%.*]], [[SELECT_END]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ]
251; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
252; CHECK-NEXT:    [[R:%.*]] = load double, ptr [[ARRAYIDX]], align 8
253; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt double [[X1]], [[R]]
254; CHECK-NEXT:    [[X2_FROZEN:%.*]] = freeze i1 [[CMP2]]
255; CHECK-NEXT:    br i1 [[X2_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END]], !prof [[PROF27:![0-9]+]]
256; CHECK:       select.true.sink:
257; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[X1]], [[R]]
258; CHECK-NEXT:    br label [[SELECT_END]]
259; CHECK:       select.end:
260; CHECK-NEXT:    [[X2]] = phi double [ [[SUB]], [[SELECT_TRUE_SINK]] ], [ [[X1]], [[FOR_BODY]] ]
261; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
262; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
263; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
264; CHECK:       for.exit:
265; CHECK-NEXT:    ret double [[X2]]
266;
267entry:
268  %cmp1 = icmp sgt i32 %n, 0
269  br i1 %cmp1, label %for.body.preheader, label %for.cond.cleanup
270
271for.cond.cleanup:                                 ; preds = %entry
272  ret double %x
273
274for.body.preheader:                               ; preds = %entry
275  %wide.trip.count = zext i32 %n to i64
276  br label %for.body
277
278for.body:                                         ; preds = %for.body.preheader, %for.body
279  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
280  %x1 = phi double [ %x2, %for.body ], [ %x, %for.body.preheader ]
281  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
282  %r = load double, ptr %arrayidx, align 8
283  %sub = fsub double %x1, %r
284  %cmp2 = fcmp ogt double %x1, %r
285  %x2 = select i1 %cmp2, double %sub, double %x1, !prof !18
286  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
287  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
288  br i1 %exitcond, label %for.exit, label %for.body
289
290for.exit:                                         ; preds = %for.body
291  ret double %x2
292}
293
294;; The common path includes expensive operations (load and fsub) making
295;; branch similarly expensive to cmov, and thus the gain is small.
296;; Loop-level analysis should decide on not forming a branch.
;; The IR expresses the sketch below with the inverted comparison (fcmp ole)
;; and swapped select operands, so with !prof !17 (branch_weights 1:99) the
;; fsub result is the hot false operand. The assertions expect the loop body
;; to be left unchanged.
297;;
298;;double small_gain(int n, double x, ptr a) {
299;;  for (int i = 0; i < n; i++) {
300;;    double r = a[i];
301;;    if (x > r)
302;;      // 99% of iterations
303;;      x -= r;
304;;  }
305;;  return x;
306;;}
307define double @small_gain(i32 %n, double %x, ptr nocapture %a) {
308; CHECK-LABEL: @small_gain(
309; CHECK-NEXT:  entry:
310; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
311; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
312; CHECK:       for.cond.cleanup:
313; CHECK-NEXT:    ret double [[X:%.*]]
314; CHECK:       for.body.preheader:
315; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
316; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
317; CHECK:       for.body:
318; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
319; CHECK-NEXT:    [[X1:%.*]] = phi double [ [[X2:%.*]], [[FOR_BODY]] ], [ [[X]], [[FOR_BODY_PREHEADER]] ]
320; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
321; CHECK-NEXT:    [[R:%.*]] = load double, ptr [[ARRAYIDX]], align 8
322; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[X1]], [[R]]
323; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ole double [[X1]], [[R]]
324; CHECK-NEXT:    [[X2]] = select i1 [[CMP2]], double [[X1]], double [[SUB]], !prof [[PROF18]]
325; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
326; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
327; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
328; CHECK:       for.exit:
329; CHECK-NEXT:    ret double [[X2]]
330;
331entry:
332  %cmp1 = icmp sgt i32 %n, 0
333  br i1 %cmp1, label %for.body.preheader, label %for.cond.cleanup
334
335for.cond.cleanup:                                 ; preds = %entry
336  ret double %x
337
338for.body.preheader:                               ; preds = %entry
339  %wide.trip.count = zext i32 %n to i64
340  br label %for.body
341
342for.body:                                         ; preds = %for.body.preheader, %for.body
343  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
344  %x1 = phi double [ %x2, %for.body ], [ %x, %for.body.preheader ]
345  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
346  %r = load double, ptr %arrayidx, align 8
347  %sub = fsub double %x1, %r
348  %cmp2 = fcmp ole double %x1, %r
349  %x2 = select i1 %cmp2, double %x1, double %sub, !prof !17
350  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
351  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
352  br i1 %exitcond, label %for.exit, label %for.body
353
354for.exit:                                         ; preds = %for.body
355  ret double %x2
356}
357
358;; Use of a branch in this test would avoid executing a load and several
359;; floating-point operations for most cases (70% of the time).
360;; Yet, the gain is not increasing much per iteration (small gradient gain).
361;; Loop-level analysis should decide not to form a branch.
;; The select uses !prof !28 (branch_weights 30:70) and the assertions expect
;; it to be kept.
;; NOTE(review): the sketch below computes 2 * a[i] + i, but the IR's
;; llvm.fmuladd call adds the constant 1.0 rather than the induction
;; variable -- confirm which one is intended.
362;;
363;;double small_gradient(int n, double x, ptr a) {
364;;  for (int i = 0; i < n; i++) {
365;;    double r = 2 * a[i] + i;
366;;    if (r > 0)
367;;      // 30% of iterations
368;;      x -= r;
369;;  }
370;;  return x;
371;;}
372define double @small_gradient(i32 %n, double %x, ptr nocapture %a) {
373; CHECK-LABEL: @small_gradient(
374; CHECK-NEXT:  entry:
375; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0
376; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
377; CHECK:       for.body.preheader:
378; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
379; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
380; CHECK:       for.cond.cleanup:
381; CHECK-NEXT:    [[X_ADDR_0_LCSSA:%.*]] = phi double [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_ADDR_1:%.*]], [[FOR_BODY]] ]
382; CHECK-NEXT:    ret double [[X_ADDR_0_LCSSA]]
383; CHECK:       for.body:
384; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
385; CHECK-NEXT:    [[X_ADDR_010:%.*]] = phi double [ [[X]], [[FOR_BODY_PREHEADER]] ], [ [[X_ADDR_1]], [[FOR_BODY]] ]
386; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
387; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
388; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.fmuladd.f64(double [[TMP0]], double 2.000000e+00, double 1.000000e+00)
389; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt double [[TMP1]], 0.000000e+00
390; CHECK-NEXT:    [[SUB:%.*]] = select i1 [[CMP1]], double [[TMP1]], double 0.000000e+00, !prof [[PROF28:![0-9]+]]
391; CHECK-NEXT:    [[X_ADDR_1]] = fsub double [[X_ADDR_010]], [[SUB]]
392; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
393; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
394; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
395;
396entry:
397  %cmp8 = icmp sgt i32 %n, 0
398  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
399
400for.body.preheader:                               ; preds = %entry
401  %wide.trip.count = zext i32 %n to i64
402  br label %for.body
403
404for.cond.cleanup:                                 ; preds = %for.body, %entry
405  %x.addr.0.lcssa = phi double [ %x, %entry ], [ %x.addr.1, %for.body ]
406  ret double %x.addr.0.lcssa
407
408for.body:                                         ; preds = %for.body.preheader, %for.body
409  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
410  %x.addr.010 = phi double [ %x, %for.body.preheader ], [ %x.addr.1, %for.body ]
411  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
412  %0 = load double, ptr %arrayidx, align 8
413  %1 = call double @llvm.fmuladd.f64(double %0, double 2.000000e+00, double 1.000000e+00)
414  %cmp1 = fcmp ogt double %1, 0.000000e+00
415  %sub = select i1 %cmp1, double %1, double 0.000000e+00, !prof !28
416  %x.addr.1 = fsub double %x.addr.010, %sub
417  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
418  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
419  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
420}
421
422;; One select on the critical path and one off the critical path.
423;; Loop-level analysis should decide to form a branch only for
424;; the select on the critical path.
;; Neither select carries !prof metadata. The assertions expect %sub (which
;; feeds the loop-carried %x.addr.1) to become a branch + phi, while %add
;; remains a select.
;; NOTE(review): the sketch below tests i == k, but the IR compares %k with
;; %n and leaves the truncated induction variable %1 unused -- confirm
;; whether that is intentional.
425;;
426;;double loop_select_groups(int n, double x, ptr a, int k) {
427;;  int c = 0;
428;;  for (int i = 0; i < n; i++) {
429;;    double r = a[i];
430;;    if (x > r)
431;;      x -= r;
432;;    if (i == k)
433;;      c += n;
434;;  }
435;;  return x + c;
436;;}
437define double @loop_select_groups(i32 %n, double %x, ptr nocapture %a, i32 %k) {
438; CHECK-LABEL: @loop_select_groups(
439; CHECK-NEXT:  entry:
440; CHECK-NEXT:    [[CMP19:%.*]] = icmp sgt i32 [[N:%.*]], 0
441; CHECK-NEXT:    br i1 [[CMP19]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
442; CHECK:       for.body.preheader:
443; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
444; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
445; CHECK:       for.cond.cleanup.loopexit:
446; CHECK-NEXT:    [[PHI_CAST:%.*]] = sitofp i32 [[C_1:%.*]] to double
447; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
448; CHECK:       for.cond.cleanup:
449; CHECK-NEXT:    [[C_0_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[PHI_CAST]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
450; CHECK-NEXT:    [[X_ADDR_0_LCSSA:%.*]] = phi double [ [[X:%.*]], [[ENTRY]] ], [ [[X_ADDR_1:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
451; CHECK-NEXT:    [[ADD5:%.*]] = fadd double [[X_ADDR_0_LCSSA]], [[C_0_LCSSA]]
452; CHECK-NEXT:    ret double [[ADD5]]
453; CHECK:       for.body:
454; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SELECT_END:%.*]] ]
455; CHECK-NEXT:    [[X_ADDR_022:%.*]] = phi double [ [[X]], [[FOR_BODY_PREHEADER]] ], [ [[X_ADDR_1]], [[SELECT_END]] ]
456; CHECK-NEXT:    [[C_020:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[C_1]], [[SELECT_END]] ]
457; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]]
458; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
459; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt double [[X_ADDR_022]], [[TMP0]]
460; CHECK-NEXT:    [[SUB_FROZEN:%.*]] = freeze i1 [[CMP1]]
461; CHECK-NEXT:    br i1 [[SUB_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]]
462; CHECK:       select.false:
463; CHECK-NEXT:    br label [[SELECT_END]]
464; CHECK:       select.end:
465; CHECK-NEXT:    [[SUB:%.*]] = phi double [ [[TMP0]], [[FOR_BODY]] ], [ 0.000000e+00, [[SELECT_FALSE]] ]
466; CHECK-NEXT:    [[X_ADDR_1]] = fsub double [[X_ADDR_022]], [[SUB]]
467; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
468; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[K:%.*]], [[N]]
469; CHECK-NEXT:    [[ADD:%.*]] = select i1 [[CMP2]], i32 [[N]], i32 0
470; CHECK-NEXT:    [[C_1]] = add nsw i32 [[ADD]], [[C_020]]
471; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
472; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
473; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
474;
475entry:
476  %cmp19 = icmp sgt i32 %n, 0
477  br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup
478
479for.body.preheader:                               ; preds = %entry
480  %wide.trip.count = zext i32 %n to i64
481  br label %for.body
482
483for.cond.cleanup.loopexit:                        ; preds = %for.body
484  %phi.cast = sitofp i32 %c.1 to double
485  br label %for.cond.cleanup
486
487for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
488  %c.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %phi.cast, %for.cond.cleanup.loopexit ]
489  %x.addr.0.lcssa = phi double [ %x, %entry ], [ %x.addr.1, %for.cond.cleanup.loopexit ]
490  %add5 = fadd double %x.addr.0.lcssa, %c.0.lcssa
491  ret double %add5
492
493for.body:                                         ; preds = %for.body.preheader, %for.body
494  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
495  %x.addr.022 = phi double [ %x, %for.body.preheader ], [ %x.addr.1, %for.body ]
496  %c.020 = phi i32 [ 0, %for.body.preheader ], [ %c.1, %for.body ]
497  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
498  %0 = load double, ptr %arrayidx, align 8
499  %cmp1 = fcmp ogt double %x.addr.022, %0
500  %sub = select i1 %cmp1, double %0, double 0.000000e+00
501  %x.addr.1 = fsub double %x.addr.022, %sub
502  %1 = trunc i64 %indvars.iv to i32
503  %cmp2 = icmp eq i32 %k, %n
504  %add = select i1 %cmp2, i32 %n, i32 0
505  %c.1 = add nsw i32 %add, %c.020
506  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
507  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
508  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
509}
510
; Intrinsics referenced by the tests in this file.
; llvm.dbg.value is called in @weighted_select_group.
511; Function Attrs: nounwind readnone speculatable willreturn
512declare void @llvm.dbg.value(metadata, metadata, metadata)
513
; llvm.fmuladd.f64 is called in the loop body of @small_gradient.
514; Function Attrs: mustprogress nofree nosync nounwind readnone speculatable willreturn
515declare double @llvm.fmuladd.f64(double, double, double)
516
; Module flags: the profile summary (!0) plus the DWARF and debug-info
; version flags (!26, !27).
517!llvm.module.flags = !{!0, !26, !27}
; Profile summary (!0 - !13).
518!0 = !{i32 1, !"ProfileSummary", !1}
519!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
520!2 = !{!"ProfileFormat", !"InstrProf"}
521!3 = !{!"TotalCount", i64 10000}
522!4 = !{!"MaxCount", i64 10}
523!5 = !{!"MaxInternalCount", i64 1}
524!6 = !{!"MaxFunctionCount", i64 1000}
525!7 = !{!"NumCounts", i64 3}
526!8 = !{!"NumFunctions", i64 3}
527!9 = !{!"DetailedSummary", !10}
528!10 = !{!11, !12, !13}
529!11 = !{i32 10000, i64 100, i32 1}
530!12 = !{i32 999000, i64 100, i32 1}
531!13 = !{i32 999999, i64 1, i32 2}
; !14: entry count 0, attached to @weighted_select_pgso.
532!14 = !{!"function_entry_count", i64 0}
; Branch weights attached to the selects under test (true weight first,
; false weight second); !28 further down belongs to the same group.
533!15 = !{!"branch_weights", i32 1, i32 100}
534!16 = !{!"branch_weights", i32 100, i32 1}
535!17 = !{!"branch_weights", i32 1, i32 99}
536!18 = !{!"branch_weights", i32 50, i32 50}
; !19: entry count 100, attached to @weighted_selects and
; @weighted_select_group.
537!19 = !{!"function_entry_count", i64 100}
; !20: empty node used as the !unpredictable operand in @unpred_select.
538!20 = !{}
; Debug info backing the llvm.dbg.value call in @weighted_select_group
; (!23 is its scope, !24 its variable).
539!21 = !DIFile(filename: "test.c", directory: "/test")
540!22 = distinct !DICompileUnit(language: DW_LANG_C99, file: !21, producer: "clang version 15.0.0", isOptimized: true, emissionKind: FullDebug, globals: !25, splitDebugInlining: false, nameTableKind: None)
541!23 = distinct !DISubprogram(name: "test", scope: !21, file: !21, line: 1, unit: !22)
542!24 = !DILocalVariable(name: "x", scope: !23)
543!25 = !{}
544!26 = !{i32 2, !"Dwarf Version", i32 4}
545!27 = !{i32 1, !"Debug Info Version", i32 3}
; !28: 30:70 weights used by the select in @small_gradient.
546!28 = !{!"branch_weights", i32 30, i32 70}
547