1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -O3 -rotation-max-header-size=0 -S -enable-new-pm=0 < %s   | FileCheck %s --check-prefix=HOIST
3; RUN: opt -passes='default<O3>' -rotation-max-header-size=0 -S < %s  | FileCheck %s --check-prefix=HOIST
4
5; RUN: opt -O3 -rotation-max-header-size=1 -S -enable-new-pm=0 < %s   | FileCheck %s --check-prefix=HOIST
6; RUN: opt -passes='default<O3>' -rotation-max-header-size=1 -S < %s  | FileCheck %s --check-prefix=HOIST
7
8; RUN: opt -O3 -rotation-max-header-size=2 -S -enable-new-pm=0 < %s   | FileCheck %s --check-prefix=ROTATE
9; RUN: opt -passes='default<O3>' -rotation-max-header-size=2 -S < %s  | FileCheck %s --check-prefix=ROTATE
10
11; RUN: opt -O3 -rotation-max-header-size=3 -S -enable-new-pm=0 < %s   | FileCheck %s --check-prefix=ROTATE
12; RUN: opt -passes='default<O3>' -rotation-max-header-size=3 -S < %s  | FileCheck %s --check-prefix=ROTATE
13
14; This example is produced from very basic C code:
15;
16;   void f0();
17;   void f1();
18;   void f2();
19;
20;   void loop(int width) {
21;       if(width < 1)
22;           return;
23;       for(int i = 0; i < width - 1; ++i) {
24;           f0();
25;           f1();
26;       }
27;       f0();
28;       f2();
29;   }
30
31; We have a choice here. We can either
32; * hoist the f0() call into the loop header,
33;   * which potentially makes loop rotation unprofitable since the loop header
34;     might have grown above a certain threshold, and such unrotated loops will
35;     be ignored by LoopVectorizer, preventing vectorization
36;   * or loop rotation will succeed, resulting in some weird PHIs that will also
37;     harm vectorization
38; * or not hoist the f0() call before performing loop rotation,
39;   at the cost of potential code bloat and/or potentially successfully rotating
40;   the loops, vectorizing them at the cost of compile time.
41
42target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
43
44declare void @f0()  ; external; called in the loop body and once more after the loop
45declare void @f1()  ; external; called only inside the loop body
46declare void @f2()  ; external; called only after the loop
47
48declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)  ; used in if.end to open the lifetime of %i
49declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)  ; used in for.cond.cleanup to close the lifetime of %i
50
51define void @_Z4loopi(i32 %width) {  ; unoptimized IR for loop(int width) from the C snippet above; the assertions below describe its -O3 output
52; HOIST-LABEL: @_Z4loopi(
53; HOIST-NEXT:  entry:
54; HOIST-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
55; HOIST-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
56; HOIST:       for.cond.preheader:
57; HOIST-NEXT:    [[SUB:%.*]] = add nsw i32 [[WIDTH]], -1
58; HOIST-NEXT:    br label [[FOR_COND:%.*]]
59; HOIST:       for.cond:
60; HOIST-NEXT:    [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ]
61; HOIST-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[SUB]]
62; HOIST-NEXT:    tail call void @f0()
63; HOIST-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
64; HOIST:       for.cond.cleanup:
65; HOIST-NEXT:    tail call void @f2()
66; HOIST-NEXT:    br label [[RETURN]]
67; HOIST:       for.body:
68; HOIST-NEXT:    tail call void @f1()
69; HOIST-NEXT:    [[INC]] = add nuw i32 [[I_0]], 1
70; HOIST-NEXT:    br label [[FOR_COND]]
71; HOIST:       return:
72; HOIST-NEXT:    ret void
73;
74; ROTATE-LABEL: @_Z4loopi(
75; ROTATE-NEXT:  entry:
76; ROTATE-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
77; ROTATE-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
78; ROTATE:       for.cond.preheader:
79; ROTATE-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
80; ROTATE-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
81; ROTATE:       for.body.preheader:
82; ROTATE-NEXT:    [[TMP0:%.*]] = add i32 [[WIDTH]], -2
83; ROTATE-NEXT:    br label [[FOR_BODY:%.*]]
84; ROTATE:       for.cond.cleanup:
85; ROTATE-NEXT:    tail call void @f0()
86; ROTATE-NEXT:    tail call void @f2()
87; ROTATE-NEXT:    br label [[RETURN]]
88; ROTATE:       for.body:
89; ROTATE-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
90; ROTATE-NEXT:    tail call void @f0()
91; ROTATE-NEXT:    tail call void @f1()
92; ROTATE-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1
93; ROTATE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_04]], [[TMP0]]
94; ROTATE-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
95; ROTATE:       return:
96; ROTATE-NEXT:    ret void
97;
98entry:
99  %width.addr = alloca i32, align 4  ; stack slot holding the incoming %width argument
100  %i = alloca i32, align 4  ; stack slot holding the loop counter
101  store i32 %width, i32* %width.addr, align 4
102  %i1 = load i32, i32* %width.addr, align 4
103  %cmp = icmp slt i32 %i1, 1  ; early-exit test: width < 1 (signed)
104  br i1 %cmp, label %if.then, label %if.end
105
106if.then:
107  br label %return  ; width < 1: return without calling anything
108
109if.end:
110  %i2 = bitcast i32* %i to i8*
111  call void @llvm.lifetime.start.p0i8(i64 4, i8* %i2)  ; the 4-byte counter slot becomes live here
112  store i32 0, i32* %i, align 4  ; i = 0
113  br label %for.cond
114
115for.cond:
116  %i3 = load i32, i32* %i, align 4
117  %i4 = load i32, i32* %width.addr, align 4
118  %sub = sub nsw i32 %i4, 1  ; width - 1, recomputed on every visit to this block
119  %cmp1 = icmp slt i32 %i3, %sub  ; loop condition: i < width - 1 (signed)
120  br i1 %cmp1, label %for.body, label %for.cond.cleanup
121
122for.cond.cleanup:
123  %i5 = bitcast i32* %i to i8*
124  call void @llvm.lifetime.end.p0i8(i64 4, i8* %i5)  ; the counter slot is dead once the loop exits
125  br label %for.end
126
127for.body:
128  call void @f0()  ; same callee as the call in for.end; this duplicated call is what the two check prefixes place differently
129  call void @f1()
130  br label %for.inc
131
132for.inc:
133  %i6 = load i32, i32* %i, align 4
134  %inc = add nsw i32 %i6, 1  ; ++i
135  store i32 %inc, i32* %i, align 4
136  br label %for.cond
137
138for.end:
139  call void @f0()  ; the post-loop f0() call, followed by f2()
140  call void @f2()
141  br label %return
142
143return:
144  ret void
145}
146