; Test the loop alignment.
; Default behaviour: the -mcpu=a2 run is verified with the GENERIC prefix, the
; -mcpu=pwr8 and -mcpu=pwr9 runs (little- and big-endian triples) with PWR.
; RUN: llc -verify-machineinstrs -mcpu=a2 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,GENERIC
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR

; Test the loop alignment and the option -disable-ppc-innermost-loop-align32.
; Same CPU/triple matrix as above, but with a dedicated pair of prefixes so the
; expected alignment may differ once the option is passed.
; RUN: llc -verify-machineinstrs -mcpu=a2 -disable-ppc-innermost-loop-align32 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -disable-ppc-innermost-loop-align32 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -disable-ppc-innermost-loop-align32 -mtriple powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -disable-ppc-innermost-loop-align32 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -disable-ppc-innermost-loop-align32 -mtriple powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefixes=CHECK,PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32


; Aggregate passed by pointer to @big_loop and @big_loop_cold_innerloop:
; field 0 is an i32 pointer, fields 1 and 2 are i32 values.
%struct.parm = type { i32*, i32, i32 }

; Test the loop alignment when the innermost hot loop has more than 8 instructions.
;
; Shape of the IR: an outer do-while (do.body .. do.end) wrapping an inner
; do-while (do.body3). Each inner iteration performs four i32 stores through
; the pointer loaded from field 0 of %arg.
define void @big_loop(%struct.parm* %arg) {
entry:
  ; Load the three fields of %arg: the i32 pointer (field 0), the initial
  ; outer counter (field 1) and the initial inner index (field 2).
  %localArg.sroa.0.0..sroa_idx = getelementptr inbounds %struct.parm, %struct.parm* %arg, i64 0, i32 0
  %localArg.sroa.0.0.copyload = load i32*, i32** %localArg.sroa.0.0..sroa_idx, align 8
  %localArg.sroa.4.0..sroa_idx56 = getelementptr inbounds %struct.parm, %struct.parm* %arg, i64 0, i32 1
  %localArg.sroa.4.0.copyload = load i32, i32* %localArg.sroa.4.0..sroa_idx56, align 8
  %localArg.sroa.5.0..sroa_idx58 = getelementptr inbounds %struct.parm, %struct.parm* %arg, i64 0, i32 2
  %localArg.sroa.5.0.copyload = load i32, i32* %localArg.sroa.5.0..sroa_idx58, align 4
  ; The inner index is kept as a 64-bit value.
  %0 = sext i32 %localArg.sroa.5.0.copyload to i64
  br label %do.body

; Outer loop header: %m.0 starts at field 1 and is decremented in do.end.
do.body:                                          ; preds = %do.end, %entry
  %m.0 = phi i32 [ %localArg.sroa.4.0.copyload, %entry ], [ %dec24, %do.end ]
  br label %do.body3

; Inner loop: %indvars.iv restarts at %0 on every outer iteration and counts
; down by one. Stores go to indices iv+3, iv+4, iv+2 and iv.
do.body3:                                         ; preds = %do.body3, %do.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %do.body3 ], [ %0, %do.body ]
  %1 = add nsw i64 %indvars.iv, 2
  %arrayidx = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %1
  %2 = add nsw i64 %indvars.iv, 3
  %3 = trunc i64 %1 to i32
  %4 = add nsw i64 %indvars.iv, 4
  %arrayidx10 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %2
  %5 = trunc i64 %2 to i32
  store i32 %5, i32* %arrayidx10, align 4
  %arrayidx12 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %4
  %6 = trunc i64 %4 to i32
  store i32 %6, i32* %arrayidx12, align 4
  store i32 %3, i32* %arrayidx, align 4
  %arrayidx21 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %indvars.iv
  %7 = trunc i64 %indvars.iv to i32
  %8 = add i32 %7, 1
  store i32 %8, i32* %arrayidx21, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  ; The exit test uses the pre-decrement index, so the body also runs for iv == 0.
  %9 = icmp eq i64 %indvars.iv, 0
  br i1 %9, label %do.end, label %do.body3

; Outer latch: leave once the pre-decrement counter %m.0 is zero.
do.end:                                           ; preds = %do.body3
  %dec24 = add nsw i32 %m.0, -1
  %tobool25 = icmp eq i32 %m.0, 0
  br i1 %tobool25, label %do.end26, label %do.body

; After both loops, store 0 at index %0.
do.end26:                                         ; preds = %do.end
  %arrayidx28 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %0
  store i32 0, i32* %arrayidx28, align 4
  ret void


; Expected output: between mtctr and bdnz there must be an alignment
; directive. The PWR runs expect .p2align 5 by default and .p2align 4 once
; -disable-ppc-innermost-loop-align32 is passed; the a2 runs expect
; .p2align 4 in both configurations.
; CHECK-LABEL: @big_loop
; CHECK: mtctr
; GENERIC: .p2align  4
; PWR: .p2align  5
; GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align  4
; PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align  4
; CHECK: bdnz
}

; Test the loop alignment when the innermost hot loop has 5-8 instructions.
;
; Shape of the IR: an outer loop (while.body3.lr.ph .. while.cond.loopexit)
; guarded by an initial %m == 0 test, wrapping an inner loop (while.body3) that
; performs three i32 stores through %s per iteration.
define void @general_loop(i32* %s, i64 %m) {
entry:
  ; Skip both loops entirely when %m is zero.
  %tobool40 = icmp eq i64 %m, 0
  br i1 %tobool40, label %while.end18, label %while.body3.lr.ph

; Outer latch: continue until the decremented outer counter reaches zero.
while.cond.loopexit:                              ; preds = %while.body3
  %tobool = icmp eq i64 %dec, 0
  br i1 %tobool, label %while.end18, label %while.body3.lr.ph

; Outer header / inner preheader: compute the values that are invariant in the
; inner loop (%dec and the two truncated store operands).
while.body3.lr.ph:                                ; preds = %entry, %while.cond.loopexit
  %m.addr.041 = phi i64 [ %dec, %while.cond.loopexit ], [ %m, %entry ]
  %dec = add nsw i64 %m.addr.041, -1
  %conv = trunc i64 %m.addr.041 to i32
  %conv11 = trunc i64 %dec to i32
  br label %while.body3

; Inner loop: %n.039 starts at the current outer counter and counts down by
; one. Stores go to indices n+2, n+1 and n.
while.body3:                                      ; preds = %while.body3.lr.ph, %while.body3
  %n.039 = phi i64 [ %m.addr.041, %while.body3.lr.ph ], [ %dec16, %while.body3 ]
  %inc = add nsw i64 %n.039, 1
  %arrayidx = getelementptr inbounds i32, i32* %s, i64 %n.039
  %inc5 = add nsw i64 %n.039, 2
  %arrayidx6 = getelementptr inbounds i32, i32* %s, i64 %inc
  %sub = sub nsw i64 %dec, %inc5
  %conv7 = trunc i64 %sub to i32
  %arrayidx9 = getelementptr inbounds i32, i32* %s, i64 %inc5
  store i32 %conv7, i32* %arrayidx9, align 4
  store i32 %conv11, i32* %arrayidx6, align 4
  store i32 %conv, i32* %arrayidx, align 4
  %dec16 = add nsw i64 %n.039, -1
  ; Exit test uses the decremented value, so the body does not run for n == 0.
  %tobool2 = icmp eq i64 %dec16, 0
  br i1 %tobool2, label %while.cond.loopexit, label %while.body3

while.end18:                                      ; preds = %while.cond.loopexit, %entry
  ret void


; Expected output: between mtctr and bdnz there must be an alignment
; directive. Unlike @big_loop and @small_loop, the PWR runs expect .p2align 5
; both by default and with -disable-ppc-innermost-loop-align32; the a2 runs
; expect .p2align 4 in both configurations.
; CHECK-LABEL: @general_loop
; CHECK: mtctr
; GENERIC: .p2align  4
; PWR: .p2align  5
; GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align  4
; PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align  5
; CHECK: bdnz
}

; Test the small loop alignment when the innermost hot loop has less than 4 instructions.
;
; Shape of the IR: two nested do-while loops whose counters are decremented by
; inline-asm "subi" statements rather than by IR arithmetic. Each asm has a
; register output tied to its single input (constraints "=r,0").
define void @small_loop(i64 %m) {
entry:
  br label %do.body

; Outer loop header: %m.addr.0 starts at %m and is updated in do.end.
do.body:                                          ; preds = %do.end, %entry
  %m.addr.0 = phi i64 [ %m, %entry ], [ %1, %do.end ]
  br label %do.body1

; Inner loop: restart from the current outer counter, loop until the asm
; result is zero.
do.body1:                                         ; preds = %do.body1, %do.body
  %n.0 = phi i64 [ %m.addr.0, %do.body ], [ %0, %do.body1 ]
  %0 = tail call i64 asm "subi     $0,$0,1", "=r,0"(i64 %n.0)
  %tobool = icmp eq i64 %0, 0
  br i1 %tobool, label %do.end, label %do.body1

; Outer latch: the asm template names operand $1, i.e. the tied input.
do.end:                                           ; preds = %do.body1
  %1 = tail call i64 asm "subi     $1,$1,1", "=r,0"(i64 %m.addr.0)
  %tobool3 = icmp eq i64 %1, 0
  br i1 %tobool3, label %do.end4, label %do.body

do.end4:                                          ; preds = %do.end
  ret void


; Expected output: between mr and bne there must be an alignment directive.
; The PWR runs expect .p2align 5 by default and .p2align 4 once
; -disable-ppc-innermost-loop-align32 is passed; the a2 runs expect
; .p2align 4 in both configurations.
; CHECK-LABEL: @small_loop
; CHECK: mr
; GENERIC: .p2align  4
; PWR: .p2align  5
; GENERIC-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align  4
; PWR-DISABLE-PPC-INNERMOST-LOOP-ALIGN32: .p2align  4
; CHECK: bne
}

; Test the loop alignment when the innermost cold loop has more than 8 instructions.
;
; Shape of the IR: an outer do-while (do.body .. do.end) wrapping an inner
; do-while (do.body3) with four i32 stores per iteration.
; NOTE(review): as visible here the body is instruction-for-instruction the
; same as @big_loop and the inner back-edge carries no branch-weight metadata,
; so nothing in this function marks the inner loop as cold -- confirm whether
; profile metadata is expected on the do.body3 branch.
define void @big_loop_cold_innerloop(%struct.parm* %arg) {
entry:
  ; Load the three fields of %arg: the i32 pointer (field 0), the initial
  ; outer counter (field 1) and the initial inner index (field 2).
  %localArg.sroa.0.0..sroa_idx = getelementptr inbounds %struct.parm, %struct.parm* %arg, i64 0, i32 0
  %localArg.sroa.0.0.copyload = load i32*, i32** %localArg.sroa.0.0..sroa_idx, align 8
  %localArg.sroa.4.0..sroa_idx56 = getelementptr inbounds %struct.parm, %struct.parm* %arg, i64 0, i32 1
  %localArg.sroa.4.0.copyload = load i32, i32* %localArg.sroa.4.0..sroa_idx56, align 8
  %localArg.sroa.5.0..sroa_idx58 = getelementptr inbounds %struct.parm, %struct.parm* %arg, i64 0, i32 2
  %localArg.sroa.5.0.copyload = load i32, i32* %localArg.sroa.5.0..sroa_idx58, align 4
  ; The inner index is kept as a 64-bit value.
  %0 = sext i32 %localArg.sroa.5.0.copyload to i64
  br label %do.body

; Outer loop header: %m.0 starts at field 1 and is decremented in do.end.
do.body:                                          ; preds = %do.end, %entry
  %m.0 = phi i32 [ %localArg.sroa.4.0.copyload, %entry ], [ %dec24, %do.end ]
  br label %do.body3

; Inner loop: %indvars.iv restarts at %0 on every outer iteration and counts
; down by one. Stores go to indices iv+3, iv+4, iv+2 and iv.
do.body3:                                         ; preds = %do.body3, %do.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %do.body3 ], [ %0, %do.body ]
  %1 = add nsw i64 %indvars.iv, 2
  %arrayidx = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %1
  %2 = add nsw i64 %indvars.iv, 3
  %3 = trunc i64 %1 to i32
  %4 = add nsw i64 %indvars.iv, 4
  %arrayidx10 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %2
  %5 = trunc i64 %2 to i32
  store i32 %5, i32* %arrayidx10, align 4
  %arrayidx12 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %4
  %6 = trunc i64 %4 to i32
  store i32 %6, i32* %arrayidx12, align 4
  store i32 %3, i32* %arrayidx, align 4
  %arrayidx21 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %indvars.iv
  %7 = trunc i64 %indvars.iv to i32
  %8 = add i32 %7, 1
  store i32 %8, i32* %arrayidx21, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  ; The exit test uses the pre-decrement index, so the body also runs for iv == 0.
  %9 = icmp eq i64 %indvars.iv, 0
  br i1 %9, label %do.end, label %do.body3

; Outer latch: leave once the pre-decrement counter %m.0 is zero.
do.end:                                           ; preds = %do.body3
  %dec24 = add nsw i32 %m.0, -1
  %tobool25 = icmp eq i32 %m.0, 0
  br i1 %tobool25, label %do.end26, label %do.body

; After both loops, store 0 at index %0.
do.end26:                                         ; preds = %do.end
  %arrayidx28 = getelementptr inbounds i32, i32* %localArg.sroa.0.0.copyload, i64 %0
  store i32 0, i32* %arrayidx28, align 4
  ret void


; Expected output: after mtctr the default PWR runs require one .p2align 5;
; every run then requires that no (further) .p2align 5 appears before bdnz.
; The runs using the other prefixes therefore must see no .p2align 5 at all
; between mtctr and bdnz.
; CHECK-LABEL: @big_loop_cold_innerloop
; CHECK: mtctr
; PWR: .p2align 5
; CHECK-NOT: .p2align 5
; CHECK: bdnz
}
