1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
3
4; void foo(float *data, float d) {
5;   long i;
6;   for (i = 0; i < 8000; i++)
7;     data[i] = d;
8; }
9;
10; This loop is vectorized on power9 and unrolled so that each iteration covers 96 elements.
11; The icmp of the loop iteration index against the loop trip count (384) has the LSRUse 'reg({0,+,384})'.
12; Make sure the above icmp does not prevent LSR from choosing the best formula set, which is based on 'reg({(192 + %0),+,384})'.
13
; @foo writes the scalar %d to data[0] through data[7999].
;   - vector.body covers elements 0..7967 using <4 x float> splat stores,
;     96 floats (384 bytes) per iteration.
;   - for.body covers the remaining elements 7968..7999 with scalar stores.
14define void @foo(float* nocapture %data, float %d) {
; Expected loop body: one indexed vector store (stxvx) followed by 23 stxv
; stores at displacements 16..368 from a single base register (r5 = r3 + r4),
; with the offset register r4 advanced by 384 once per iteration and the loop
; closed by bdnz.
15; CHECK-LABEL: foo:
16; CHECK:  .LBB0_1: # %vector.body
17; CHECK:         add 5, 3, 4
18; CHECK-NEXT:    stxvx 0, 3, 4
19; CHECK-NEXT:    addi 4, 4, 384
20; CHECK-NEXT:    stxv 0, 16(5)
21; CHECK-NEXT:    stxv 0, 32(5)
22; CHECK-NEXT:    stxv 0, 48(5)
23; CHECK-NEXT:    stxv 0, 64(5)
24; CHECK-NEXT:    stxv 0, 80(5)
25; CHECK-NEXT:    stxv 0, 96(5)
26; CHECK-NEXT:    stxv 0, 112(5)
27; CHECK-NEXT:    stxv 0, 128(5)
28; CHECK-NEXT:    stxv 0, 144(5)
29; CHECK-NEXT:    stxv 0, 160(5)
30; CHECK-NEXT:    stxv 0, 176(5)
31; CHECK-NEXT:    stxv 0, 192(5)
32; CHECK-NEXT:    stxv 0, 208(5)
33; CHECK-NEXT:    stxv 0, 224(5)
34; CHECK-NEXT:    stxv 0, 240(5)
35; CHECK-NEXT:    stxv 0, 256(5)
36; CHECK-NEXT:    stxv 0, 272(5)
37; CHECK-NEXT:    stxv 0, 288(5)
38; CHECK-NEXT:    stxv 0, 304(5)
39; CHECK-NEXT:    stxv 0, 320(5)
40; CHECK-NEXT:    stxv 0, 336(5)
41; CHECK-NEXT:    stxv 0, 352(5)
42; CHECK-NEXT:    stxv 0, 368(5)
43; CHECK-NEXT:    bdnz .LBB0_1
44
45entry:
; The twelve broadcast.splat* values below are all the same value: %d inserted
; into lane 0 and then shuffled with an all-zero mask, i.e. %d in every lane
; of a <4 x float>.
46  %broadcast.splatinsert16 = insertelement <4 x float> undef, float %d, i32 0
47  %broadcast.splat17 = shufflevector <4 x float> %broadcast.splatinsert16, <4 x float> undef, <4 x i32> zeroinitializer
48  %broadcast.splatinsert18 = insertelement <4 x float> undef, float %d, i32 0
49  %broadcast.splat19 = shufflevector <4 x float> %broadcast.splatinsert18, <4 x float> undef, <4 x i32> zeroinitializer
50  %broadcast.splatinsert20 = insertelement <4 x float> undef, float %d, i32 0
51  %broadcast.splat21 = shufflevector <4 x float> %broadcast.splatinsert20, <4 x float> undef, <4 x i32> zeroinitializer
52  %broadcast.splatinsert22 = insertelement <4 x float> undef, float %d, i32 0
53  %broadcast.splat23 = shufflevector <4 x float> %broadcast.splatinsert22, <4 x float> undef, <4 x i32> zeroinitializer
54  %broadcast.splatinsert24 = insertelement <4 x float> undef, float %d, i32 0
55  %broadcast.splat25 = shufflevector <4 x float> %broadcast.splatinsert24, <4 x float> undef, <4 x i32> zeroinitializer
56  %broadcast.splatinsert26 = insertelement <4 x float> undef, float %d, i32 0
57  %broadcast.splat27 = shufflevector <4 x float> %broadcast.splatinsert26, <4 x float> undef, <4 x i32> zeroinitializer
58  %broadcast.splatinsert28 = insertelement <4 x float> undef, float %d, i32 0
59  %broadcast.splat29 = shufflevector <4 x float> %broadcast.splatinsert28, <4 x float> undef, <4 x i32> zeroinitializer
60  %broadcast.splatinsert30 = insertelement <4 x float> undef, float %d, i32 0
61  %broadcast.splat31 = shufflevector <4 x float> %broadcast.splatinsert30, <4 x float> undef, <4 x i32> zeroinitializer
62  %broadcast.splatinsert32 = insertelement <4 x float> undef, float %d, i32 0
63  %broadcast.splat33 = shufflevector <4 x float> %broadcast.splatinsert32, <4 x float> undef, <4 x i32> zeroinitializer
64  %broadcast.splatinsert34 = insertelement <4 x float> undef, float %d, i32 0
65  %broadcast.splat35 = shufflevector <4 x float> %broadcast.splatinsert34, <4 x float> undef, <4 x i32> zeroinitializer
66  %broadcast.splatinsert36 = insertelement <4 x float> undef, float %d, i32 0
67  %broadcast.splat37 = shufflevector <4 x float> %broadcast.splatinsert36, <4 x float> undef, <4 x i32> zeroinitializer
68  %broadcast.splatinsert38 = insertelement <4 x float> undef, float %d, i32 0
69  %broadcast.splat39 = shufflevector <4 x float> %broadcast.splatinsert38, <4 x float> undef, <4 x i32> zeroinitializer
70  br label %vector.body
71
; Main loop. Each iteration issues 24 vector stores of 16 bytes each:
; 12 relative to %0 (data + index) and 12 relative to %24 (data + index + 48).
72vector.body:                                      ; preds = %vector.body, %entry
73  %index = phi i64 [ 0, %entry ], [ %index.next.1, %vector.body ]
; First half: element offsets 0, 4, ..., 44 from data + index.
74  %0 = getelementptr inbounds float, float* %data, i64 %index
75  %1 = bitcast float* %0 to <4 x float>*
76  store <4 x float> %broadcast.splat17, <4 x float>* %1, align 4
77  %2 = getelementptr inbounds float, float* %0, i64 4
78  %3 = bitcast float* %2 to <4 x float>*
79  store <4 x float> %broadcast.splat19, <4 x float>* %3, align 4
80  %4 = getelementptr inbounds float, float* %0, i64 8
81  %5 = bitcast float* %4 to <4 x float>*
82  store <4 x float> %broadcast.splat21, <4 x float>* %5, align 4
83  %6 = getelementptr inbounds float, float* %0, i64 12
84  %7 = bitcast float* %6 to <4 x float>*
85  store <4 x float> %broadcast.splat23, <4 x float>* %7, align 4
86  %8 = getelementptr inbounds float, float* %0, i64 16
87  %9 = bitcast float* %8 to <4 x float>*
88  store <4 x float> %broadcast.splat25, <4 x float>* %9, align 4
89  %10 = getelementptr inbounds float, float* %0, i64 20
90  %11 = bitcast float* %10 to <4 x float>*
91  store <4 x float> %broadcast.splat27, <4 x float>* %11, align 4
92  %12 = getelementptr inbounds float, float* %0, i64 24
93  %13 = bitcast float* %12 to <4 x float>*
94  store <4 x float> %broadcast.splat29, <4 x float>* %13, align 4
95  %14 = getelementptr inbounds float, float* %0, i64 28
96  %15 = bitcast float* %14 to <4 x float>*
97  store <4 x float> %broadcast.splat31, <4 x float>* %15, align 4
98  %16 = getelementptr inbounds float, float* %0, i64 32
99  %17 = bitcast float* %16 to <4 x float>*
100  store <4 x float> %broadcast.splat33, <4 x float>* %17, align 4
101  %18 = getelementptr inbounds float, float* %0, i64 36
102  %19 = bitcast float* %18 to <4 x float>*
103  store <4 x float> %broadcast.splat35, <4 x float>* %19, align 4
104  %20 = getelementptr inbounds float, float* %0, i64 40
105  %21 = bitcast float* %20 to <4 x float>*
106  store <4 x float> %broadcast.splat37, <4 x float>* %21, align 4
107  %22 = getelementptr inbounds float, float* %0, i64 44
108  %23 = bitcast float* %22 to <4 x float>*
109  store <4 x float> %broadcast.splat39, <4 x float>* %23, align 4
; Second half: the same 12 stores, rebased on data + index + 48.
110  %index.next = add nuw nsw i64 %index, 48
111  %24 = getelementptr inbounds float, float* %data, i64 %index.next
112  %25 = bitcast float* %24 to <4 x float>*
113  store <4 x float> %broadcast.splat17, <4 x float>* %25, align 4
114  %26 = getelementptr inbounds float, float* %24, i64 4
115  %27 = bitcast float* %26 to <4 x float>*
116  store <4 x float> %broadcast.splat19, <4 x float>* %27, align 4
117  %28 = getelementptr inbounds float, float* %24, i64 8
118  %29 = bitcast float* %28 to <4 x float>*
119  store <4 x float> %broadcast.splat21, <4 x float>* %29, align 4
120  %30 = getelementptr inbounds float, float* %24, i64 12
121  %31 = bitcast float* %30 to <4 x float>*
122  store <4 x float> %broadcast.splat23, <4 x float>* %31, align 4
123  %32 = getelementptr inbounds float, float* %24, i64 16
124  %33 = bitcast float* %32 to <4 x float>*
125  store <4 x float> %broadcast.splat25, <4 x float>* %33, align 4
126  %34 = getelementptr inbounds float, float* %24, i64 20
127  %35 = bitcast float* %34 to <4 x float>*
128  store <4 x float> %broadcast.splat27, <4 x float>* %35, align 4
129  %36 = getelementptr inbounds float, float* %24, i64 24
130  %37 = bitcast float* %36 to <4 x float>*
131  store <4 x float> %broadcast.splat29, <4 x float>* %37, align 4
132  %38 = getelementptr inbounds float, float* %24, i64 28
133  %39 = bitcast float* %38 to <4 x float>*
134  store <4 x float> %broadcast.splat31, <4 x float>* %39, align 4
135  %40 = getelementptr inbounds float, float* %24, i64 32
136  %41 = bitcast float* %40 to <4 x float>*
137  store <4 x float> %broadcast.splat33, <4 x float>* %41, align 4
138  %42 = getelementptr inbounds float, float* %24, i64 36
139  %43 = bitcast float* %42 to <4 x float>*
140  store <4 x float> %broadcast.splat35, <4 x float>* %43, align 4
141  %44 = getelementptr inbounds float, float* %24, i64 40
142  %45 = bitcast float* %44 to <4 x float>*
143  store <4 x float> %broadcast.splat37, <4 x float>* %45, align 4
144  %46 = getelementptr inbounds float, float* %24, i64 44
145  %47 = bitcast float* %46 to <4 x float>*
146  store <4 x float> %broadcast.splat39, <4 x float>* %47, align 4
; Advance the index by 96 elements and exit once it reaches 7968 (83 * 96).
; This compare is the icmp whose LSR use the test is concerned with.
147  %index.next.1 = add nuw nsw i64 %index, 96
148  %48 = icmp eq i64 %index.next.1, 7968
149  br i1 %48, label %for.body, label %vector.body
150
; Remainder: 32 straight-line scalar stores covering data[7968]..data[7999].
151for.body:                                         ; preds = %vector.body
152  %arrayidx = getelementptr inbounds float, float* %data, i64 7968
153  store float %d, float* %arrayidx, align 4
154  %arrayidx.1 = getelementptr inbounds float, float* %data, i64 7969
155  store float %d, float* %arrayidx.1, align 4
156  %arrayidx.2 = getelementptr inbounds float, float* %data, i64 7970
157  store float %d, float* %arrayidx.2, align 4
158  %arrayidx.3 = getelementptr inbounds float, float* %data, i64 7971
159  store float %d, float* %arrayidx.3, align 4
160  %arrayidx.4 = getelementptr inbounds float, float* %data, i64 7972
161  store float %d, float* %arrayidx.4, align 4
162  %arrayidx.5 = getelementptr inbounds float, float* %data, i64 7973
163  store float %d, float* %arrayidx.5, align 4
164  %arrayidx.6 = getelementptr inbounds float, float* %data, i64 7974
165  store float %d, float* %arrayidx.6, align 4
166  %arrayidx.7 = getelementptr inbounds float, float* %data, i64 7975
167  store float %d, float* %arrayidx.7, align 4
168  %arrayidx.8 = getelementptr inbounds float, float* %data, i64 7976
169  store float %d, float* %arrayidx.8, align 4
170  %arrayidx.9 = getelementptr inbounds float, float* %data, i64 7977
171  store float %d, float* %arrayidx.9, align 4
172  %arrayidx.10 = getelementptr inbounds float, float* %data, i64 7978
173  store float %d, float* %arrayidx.10, align 4
174  %arrayidx.11 = getelementptr inbounds float, float* %data, i64 7979
175  store float %d, float* %arrayidx.11, align 4
176  %arrayidx.12 = getelementptr inbounds float, float* %data, i64 7980
177  store float %d, float* %arrayidx.12, align 4
178  %arrayidx.13 = getelementptr inbounds float, float* %data, i64 7981
179  store float %d, float* %arrayidx.13, align 4
180  %arrayidx.14 = getelementptr inbounds float, float* %data, i64 7982
181  store float %d, float* %arrayidx.14, align 4
182  %arrayidx.15 = getelementptr inbounds float, float* %data, i64 7983
183  store float %d, float* %arrayidx.15, align 4
184  %arrayidx.16 = getelementptr inbounds float, float* %data, i64 7984
185  store float %d, float* %arrayidx.16, align 4
186  %arrayidx.17 = getelementptr inbounds float, float* %data, i64 7985
187  store float %d, float* %arrayidx.17, align 4
188  %arrayidx.18 = getelementptr inbounds float, float* %data, i64 7986
189  store float %d, float* %arrayidx.18, align 4
190  %arrayidx.19 = getelementptr inbounds float, float* %data, i64 7987
191  store float %d, float* %arrayidx.19, align 4
192  %arrayidx.20 = getelementptr inbounds float, float* %data, i64 7988
193  store float %d, float* %arrayidx.20, align 4
194  %arrayidx.21 = getelementptr inbounds float, float* %data, i64 7989
195  store float %d, float* %arrayidx.21, align 4
196  %arrayidx.22 = getelementptr inbounds float, float* %data, i64 7990
197  store float %d, float* %arrayidx.22, align 4
198  %arrayidx.23 = getelementptr inbounds float, float* %data, i64 7991
199  store float %d, float* %arrayidx.23, align 4
200  %arrayidx.24 = getelementptr inbounds float, float* %data, i64 7992
201  store float %d, float* %arrayidx.24, align 4
202  %arrayidx.25 = getelementptr inbounds float, float* %data, i64 7993
203  store float %d, float* %arrayidx.25, align 4
204  %arrayidx.26 = getelementptr inbounds float, float* %data, i64 7994
205  store float %d, float* %arrayidx.26, align 4
206  %arrayidx.27 = getelementptr inbounds float, float* %data, i64 7995
207  store float %d, float* %arrayidx.27, align 4
208  %arrayidx.28 = getelementptr inbounds float, float* %data, i64 7996
209  store float %d, float* %arrayidx.28, align 4
210  %arrayidx.29 = getelementptr inbounds float, float* %data, i64 7997
211  store float %d, float* %arrayidx.29, align 4
212  %arrayidx.30 = getelementptr inbounds float, float* %data, i64 7998
213  store float %d, float* %arrayidx.30, align 4
214  %arrayidx.31 = getelementptr inbounds float, float* %data, i64 7999
215  store float %d, float* %arrayidx.31, align 4
216  ret void
217}
218