; RUN: opt < %s  -O3 -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s

; This test checks auto-vectorization with an FP induction variable.
; The FP operation is not "fast" and requires a "fast-math" function attribute.

;void fp_iv_loop1(float * __restrict__ A, int N) {
;  float x = 1.0;
;  for (int i=0; i < N; ++i) {
;    A[i] = x;
;    x += 0.5;
;  }
;}


; AUTO_VEC-LABEL: @fp_iv_loop1(
; AUTO_VEC: vector.body
; AUTO_VEC: store <8 x float>

; Scalar input loop: stores the float sequence 1.0, 1.5, 2.0, ... into
; A[0..N-1]. %x.06 is the FP induction variable; its fadd carries no
; fast-math flags, so the only FP relaxation on this function comes from
; attribute group #0.
define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
entry:
  ; Guard: the loop is skipped entirely when N <= 0.
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; %indvars.iv is the widened (i64) loop index, starting at 0.
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  ; %x.06 is the FP induction value, starting at 1.0.
  %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
  ; A[i] = x
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.06, float* %arrayidx, align 4
  ; x += 0.5 (plain fadd, no fast-math flags on the instruction)
  %conv1 = fadd float %x.06, 5.000000e-01
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Exit test: compare the incremented index, truncated to i32, against N.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; The same loop as the previous one: the FP operation is not "fast", but the
; function attribute is different. Vectorization should be rejected.
;void fp_iv_loop2(float * __restrict__ A, int N) {
;  float x = 1.0;
;  for (int i=0; i < N; ++i) {
;    A[i] = x;
;    x += 0.5;
;  }
;}

; AUTO_VEC-LABEL: @fp_iv_loop2(
; AUTO_VEC-NOT: vector.body
; AUTO_VEC-NOT: store <{{.*}} x float>

; Scalar input loop: stores the float sequence 1.0, 1.5, 2.0, ... into
; A[0..N-1]. The body matches @fp_iv_loop1 instruction for instruction; the
; only difference is that this function uses attribute group #1.
define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) #1 {
entry:
  ; Guard: the loop is skipped entirely when N <= 0.
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; %indvars.iv is the widened (i64) loop index, starting at 0.
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  ; %x.06 is the FP induction value, starting at 1.0.
  %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
  ; A[i] = x
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.06, float* %arrayidx, align 4
  ; x += 0.5 (plain fadd, no fast-math flags on the instruction)
  %conv1 = fadd float %x.06, 5.000000e-01
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Exit test: compare the incremented index, truncated to i32, against N.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Function attribute groups referenced by the two test functions:
;   #0 (on @fp_iv_loop1) sets "no-nans-fp-math" to "true";
;   #1 (on @fp_iv_loop2) sets "no-nans-fp-math" to "false".
attributes #0 = { "no-nans-fp-math"="true" }
attributes #1 = { "no-nans-fp-math"="false" }
