1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=CHECK
3
4; Exercise tail folding (predicated vector loops) on RISC-V with scalable vectors.
5
6target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
7target triple = "riscv64"
8
; vector_add: for i in [0, 1024): a[i] = a[i] + v.  The %n argument is unused.
; The CHECK lines expect a tail-folded vector loop on <vscale x 1 x i64>: each
; iteration builds a lane mask (vector IV ule splat(1023)) and uses it for a
; masked load and masked store.  middle.block branches to for.end with a
; constant-true condition; the original scalar loop is still emitted after
; scalar.ph.
9define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
10; CHECK-LABEL: @vector_add(
11; CHECK-NEXT:  entry:
12; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
13; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
14; CHECK-NEXT:    br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
15; CHECK:       vector.ph:
16; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
17; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
18; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP3]], 1
19; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
20; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
21; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
22; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
23; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
24; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
25; CHECK:       vector.body:
26; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
27; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 0
28; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[INDEX]], i32 0
29; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
30; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
31; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP6]]
32; CHECK-NEXT:    [[VEC_IV:%.*]] = add <vscale x 1 x i64> [[BROADCAST_SPLAT]], [[TMP7]]
33; CHECK-NEXT:    [[TMP8:%.*]] = icmp ule <vscale x 1 x i64> [[VEC_IV]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1023, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
34; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
35; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0
36; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[TMP10]], i32 8, <vscale x 1 x i1> [[TMP8]], <vscale x 1 x i64> poison)
37; CHECK-NEXT:    [[TMP11:%.*]] = add <vscale x 1 x i64> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT2]]
38; CHECK-NEXT:    call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[TMP11]], ptr [[TMP10]], i32 8, <vscale x 1 x i1> [[TMP8]])
39; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
40; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]]
41; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
42; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
43; CHECK:       middle.block:
44; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
45; CHECK:       scalar.ph:
46; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
47; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
48; CHECK:       for.body:
49; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
50; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
51; CHECK-NEXT:    [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
52; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
53; CHECK-NEXT:    store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
54; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
55; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
56; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
57; CHECK:       for.end:
58; CHECK-NEXT:    ret void
59;
60entry:
61  br label %for.body
62
63for.body:
64  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
65  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
66  %elem = load i64, ptr %arrayidx
67  %add = add i64 %elem, %v
68  store i64 %add, ptr %arrayidx
69  %iv.next = add nuw nsw i64 %iv, 1
70  %exitcond.not = icmp eq i64 %iv.next, 1024
71  br i1 %exitcond.not, label %for.end, label %for.body
72
73for.end:
74  ret void
75}
76
77
78; a[b[i]] = v: the store address comes from an index loaded from b, which exercises scatter support.
; indexed_store: for i in [0, 1024): a[b[i]] = v.  The %n argument is unused.
; The store address depends on a value loaded from b, so a vectorized form
; would need a scatter.  The CHECK lines expect the loop to stay in its
; original scalar form (no vector.ph / vector.body blocks) under this RUN line.
; NOTE(review): presumably the predicated scatter is rejected or unprofitable
; with these options -- confirm against the vectorizer debug output.
79define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
80; CHECK-LABEL: @indexed_store(
81; CHECK-NEXT:  entry:
82; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
83; CHECK:       for.body:
84; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
85; CHECK-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
86; CHECK-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
87; CHECK-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
88; CHECK-NEXT:    store i64 [[V:%.*]], ptr [[AADDR]], align 8
89; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
90; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
91; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
92; CHECK:       for.end:
93; CHECK-NEXT:    ret void
94;
95entry:
96  br label %for.body
97
98for.body:
99  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
100  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
101  %aidx = load i64, ptr %baddr
102  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
103  store i64 %v, ptr %aaddr
104  %iv.next = add nuw nsw i64 %iv, 1
105  %exitcond.not = icmp eq i64 %iv.next, 1024
106  br i1 %exitcond.not, label %for.end, label %for.body
107
108for.end:
109  ret void
110}
111
; indexed_load: sum = 0; for i in [0, 1024): sum += a[b[i]]; return sum.
; The %v and %n arguments are unused.  The load address depends on a value
; loaded from b, so a vectorized form would need a gather feeding an add
; reduction.  The CHECK lines expect the loop to stay scalar; only an LCSSA
; phi for the returned sum appears in for.end.
; NOTE(review): the reason the loop is not vectorized is not visible in this
; file -- confirm against the vectorizer debug output.
112define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
113; CHECK-LABEL: @indexed_load(
114; CHECK-NEXT:  entry:
115; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
116; CHECK:       for.body:
117; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
118; CHECK-NEXT:    [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
119; CHECK-NEXT:    [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
120; CHECK-NEXT:    [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
121; CHECK-NEXT:    [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
122; CHECK-NEXT:    [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
123; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
124; CHECK-NEXT:    [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
125; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
126; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
127; CHECK:       for.end:
128; CHECK-NEXT:    [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ]
129; CHECK-NEXT:    ret i64 [[SUM_NEXT_LCSSA]]
130;
131entry:
132  br label %for.body
133
134for.body:
135  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
136  %sum = phi i64 [0, %entry], [%sum.next, %for.body]
137  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
138  %aidx = load i64, ptr %baddr
139  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
140  %elem = load i64, ptr %aaddr
141  %iv.next = add nuw nsw i64 %iv, 1
142  %sum.next = add i64 %sum, %elem
143  %exitcond.not = icmp eq i64 %iv.next, 1024
144  br i1 %exitcond.not, label %for.end, label %for.body
145
146for.end:
147  ret i64 %sum.next
148}
149
; splat_int: for i in [0, 1024): a[i] = v.  The %n argument is unused.
; The CHECK lines expect a tail-folded vector loop: v is broadcast once in
; vector.ph, and each iteration writes it with a masked store whose mask is
; (vector IV ule splat(1023)).  middle.block branches to for.end with a
; constant-true condition; the original scalar loop is still emitted after
; scalar.ph.
150define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
151; CHECK-LABEL: @splat_int(
152; CHECK-NEXT:  entry:
153; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
154; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 -1025, [[TMP0]]
155; CHECK-NEXT:    br i1 [[TMP1]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
156; CHECK:       vector.ph:
157; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
158; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
159; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP3]], 1
160; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
161; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP2]]
162; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
163; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
164; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
165; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
166; CHECK:       vector.body:
167; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
168; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 0
169; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[INDEX]], i32 0
170; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
171; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
172; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 1 x i64> zeroinitializer, [[TMP6]]
173; CHECK-NEXT:    [[VEC_IV:%.*]] = add <vscale x 1 x i64> [[BROADCAST_SPLAT]], [[TMP7]]
174; CHECK-NEXT:    [[TMP8:%.*]] = icmp ule <vscale x 1 x i64> [[VEC_IV]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1023, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
175; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
176; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0
177; CHECK-NEXT:    call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP10]], i32 8, <vscale x 1 x i1> [[TMP8]])
178; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
179; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]]
180; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
181; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
182; CHECK:       middle.block:
183; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
184; CHECK:       scalar.ph:
185; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
186; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
187; CHECK:       for.body:
188; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
189; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
190; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
191; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
192; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
193; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
194; CHECK:       for.end:
195; CHECK-NEXT:    ret void
196;
197entry:
198  br label %for.body
199
200for.body:
201  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
202  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
203  store i64 %v, ptr %arrayidx
204  %iv.next = add nuw nsw i64 %iv, 1
205  %exitcond.not = icmp eq i64 %iv.next, 1024
206  br i1 %exitcond.not, label %for.end, label %for.body
207
208for.end:
209  ret void
210}
211
; uniform_store: for i in [0, 1024): *b = v; a[i] = v.  The %n argument is
; unused.  The first store writes the same loop-invariant address (%b) on
; every iteration; the second is a consecutive store into a.  The CHECK lines
; expect the loop to stay in its original scalar form under this RUN line.
; NOTE(review): presumably the uniform store blocks predicated vectorization
; here -- confirm against the vectorizer debug output.
212define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
213; CHECK-LABEL: @uniform_store(
214; CHECK-NEXT:  entry:
215; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
216; CHECK:       for.body:
217; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
218; CHECK-NEXT:    store i64 [[V:%.*]], ptr [[B:%.*]], align 8
219; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
220; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
221; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
222; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
223; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
224; CHECK:       for.end:
225; CHECK-NEXT:    ret void
226;
227entry:
228  br label %for.body
229
230for.body:
231  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
232  store i64 %v, ptr %b, align 8
233  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
234  store i64 %v, ptr %arrayidx
235  %iv.next = add nuw nsw i64 %iv, 1
236  %exitcond.not = icmp eq i64 %iv.next, 1024
237  br i1 %exitcond.not, label %for.end, label %for.body
238
239for.end:
240  ret void
241}
242
; uniform_load: for i in [0, 1024): v = *b; a[i] = v.  Returns the value of v
; from the final iteration.  The %n argument is unused.  The load reads the
; same loop-invariant address (%b) on every iteration, and its result is also
; live out of the loop.  The CHECK lines expect the loop to stay scalar; only
; an LCSSA phi for the returned value appears in for.end.
; NOTE(review): the reason the loop is not vectorized is not visible in this
; file -- confirm against the vectorizer debug output.
243define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
244; CHECK-LABEL: @uniform_load(
245; CHECK-NEXT:  entry:
246; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
247; CHECK:       for.body:
248; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
249; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[B:%.*]], align 8
250; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
251; CHECK-NEXT:    store i64 [[V]], ptr [[ARRAYIDX]], align 8
252; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
253; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
254; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
255; CHECK:       for.end:
256; CHECK-NEXT:    [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ]
257; CHECK-NEXT:    ret i64 [[V_LCSSA]]
258;
259entry:
260  br label %for.body
261
262for.body:
263  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
264  %v = load i64, ptr %b, align 8
265  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
266  store i64 %v, ptr %arrayidx
267  %iv.next = add nuw nsw i64 %iv, 1
268  %exitcond.not = icmp eq i64 %iv.next, 1024
269  br i1 %exitcond.not, label %for.end, label %for.body
270
271for.end:
272  ret i64 %v
273}
274