; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
; RUN:     -verify-machineinstrs < %s | FileCheck %s
6
; ceil(x) is expanded without a dedicated instruction: t = round-toward-zero(x);
; where t < x, add the .LCPI0_0 pool constant (presumably 1.0 -- constant pool
; not visible in this file); copy x's sign onto the result; then select the
; rounded value only where |x| < .LCPI0_1 (presumably the no-fraction
; threshold), otherwise pass x through unchanged (covers NaN/inf/large values).
define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) {
; CHECK-LABEL: ceil_nxv1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI0_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v9, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI0_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI0_1)(a0)
; CHECK-NEXT:    vfadd.vf v10, v9, ft0
; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft1
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
  ret <vscale x 1 x half> %a
}
declare <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half>)
29
; Same ceil expansion (rtz round, conditional +.LCPI1_0, sign restore,
; |x| < .LCPI1_1 guard) at SEW/LMUL = e16, mf2.
define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) {
; CHECK-LABEL: ceil_nxv2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI1_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v9, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI1_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI1_1)(a0)
; CHECK-NEXT:    vfadd.vf v10, v9, ft0
; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft1
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x)
  ret <vscale x 2 x half> %a
}
declare <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half>)
52
; Same ceil expansion (rtz round, conditional +.LCPI2_0, sign restore,
; |x| < .LCPI2_1 guard) at SEW/LMUL = e16, m1.
define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) {
; CHECK-LABEL: ceil_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI2_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v9, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI2_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI2_1)(a0)
; CHECK-NEXT:    vfadd.vf v10, v9, ft0
; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft1
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
  ret <vscale x 4 x half> %a
}
declare <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half>)
75
; Same ceil expansion (rtz round, conditional +.LCPI3_0, sign restore,
; |x| < .LCPI3_1 guard) at SEW/LMUL = e16, m2; register group stride is 2.
define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) {
; CHECK-LABEL: ceil_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8
; CHECK-NEXT:    vfcvt.f.x.v v10, v10
; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI3_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v10, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI3_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI3_1)(a0)
; CHECK-NEXT:    vfadd.vf v12, v10, ft0
; CHECK-NEXT:    vmerge.vvm v10, v10, v12, v0
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, ft1
; CHECK-NEXT:    vfsgnj.vv v10, v10, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %x)
  ret <vscale x 8 x half> %a
}
declare <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half>)
98
; Same ceil expansion (rtz round, conditional +.LCPI4_0, sign restore,
; |x| < .LCPI4_1 guard) at SEW/LMUL = e16, m4; register group stride is 4.
define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) {
; CHECK-LABEL: ceil_nxv16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8
; CHECK-NEXT:    vfcvt.f.x.v v12, v12
; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI4_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v12, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI4_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI4_1)(a0)
; CHECK-NEXT:    vfadd.vf v16, v12, ft0
; CHECK-NEXT:    vmerge.vvm v12, v12, v16, v0
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, ft1
; CHECK-NEXT:    vfsgnj.vv v12, v12, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> %x)
  ret <vscale x 16 x half> %a
}
declare <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half>)
121
; Same ceil expansion (rtz round, conditional +.LCPI5_0, sign restore,
; |x| < .LCPI5_1 guard) at SEW/LMUL = e16, m8; register group stride is 8.
define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) {
; CHECK-LABEL: ceil_nxv32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8
; CHECK-NEXT:    vfcvt.f.x.v v16, v16
; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI5_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v16, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI5_1)(a0)
; CHECK-NEXT:    vfadd.vf v24, v16, ft0
; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT:    vfabs.v v24, v8
; CHECK-NEXT:    vmflt.vf v0, v24, ft1
; CHECK-NEXT:    vfsgnj.vv v16, v16, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half> %x)
  ret <vscale x 32 x half> %a
}
declare <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half>)
144
; Same ceil expansion for f32 (constants loaded with flw instead of flh):
; rtz round, conditional +.LCPI6_0, sign restore, |x| < .LCPI6_1 guard,
; at SEW/LMUL = e32, mf2.
define <vscale x 1 x float> @ceil_nxv1f32(<vscale x 1 x float> %x) {
; CHECK-LABEL: ceil_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI6_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v9, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI6_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI6_1)(a0)
; CHECK-NEXT:    vfadd.vf v10, v9, ft0
; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft1
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
  ret <vscale x 1 x float> %a
}
declare <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float>)
167
; Same ceil expansion (rtz round, conditional +.LCPI7_0, sign restore,
; |x| < .LCPI7_1 guard) at SEW/LMUL = e32, m1.
define <vscale x 2 x float> @ceil_nxv2f32(<vscale x 2 x float> %x) {
; CHECK-LABEL: ceil_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI7_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v9, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI7_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI7_1)(a0)
; CHECK-NEXT:    vfadd.vf v10, v9, ft0
; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft1
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> %x)
  ret <vscale x 2 x float> %a
}
declare <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float>)
190
; Same ceil expansion (rtz round, conditional +.LCPI8_0, sign restore,
; |x| < .LCPI8_1 guard) at SEW/LMUL = e32, m2.
define <vscale x 4 x float> @ceil_nxv4f32(<vscale x 4 x float> %x) {
; CHECK-LABEL: ceil_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8
; CHECK-NEXT:    vfcvt.f.x.v v10, v10
; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI8_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v10, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI8_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI8_1)(a0)
; CHECK-NEXT:    vfadd.vf v12, v10, ft0
; CHECK-NEXT:    vmerge.vvm v10, v10, v12, v0
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, ft1
; CHECK-NEXT:    vfsgnj.vv v10, v10, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
  ret <vscale x 4 x float> %a
}
declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)
213
; Same ceil expansion (rtz round, conditional +.LCPI9_0, sign restore,
; |x| < .LCPI9_1 guard) at SEW/LMUL = e32, m4.
define <vscale x 8 x float> @ceil_nxv8f32(<vscale x 8 x float> %x) {
; CHECK-LABEL: ceil_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8
; CHECK-NEXT:    vfcvt.f.x.v v12, v12
; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI9_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v12, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI9_1)(a0)
; CHECK-NEXT:    vfadd.vf v16, v12, ft0
; CHECK-NEXT:    vmerge.vvm v12, v12, v16, v0
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, ft1
; CHECK-NEXT:    vfsgnj.vv v12, v12, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> %x)
  ret <vscale x 8 x float> %a
}
declare <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float>)
236
; Same ceil expansion (rtz round, conditional +.LCPI10_0, sign restore,
; |x| < .LCPI10_1 guard) at SEW/LMUL = e32, m8.
define <vscale x 16 x float> @ceil_nxv16f32(<vscale x 16 x float> %x) {
; CHECK-LABEL: ceil_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8
; CHECK-NEXT:    vfcvt.f.x.v v16, v16
; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI10_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v16, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI10_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI10_1)(a0)
; CHECK-NEXT:    vfadd.vf v24, v16, ft0
; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT:    vfabs.v v24, v8
; CHECK-NEXT:    vmflt.vf v0, v24, ft1
; CHECK-NEXT:    vfsgnj.vv v16, v16, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> %x)
  ret <vscale x 16 x float> %a
}
declare <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float>)
259
; Same ceil expansion for f64 (constants loaded with fld): rtz round,
; conditional +.LCPI11_0, sign restore, |x| < .LCPI11_1 guard, at e64/m1.
define <vscale x 1 x double> @ceil_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: ceil_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
; CHECK-NEXT:    fld ft0, %lo(.LCPI11_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v9, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI11_1)
; CHECK-NEXT:    fld ft1, %lo(.LCPI11_1)(a0)
; CHECK-NEXT:    vfadd.vf v10, v9, ft0
; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft1
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
  ret <vscale x 1 x double> %a
}
declare <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double>)
282
; Same ceil expansion (rtz round, conditional +.LCPI12_0, sign restore,
; |x| < .LCPI12_1 guard) at SEW/LMUL = e64, m2.
define <vscale x 2 x double> @ceil_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: ceil_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8
; CHECK-NEXT:    vfcvt.f.x.v v10, v10
; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
; CHECK-NEXT:    fld ft0, %lo(.LCPI12_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v10, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI12_1)
; CHECK-NEXT:    fld ft1, %lo(.LCPI12_1)(a0)
; CHECK-NEXT:    vfadd.vf v12, v10, ft0
; CHECK-NEXT:    vmerge.vvm v10, v10, v12, v0
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, ft1
; CHECK-NEXT:    vfsgnj.vv v10, v10, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %x)
  ret <vscale x 2 x double> %a
}
declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)
305
; Same ceil expansion (rtz round, conditional +.LCPI13_0, sign restore,
; |x| < .LCPI13_1 guard) at SEW/LMUL = e64, m4.
define <vscale x 4 x double> @ceil_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: ceil_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8
; CHECK-NEXT:    vfcvt.f.x.v v12, v12
; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
; CHECK-NEXT:    fld ft0, %lo(.LCPI13_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v12, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI13_1)
; CHECK-NEXT:    fld ft1, %lo(.LCPI13_1)(a0)
; CHECK-NEXT:    vfadd.vf v16, v12, ft0
; CHECK-NEXT:    vmerge.vvm v12, v12, v16, v0
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, ft1
; CHECK-NEXT:    vfsgnj.vv v12, v12, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
  ret <vscale x 4 x double> %a
}
declare <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double>)
328
; Same ceil expansion (rtz round, conditional +.LCPI14_0, sign restore,
; |x| < .LCPI14_1 guard) at SEW/LMUL = e64, m8.
define <vscale x 8 x double> @ceil_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: ceil_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8
; CHECK-NEXT:    vfcvt.f.x.v v16, v16
; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
; CHECK-NEXT:    fld ft0, %lo(.LCPI14_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v16, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI14_1)
; CHECK-NEXT:    fld ft1, %lo(.LCPI14_1)(a0)
; CHECK-NEXT:    vfadd.vf v24, v16, ft0
; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT:    vfabs.v v24, v8
; CHECK-NEXT:    vmflt.vf v0, v24, ft1
; CHECK-NEXT:    vfsgnj.vv v16, v16, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> %x)
  ret <vscale x 8 x double> %a
}
declare <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double>)
351