; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) {
; CHECK-LABEL: ceil_nxv1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI0_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v9, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI0_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI0_1)(a0)
; CHECK-NEXT:    vfadd.vf v10, v9, ft0
; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft1
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
  ret <vscale x 1 x half> %a
}
declare <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half>)

define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) {
; CHECK-LABEL: ceil_nxv2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI1_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v9, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI1_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI1_1)(a0)
; CHECK-NEXT:    vfadd.vf v10, v9, ft0
; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft1
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x)
  ret <vscale x 2 x half> %a
}
declare <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half>)

define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) {
; CHECK-LABEL: ceil_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI2_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v9, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI2_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI2_1)(a0)
; CHECK-NEXT:    vfadd.vf v10, v9, ft0
; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft1
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
  ret <vscale x 4 x half> %a
}
declare <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half>)

define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) {
; CHECK-LABEL: ceil_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8
; CHECK-NEXT:    vfcvt.f.x.v v10, v10
; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI3_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v10, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI3_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI3_1)(a0)
; CHECK-NEXT:    vfadd.vf v12, v10, ft0
; CHECK-NEXT:    vmerge.vvm v10, v10, v12, v0
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, ft1
; CHECK-NEXT:    vfsgnj.vv v10, v10, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %x)
  ret <vscale x 8 x half> %a
}
declare <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half>)

define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) {
; CHECK-LABEL: ceil_nxv16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8
; CHECK-NEXT:    vfcvt.f.x.v v12, v12
; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI4_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v12, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI4_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI4_1)(a0)
; CHECK-NEXT:    vfadd.vf v16, v12, ft0
; CHECK-NEXT:    vmerge.vvm v12, v12, v16, v0
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, ft1
; CHECK-NEXT:    vfsgnj.vv v12, v12, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> %x)
  ret <vscale x 16 x half> %a
}
declare <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half>)

define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) {
; CHECK-LABEL: ceil_nxv32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8
; CHECK-NEXT:    vfcvt.f.x.v v16, v16
; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI5_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v16, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI5_1)(a0)
; CHECK-NEXT:    vfadd.vf v24, v16, ft0
; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT:    vfabs.v v24, v8
; CHECK-NEXT:    vmflt.vf v0, v24, ft1
; CHECK-NEXT:    vfsgnj.vv v16, v16, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half> %x)
  ret <vscale x 32 x half> %a
}
declare <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half>)

define <vscale x 1 x float> @ceil_nxv1f32(<vscale x 1 x float> %x) {
; CHECK-LABEL: ceil_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI6_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v9, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI6_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI6_1)(a0)
; CHECK-NEXT:    vfadd.vf v10, v9, ft0
; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft1
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
  ret <vscale x 1 x float> %a
}
declare <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float>)

define <vscale x 2 x float> @ceil_nxv2f32(<vscale x 2 x float> %x) {
; CHECK-LABEL: ceil_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI7_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v9, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI7_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI7_1)(a0)
; CHECK-NEXT:    vfadd.vf v10, v9, ft0
; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft1
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> %x)
  ret <vscale x 2 x float> %a
}
declare <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float>)

define <vscale x 4 x float> @ceil_nxv4f32(<vscale x 4 x float> %x) {
; CHECK-LABEL: ceil_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8
; CHECK-NEXT:    vfcvt.f.x.v v10, v10
; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI8_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v10, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI8_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI8_1)(a0)
; CHECK-NEXT:    vfadd.vf v12, v10, ft0
; CHECK-NEXT:    vmerge.vvm v10, v10, v12, v0
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, ft1
; CHECK-NEXT:    vfsgnj.vv v10, v10, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
  ret <vscale x 4 x float> %a
}
declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)

define <vscale x 8 x float> @ceil_nxv8f32(<vscale x 8 x float> %x) {
; CHECK-LABEL: ceil_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8
; CHECK-NEXT:    vfcvt.f.x.v v12, v12
; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI9_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v12, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI9_1)(a0)
; CHECK-NEXT:    vfadd.vf v16, v12, ft0
; CHECK-NEXT:    vmerge.vvm v12, v12, v16, v0
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, ft1
; CHECK-NEXT:    vfsgnj.vv v12, v12, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> %x)
  ret <vscale x 8 x float> %a
}
declare <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float>)

define <vscale x 16 x float> @ceil_nxv16f32(<vscale x 16 x float> %x) {
; CHECK-LABEL: ceil_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8
; CHECK-NEXT:    vfcvt.f.x.v v16, v16
; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI10_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v16, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI10_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI10_1)(a0)
; CHECK-NEXT:    vfadd.vf v24, v16, ft0
; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT:    vfabs.v v24, v8
; CHECK-NEXT:    vmflt.vf v0, v24, ft1
; CHECK-NEXT:    vfsgnj.vv v16, v16, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> %x)
  ret <vscale x 16 x float> %a
}
declare <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float>)

define <vscale x 1 x double> @ceil_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: ceil_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
; CHECK-NEXT:    fld ft0, %lo(.LCPI11_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v9, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI11_1)
; CHECK-NEXT:    fld ft1, %lo(.LCPI11_1)(a0)
; CHECK-NEXT:    vfadd.vf v10, v9, ft0
; CHECK-NEXT:    vmerge.vvm v9, v9, v10, v0
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft1
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
  ret <vscale x 1 x double> %a
}
declare <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double>)

define <vscale x 2 x double> @ceil_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: ceil_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v8
; CHECK-NEXT:    vfcvt.f.x.v v10, v10
; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
; CHECK-NEXT:    fld ft0, %lo(.LCPI12_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v10, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI12_1)
; CHECK-NEXT:    fld ft1, %lo(.LCPI12_1)(a0)
; CHECK-NEXT:    vfadd.vf v12, v10, ft0
; CHECK-NEXT:    vmerge.vvm v10, v10, v12, v0
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, ft1
; CHECK-NEXT:    vfsgnj.vv v10, v10, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %x)
  ret <vscale x 2 x double> %a
}
declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)

define <vscale x 4 x double> @ceil_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: ceil_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v8
; CHECK-NEXT:    vfcvt.f.x.v v12, v12
; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
; CHECK-NEXT:    fld ft0, %lo(.LCPI13_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v12, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI13_1)
; CHECK-NEXT:    fld ft1, %lo(.LCPI13_1)(a0)
; CHECK-NEXT:    vfadd.vf v16, v12, ft0
; CHECK-NEXT:    vmerge.vvm v12, v12, v16, v0
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, ft1
; CHECK-NEXT:    vfsgnj.vv v12, v12, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
  ret <vscale x 4 x double> %a
}
declare <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double>)

define <vscale x 8 x double> @ceil_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: ceil_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v8
; CHECK-NEXT:    vfcvt.f.x.v v16, v16
; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
; CHECK-NEXT:    fld ft0, %lo(.LCPI14_0)(a0)
; CHECK-NEXT:    vmflt.vv v0, v16, v8
; CHECK-NEXT:    lui a0, %hi(.LCPI14_1)
; CHECK-NEXT:    fld ft1, %lo(.LCPI14_1)(a0)
; CHECK-NEXT:    vfadd.vf v24, v16, ft0
; CHECK-NEXT:    vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT:    vfabs.v v24, v8
; CHECK-NEXT:    vmflt.vf v0, v24, ft1
; CHECK-NEXT:    vfsgnj.vv v16, v16, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> %x)
  ret <vscale x 8 x double> %a
}
declare <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double>)