; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

define <vscale x 1 x half> @round_nxv1f16(<vscale x 1 x half> %x) {
; CHECK-LABEL: round_nxv1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI0_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI0_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI0_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, ft0
; CHECK-NEXT:    vfadd.vf v9, v9, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half> %x)
  ret <vscale x 1 x half> %a
}
declare <vscale x 1 x half> @llvm.round.nxv1f16(<vscale x 1 x half>)

define <vscale x 2 x half> @round_nxv2f16(<vscale x 2 x half> %x) {
; CHECK-LABEL: round_nxv2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI1_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI1_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI1_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, ft0
; CHECK-NEXT:    vfadd.vf v9, v9, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half> %x)
  ret <vscale x 2 x half> %a
}
declare <vscale x 2 x half> @llvm.round.nxv2f16(<vscale x 2 x half>)

define <vscale x 4 x half> @round_nxv4f16(<vscale x 4 x half> %x) {
; CHECK-LABEL: round_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI2_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI2_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI2_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, ft0
; CHECK-NEXT:    vfadd.vf v9, v9, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half> %x)
  ret <vscale x 4 x half> %a
}
declare <vscale x 4 x half> @llvm.round.nxv4f16(<vscale x 4 x half>)

define <vscale x 8 x half> @round_nxv8f16(<vscale x 8 x half> %x) {
; CHECK-LABEL: round_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI3_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI3_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI3_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft0
; CHECK-NEXT:    vfadd.vf v10, v10, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v10
; CHECK-NEXT:    vfcvt.f.x.v v10, v10
; CHECK-NEXT:    vfsgnj.vv v10, v10, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half> %x)
  ret <vscale x 8 x half> %a
}
declare <vscale x 8 x half> @llvm.round.nxv8f16(<vscale x 8 x half>)

define <vscale x 16 x half> @round_nxv16f16(<vscale x 16 x half> %x) {
; CHECK-LABEL: round_nxv16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI4_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI4_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI4_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, ft0
; CHECK-NEXT:    vfadd.vf v12, v12, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v12
; CHECK-NEXT:    vfcvt.f.x.v v12, v12
; CHECK-NEXT:    vfsgnj.vv v12, v12, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half> %x)
  ret <vscale x 16 x half> %a
}
declare <vscale x 16 x half> @llvm.round.nxv16f16(<vscale x 16 x half>)

define <vscale x 32 x half> @round_nxv32f16(<vscale x 32 x half> %x) {
; CHECK-LABEL: round_nxv32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
; CHECK-NEXT:    flh ft0, %lo(.LCPI5_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI5_1)
; CHECK-NEXT:    flh ft1, %lo(.LCPI5_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, ft0
; CHECK-NEXT:    vfadd.vf v16, v16, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v16
; CHECK-NEXT:    vfcvt.f.x.v v16, v16
; CHECK-NEXT:    vfsgnj.vv v16, v16, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x half> @llvm.round.nxv32f16(<vscale x 32 x half> %x)
  ret <vscale x 32 x half> %a
}
declare <vscale x 32 x half> @llvm.round.nxv32f16(<vscale x 32 x half>)

define <vscale x 1 x float> @round_nxv1f32(<vscale x 1 x float> %x) {
; CHECK-LABEL: round_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI6_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI6_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI6_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, ft0
; CHECK-NEXT:    vfadd.vf v9, v9, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x float> @llvm.round.nxv1f32(<vscale x 1 x float> %x)
  ret <vscale x 1 x float> %a
}
declare <vscale x 1 x float> @llvm.round.nxv1f32(<vscale x 1 x float>)

define <vscale x 2 x float> @round_nxv2f32(<vscale x 2 x float> %x) {
; CHECK-LABEL: round_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI7_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI7_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI7_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, ft0
; CHECK-NEXT:    vfadd.vf v9, v9, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float> %x)
  ret <vscale x 2 x float> %a
}
declare <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float>)

define <vscale x 4 x float> @round_nxv4f32(<vscale x 4 x float> %x) {
; CHECK-LABEL: round_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI8_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI8_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI8_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft0
; CHECK-NEXT:    vfadd.vf v10, v10, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v10
; CHECK-NEXT:    vfcvt.f.x.v v10, v10
; CHECK-NEXT:    vfsgnj.vv v10, v10, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> %x)
  ret <vscale x 4 x float> %a
}
declare <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float>)

define <vscale x 8 x float> @round_nxv8f32(<vscale x 8 x float> %x) {
; CHECK-LABEL: round_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI9_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI9_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, ft0
; CHECK-NEXT:    vfadd.vf v12, v12, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v12
; CHECK-NEXT:    vfcvt.f.x.v v12, v12
; CHECK-NEXT:    vfsgnj.vv v12, v12, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float> %x)
  ret <vscale x 8 x float> %a
}
declare <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float>)

define <vscale x 16 x float> @round_nxv16f32(<vscale x 16 x float> %x) {
; CHECK-LABEL: round_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
; CHECK-NEXT:    flw ft0, %lo(.LCPI10_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI10_1)
; CHECK-NEXT:    flw ft1, %lo(.LCPI10_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, ft0
; CHECK-NEXT:    vfadd.vf v16, v16, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v16
; CHECK-NEXT:    vfcvt.f.x.v v16, v16
; CHECK-NEXT:    vfsgnj.vv v16, v16, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float> %x)
  ret <vscale x 16 x float> %a
}
declare <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float>)

define <vscale x 1 x double> @round_nxv1f64(<vscale x 1 x double> %x) {
; CHECK-LABEL: round_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
; CHECK-NEXT:    fld ft0, %lo(.LCPI11_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI11_1)
; CHECK-NEXT:    fld ft1, %lo(.LCPI11_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, ft0
; CHECK-NEXT:    vfadd.vf v9, v9, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v9
; CHECK-NEXT:    vfcvt.f.x.v v9, v9
; CHECK-NEXT:    vfsgnj.vv v9, v9, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double> %x)
  ret <vscale x 1 x double> %a
}
declare <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double>)

define <vscale x 2 x double> @round_nxv2f64(<vscale x 2 x double> %x) {
; CHECK-LABEL: round_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
; CHECK-NEXT:    fld ft0, %lo(.LCPI12_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI12_1)
; CHECK-NEXT:    fld ft1, %lo(.LCPI12_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; CHECK-NEXT:    vfabs.v v10, v8
; CHECK-NEXT:    vmflt.vf v0, v10, ft0
; CHECK-NEXT:    vfadd.vf v10, v10, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v10, v10
; CHECK-NEXT:    vfcvt.f.x.v v10, v10
; CHECK-NEXT:    vfsgnj.vv v10, v10, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> %x)
  ret <vscale x 2 x double> %a
}
declare <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double>)

define <vscale x 4 x double> @round_nxv4f64(<vscale x 4 x double> %x) {
; CHECK-LABEL: round_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
; CHECK-NEXT:    fld ft0, %lo(.LCPI13_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI13_1)
; CHECK-NEXT:    fld ft1, %lo(.LCPI13_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; CHECK-NEXT:    vfabs.v v12, v8
; CHECK-NEXT:    vmflt.vf v0, v12, ft0
; CHECK-NEXT:    vfadd.vf v12, v12, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v12, v12
; CHECK-NEXT:    vfcvt.f.x.v v12, v12
; CHECK-NEXT:    vfsgnj.vv v12, v12, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double> %x)
  ret <vscale x 4 x double> %a
}
declare <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double>)

define <vscale x 8 x double> @round_nxv8f64(<vscale x 8 x double> %x) {
; CHECK-LABEL: round_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
; CHECK-NEXT:    fld ft0, %lo(.LCPI14_0)(a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI14_1)
; CHECK-NEXT:    fld ft1, %lo(.LCPI14_1)(a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfabs.v v16, v8
; CHECK-NEXT:    vmflt.vf v0, v16, ft0
; CHECK-NEXT:    vfadd.vf v16, v16, ft1
; CHECK-NEXT:    vfcvt.rtz.x.f.v v16, v16
; CHECK-NEXT:    vfcvt.f.x.v v16, v16
; CHECK-NEXT:    vfsgnj.vv v16, v16, v8
; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double> %x)
  ret <vscale x 8 x double> %a
}
declare <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double>)