; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

; Tests lowering of the llvm.bswap.* intrinsic on RVV scalable vectors
; (i16/i32/i64 elements across all LMULs). i16 lowers to shift/or; i32 and
; i64 expand to the full shift/mask/or sequence, which differs between RV32
; (constants materialized via stack + vlse64) and RV64 (scalar slli masks).

define <vscale x 1 x i16> @bswap_nxv1i16(<vscale x 1 x i16> %va) {
; CHECK-LABEL: bswap_nxv1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16> %va)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.bswap.nxv1i16(<vscale x 1 x i16>)

define <vscale x 2 x i16> @bswap_nxv2i16(<vscale x 2 x i16> %va) {
; CHECK-LABEL: bswap_nxv2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16> %va)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.bswap.nxv2i16(<vscale x 2 x i16>)

define <vscale x 4 x i16> @bswap_nxv4i16(<vscale x 4 x i16> %va) {
; CHECK-LABEL: bswap_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16> %va)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.bswap.nxv4i16(<vscale x 4 x i16>)

define <vscale x 8 x i16> @bswap_nxv8i16(<vscale x 8 x i16> %va) {
; CHECK-LABEL: bswap_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16> %va)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.bswap.nxv8i16(<vscale x 8 x i16>)

define <vscale x 16 x i16> @bswap_nxv16i16(<vscale x 16 x i16> %va) {
; CHECK-LABEL: bswap_nxv16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16> %va)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.bswap.nxv16i16(<vscale x 16 x i16>)

define <vscale x 32 x i16> @bswap_nxv32i16(<vscale x 32 x i16> %va) {
; CHECK-LABEL: bswap_nxv32i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; CHECK-NEXT:    vsrl.vi v16, v8, 8
; CHECK-NEXT:    vsll.vi v8, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.bswap.nxv32i16(<vscale x 32 x i16> %va)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.bswap.nxv32i16(<vscale x 32 x i16>)

define <vscale x 1 x i32> @bswap_nxv1i32(<vscale x 1 x i32> %va) {
; RV32-LABEL: bswap_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vsrl.vi v10, v8, 24
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vsll.vi v10, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vsll.vi v10, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32> %va)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.bswap.nxv1i32(<vscale x 1 x i32>)

define <vscale x 2 x i32> @bswap_nxv2i32(<vscale x 2 x i32> %va) {
; RV32-LABEL: bswap_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vsrl.vi v10, v8, 24
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vsll.vi v10, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vsll.vi v10, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32> %va)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.bswap.nxv2i32(<vscale x 2 x i32>)

define <vscale x 4 x i32> @bswap_nxv4i32(<vscale x 4 x i32> %va) {
; RV32-LABEL: bswap_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsrl.vi v10, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsrl.vi v12, v8, 24
; RV32-NEXT:    vor.vv v10, v10, v12
; RV32-NEXT:    vsll.vi v12, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsrl.vi v10, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsrl.vi v12, v8, 24
; RV64-NEXT:    vor.vv v10, v10, v12
; RV64-NEXT:    vsll.vi v12, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32> %va)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.bswap.nxv4i32(<vscale x 4 x i32>)

define <vscale x 8 x i32> @bswap_nxv8i32(<vscale x 8 x i32> %va) {
; RV32-LABEL: bswap_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vor.vv v12, v12, v16
; RV32-NEXT:    vsll.vi v16, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsrl.vi v16, v8, 24
; RV64-NEXT:    vor.vv v12, v12, v16
; RV64-NEXT:    vsll.vi v16, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32> %va)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.bswap.nxv8i32(<vscale x 8 x i32>)

define <vscale x 16 x i32> @bswap_nxv16i32(<vscale x 16 x i32> %va) {
; RV32-LABEL: bswap_nxv16i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsrl.vi v24, v8, 24
; RV32-NEXT:    vor.vv v16, v16, v24
; RV32-NEXT:    vsll.vi v24, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v24, v24, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv16i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsll.vi v24, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v24, v24, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32> %va)
  ret <vscale x 16 x i32> %a
}
declare <vscale x 16 x i32> @llvm.bswap.nxv16i32(<vscale x 16 x i32>)

define <vscale x 1 x i64> @bswap_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: bswap_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m1, ta, mu
; RV32-NEXT:    vsrl.vx v9, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v10, v8, a3
; RV32-NEXT:    vand.vx v10, v10, a1
; RV32-NEXT:    vor.vv v9, v10, v9
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v10, (a1), zero
; RV32-NEXT:    vsrl.vi v11, v8, 24
; RV32-NEXT:    vand.vx v11, v11, a0
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vand.vv v10, v12, v10
; RV32-NEXT:    vor.vv v10, v10, v11
; RV32-NEXT:    vlse64.v v11, (a1), zero
; RV32-NEXT:    vor.vv v9, v10, v9
; RV32-NEXT:    vsll.vx v10, v8, a2
; RV32-NEXT:    vsll.vx v12, v8, a3
; RV32-NEXT:    vand.vv v11, v12, v11
; RV32-NEXT:    vlse64.v v12, (a1), zero
; RV32-NEXT:    vor.vv v10, v10, v11
; RV32-NEXT:    vlse64.v v11, (a1), zero
; RV32-NEXT:    vsll.vi v13, v8, 8
; RV32-NEXT:    vand.vv v12, v13, v12
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v11
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
; RV64-NEXT:    vsrl.vx v9, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v10, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v10, v10, a2
; RV64-NEXT:    vor.vv v9, v10, v9
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v10, v10, a2
; RV64-NEXT:    vsrl.vi v11, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v11, v11, a3
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vor.vv v9, v10, v9
; RV64-NEXT:    vsll.vi v10, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v10, v10, a3
; RV64-NEXT:    vsll.vi v11, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v11, v11, a3
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsll.vx v11, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v11, v8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64> %va)
  ret <vscale x 1 x i64> %a
}
declare <vscale x 1 x i64> @llvm.bswap.nxv1i64(<vscale x 1 x i64>)

define <vscale x 2 x i64> @bswap_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: bswap_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m2, ta, mu
; RV32-NEXT:    vsrl.vx v10, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v12, v8, a3
; RV32-NEXT:    vand.vx v12, v12, a1
; RV32-NEXT:    vor.vv v10, v12, v10
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v12, (a1), zero
; RV32-NEXT:    vsrl.vi v14, v8, 24
; RV32-NEXT:    vand.vx v14, v14, a0
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vand.vv v12, v16, v12
; RV32-NEXT:    vor.vv v12, v12, v14
; RV32-NEXT:    vlse64.v v14, (a1), zero
; RV32-NEXT:    vor.vv v10, v12, v10
; RV32-NEXT:    vsll.vx v12, v8, a2
; RV32-NEXT:    vsll.vx v16, v8, a3
; RV32-NEXT:    vand.vv v14, v16, v14
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vor.vv v12, v12, v14
; RV32-NEXT:    vlse64.v v14, (a1), zero
; RV32-NEXT:    vsll.vi v18, v8, 8
; RV32-NEXT:    vand.vv v16, v18, v16
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v14
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
; RV64-NEXT:    vsrl.vx v10, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v12, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v12, v12, a2
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vsrl.vi v12, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v12, v12, a2
; RV64-NEXT:    vsrl.vi v14, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v14, v14, a3
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vsll.vi v12, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v12, v12, a3
; RV64-NEXT:    vsll.vi v14, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v14, v14, a3
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsll.vx v14, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v14, v8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64> %va)
  ret <vscale x 2 x i64> %a
}
declare <vscale x 2 x i64> @llvm.bswap.nxv2i64(<vscale x 2 x i64>)

define <vscale x 4 x i64> @bswap_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: bswap_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m4, ta, mu
; RV32-NEXT:    vsrl.vx v12, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v16, v8, a3
; RV32-NEXT:    vand.vx v16, v16, a1
; RV32-NEXT:    vor.vv v12, v16, v12
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsrl.vi v20, v8, 24
; RV32-NEXT:    vand.vx v20, v20, a0
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vor.vv v16, v16, v20
; RV32-NEXT:    vlse64.v v20, (a1), zero
; RV32-NEXT:    vor.vv v12, v16, v12
; RV32-NEXT:    vsll.vx v16, v8, a2
; RV32-NEXT:    vsll.vx v24, v8, a3
; RV32-NEXT:    vand.vv v20, v24, v20
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vor.vv v16, v16, v20
; RV32-NEXT:    vlse64.v v20, (a1), zero
; RV32-NEXT:    vsll.vi v28, v8, 8
; RV32-NEXT:    vand.vv v24, v28, v24
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v20
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
; RV64-NEXT:    vsrl.vx v12, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v16, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v16, v16, a2
; RV64-NEXT:    vor.vv v12, v16, v12
; RV64-NEXT:    vsrl.vi v16, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v16, v16, a2
; RV64-NEXT:    vsrl.vi v20, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v20, v20, a3
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vor.vv v12, v16, v12
; RV64-NEXT:    vsll.vi v16, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v16, v16, a3
; RV64-NEXT:    vsll.vi v20, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v20, v20, a3
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vsll.vx v20, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v20, v8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64> %va)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.bswap.nxv4i64(<vscale x 4 x i64>)

define <vscale x 8 x i64> @bswap_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: bswap_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m8, ta, mu
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v16, v8, a3
; RV32-NEXT:    vand.vx v16, v16, a1
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsrl.vx v0, v8, a2
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 3
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 16
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v0, v8, 8
; RV32-NEXT:    vand.vv v24, v0, v24
; RV32-NEXT:    vsrl.vi v0, v8, 24
; RV32-NEXT:    vand.vx v0, v0, a0
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vor.vv v24, v24, v0
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8re8.v v0, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v24, v24, v0
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsll.vx v0, v8, a3
; RV32-NEXT:    vand.vv v16, v0, v16
; RV32-NEXT:    vsll.vx v0, v8, a2
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vor.vv v16, v0, v16
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v0, (a1), zero
; RV32-NEXT:    vsll.vi v16, v8, 8
; RV32-NEXT:    vand.vv v16, v16, v24
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v0
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8re8.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8re8.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bswap_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsrl.vx v16, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v24, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v24, v24, a2
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v24, v24, a2
; RV64-NEXT:    vsrl.vi v0, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v0, v0, a3
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vsll.vi v24, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    vsll.vi v0, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v0, v0, a3
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vsll.vx v0, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v0, v8
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64> %va)
  ret <vscale x 8 x i64> %a
}
declare <vscale x 8 x i64> @llvm.bswap.nxv8i64(<vscale x 8 x i64>)