1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
4
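; The i8 tests below expect the generic shift-and-mask lowering of
; llvm.bitreverse: swap nibbles, then 2-bit pairs, then single bits. A
; minimal scalar sketch of the same sequence (x is a hypothetical 8-bit
; value; the masks match the immediates in the checks):
;   x = ((x & 0x0f) << 4) | ((x >> 4) & 0x0f)
;   x = ((x & 0x33) << 2) | ((x >> 2) & 0x33)
;   x = ((x & 0x55) << 1) | ((x >> 1) & 0x55)
; In the vector code the final << 1 appears as vadd.vv v8, v8, v8.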
5define <vscale x 1 x i8> @bitreverse_nxv1i8(<vscale x 1 x i8> %va) {
6; CHECK-LABEL: bitreverse_nxv1i8:
7; CHECK:       # %bb.0:
8; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
9; CHECK-NEXT:    vand.vi v9, v8, 15
10; CHECK-NEXT:    vsll.vi v9, v9, 4
11; CHECK-NEXT:    vsrl.vi v8, v8, 4
12; CHECK-NEXT:    vand.vi v8, v8, 15
13; CHECK-NEXT:    vor.vv v8, v8, v9
14; CHECK-NEXT:    vsrl.vi v9, v8, 2
15; CHECK-NEXT:    li a0, 51
16; CHECK-NEXT:    vand.vx v9, v9, a0
17; CHECK-NEXT:    vand.vx v8, v8, a0
18; CHECK-NEXT:    vsll.vi v8, v8, 2
19; CHECK-NEXT:    vor.vv v8, v9, v8
20; CHECK-NEXT:    vsrl.vi v9, v8, 1
21; CHECK-NEXT:    li a0, 85
22; CHECK-NEXT:    vand.vx v9, v9, a0
23; CHECK-NEXT:    vand.vx v8, v8, a0
24; CHECK-NEXT:    vadd.vv v8, v8, v8
25; CHECK-NEXT:    vor.vv v8, v9, v8
26; CHECK-NEXT:    ret
27  %a = call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> %va)
28  ret <vscale x 1 x i8> %a
29}
30declare <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8>)
31
32define <vscale x 2 x i8> @bitreverse_nxv2i8(<vscale x 2 x i8> %va) {
33; CHECK-LABEL: bitreverse_nxv2i8:
34; CHECK:       # %bb.0:
35; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
36; CHECK-NEXT:    vand.vi v9, v8, 15
37; CHECK-NEXT:    vsll.vi v9, v9, 4
38; CHECK-NEXT:    vsrl.vi v8, v8, 4
39; CHECK-NEXT:    vand.vi v8, v8, 15
40; CHECK-NEXT:    vor.vv v8, v8, v9
41; CHECK-NEXT:    vsrl.vi v9, v8, 2
42; CHECK-NEXT:    li a0, 51
43; CHECK-NEXT:    vand.vx v9, v9, a0
44; CHECK-NEXT:    vand.vx v8, v8, a0
45; CHECK-NEXT:    vsll.vi v8, v8, 2
46; CHECK-NEXT:    vor.vv v8, v9, v8
47; CHECK-NEXT:    vsrl.vi v9, v8, 1
48; CHECK-NEXT:    li a0, 85
49; CHECK-NEXT:    vand.vx v9, v9, a0
50; CHECK-NEXT:    vand.vx v8, v8, a0
51; CHECK-NEXT:    vadd.vv v8, v8, v8
52; CHECK-NEXT:    vor.vv v8, v9, v8
53; CHECK-NEXT:    ret
54  %a = call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> %va)
55  ret <vscale x 2 x i8> %a
56}
57declare <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8>)
58
59define <vscale x 4 x i8> @bitreverse_nxv4i8(<vscale x 4 x i8> %va) {
60; CHECK-LABEL: bitreverse_nxv4i8:
61; CHECK:       # %bb.0:
62; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
63; CHECK-NEXT:    vand.vi v9, v8, 15
64; CHECK-NEXT:    vsll.vi v9, v9, 4
65; CHECK-NEXT:    vsrl.vi v8, v8, 4
66; CHECK-NEXT:    vand.vi v8, v8, 15
67; CHECK-NEXT:    vor.vv v8, v8, v9
68; CHECK-NEXT:    vsrl.vi v9, v8, 2
69; CHECK-NEXT:    li a0, 51
70; CHECK-NEXT:    vand.vx v9, v9, a0
71; CHECK-NEXT:    vand.vx v8, v8, a0
72; CHECK-NEXT:    vsll.vi v8, v8, 2
73; CHECK-NEXT:    vor.vv v8, v9, v8
74; CHECK-NEXT:    vsrl.vi v9, v8, 1
75; CHECK-NEXT:    li a0, 85
76; CHECK-NEXT:    vand.vx v9, v9, a0
77; CHECK-NEXT:    vand.vx v8, v8, a0
78; CHECK-NEXT:    vadd.vv v8, v8, v8
79; CHECK-NEXT:    vor.vv v8, v9, v8
80; CHECK-NEXT:    ret
81  %a = call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> %va)
82  ret <vscale x 4 x i8> %a
83}
84declare <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8>)
85
86define <vscale x 8 x i8> @bitreverse_nxv8i8(<vscale x 8 x i8> %va) {
87; CHECK-LABEL: bitreverse_nxv8i8:
88; CHECK:       # %bb.0:
89; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
90; CHECK-NEXT:    vand.vi v9, v8, 15
91; CHECK-NEXT:    vsll.vi v9, v9, 4
92; CHECK-NEXT:    vsrl.vi v8, v8, 4
93; CHECK-NEXT:    vand.vi v8, v8, 15
94; CHECK-NEXT:    vor.vv v8, v8, v9
95; CHECK-NEXT:    vsrl.vi v9, v8, 2
96; CHECK-NEXT:    li a0, 51
97; CHECK-NEXT:    vand.vx v9, v9, a0
98; CHECK-NEXT:    vand.vx v8, v8, a0
99; CHECK-NEXT:    vsll.vi v8, v8, 2
100; CHECK-NEXT:    vor.vv v8, v9, v8
101; CHECK-NEXT:    vsrl.vi v9, v8, 1
102; CHECK-NEXT:    li a0, 85
103; CHECK-NEXT:    vand.vx v9, v9, a0
104; CHECK-NEXT:    vand.vx v8, v8, a0
105; CHECK-NEXT:    vadd.vv v8, v8, v8
106; CHECK-NEXT:    vor.vv v8, v9, v8
107; CHECK-NEXT:    ret
108  %a = call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> %va)
109  ret <vscale x 8 x i8> %a
110}
111declare <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8>)
112
113define <vscale x 16 x i8> @bitreverse_nxv16i8(<vscale x 16 x i8> %va) {
114; CHECK-LABEL: bitreverse_nxv16i8:
115; CHECK:       # %bb.0:
116; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
117; CHECK-NEXT:    vand.vi v10, v8, 15
118; CHECK-NEXT:    vsll.vi v10, v10, 4
119; CHECK-NEXT:    vsrl.vi v8, v8, 4
120; CHECK-NEXT:    vand.vi v8, v8, 15
121; CHECK-NEXT:    vor.vv v8, v8, v10
122; CHECK-NEXT:    vsrl.vi v10, v8, 2
123; CHECK-NEXT:    li a0, 51
124; CHECK-NEXT:    vand.vx v10, v10, a0
125; CHECK-NEXT:    vand.vx v8, v8, a0
126; CHECK-NEXT:    vsll.vi v8, v8, 2
127; CHECK-NEXT:    vor.vv v8, v10, v8
128; CHECK-NEXT:    vsrl.vi v10, v8, 1
129; CHECK-NEXT:    li a0, 85
130; CHECK-NEXT:    vand.vx v10, v10, a0
131; CHECK-NEXT:    vand.vx v8, v8, a0
132; CHECK-NEXT:    vadd.vv v8, v8, v8
133; CHECK-NEXT:    vor.vv v8, v10, v8
134; CHECK-NEXT:    ret
135  %a = call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> %va)
136  ret <vscale x 16 x i8> %a
137}
138declare <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8>)
139
140define <vscale x 32 x i8> @bitreverse_nxv32i8(<vscale x 32 x i8> %va) {
141; CHECK-LABEL: bitreverse_nxv32i8:
142; CHECK:       # %bb.0:
143; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, mu
144; CHECK-NEXT:    vand.vi v12, v8, 15
145; CHECK-NEXT:    vsll.vi v12, v12, 4
146; CHECK-NEXT:    vsrl.vi v8, v8, 4
147; CHECK-NEXT:    vand.vi v8, v8, 15
148; CHECK-NEXT:    vor.vv v8, v8, v12
149; CHECK-NEXT:    vsrl.vi v12, v8, 2
150; CHECK-NEXT:    li a0, 51
151; CHECK-NEXT:    vand.vx v12, v12, a0
152; CHECK-NEXT:    vand.vx v8, v8, a0
153; CHECK-NEXT:    vsll.vi v8, v8, 2
154; CHECK-NEXT:    vor.vv v8, v12, v8
155; CHECK-NEXT:    vsrl.vi v12, v8, 1
156; CHECK-NEXT:    li a0, 85
157; CHECK-NEXT:    vand.vx v12, v12, a0
158; CHECK-NEXT:    vand.vx v8, v8, a0
159; CHECK-NEXT:    vadd.vv v8, v8, v8
160; CHECK-NEXT:    vor.vv v8, v12, v8
161; CHECK-NEXT:    ret
162  %a = call <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8> %va)
163  ret <vscale x 32 x i8> %a
164}
165declare <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8>)
166
167define <vscale x 64 x i8> @bitreverse_nxv64i8(<vscale x 64 x i8> %va) {
168; CHECK-LABEL: bitreverse_nxv64i8:
169; CHECK:       # %bb.0:
170; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, mu
171; CHECK-NEXT:    vand.vi v16, v8, 15
172; CHECK-NEXT:    vsll.vi v16, v16, 4
173; CHECK-NEXT:    vsrl.vi v8, v8, 4
174; CHECK-NEXT:    vand.vi v8, v8, 15
175; CHECK-NEXT:    vor.vv v8, v8, v16
176; CHECK-NEXT:    vsrl.vi v16, v8, 2
177; CHECK-NEXT:    li a0, 51
178; CHECK-NEXT:    vand.vx v16, v16, a0
179; CHECK-NEXT:    vand.vx v8, v8, a0
180; CHECK-NEXT:    vsll.vi v8, v8, 2
181; CHECK-NEXT:    vor.vv v8, v16, v8
182; CHECK-NEXT:    vsrl.vi v16, v8, 1
183; CHECK-NEXT:    li a0, 85
184; CHECK-NEXT:    vand.vx v16, v16, a0
185; CHECK-NEXT:    vand.vx v8, v8, a0
186; CHECK-NEXT:    vadd.vv v8, v8, v8
187; CHECK-NEXT:    vor.vv v8, v16, v8
188; CHECK-NEXT:    ret
189  %a = call <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8> %va)
190  ret <vscale x 64 x i8> %a
191}
192declare <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8>)
193
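; For i16 the expected code first swaps the two bytes (shift left/right by 8
; and OR), then repeats the nibble/pair/bit stages with 16-bit masks built by
; lui+addi (addiw on RV64): 0x0f0f, 0x3333 and 0x5555. The addi/addiw
; difference is the only reason separate RV32 and RV64 checks are needed here.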
194define <vscale x 1 x i16> @bitreverse_nxv1i16(<vscale x 1 x i16> %va) {
195; RV32-LABEL: bitreverse_nxv1i16:
196; RV32:       # %bb.0:
197; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
198; RV32-NEXT:    vsrl.vi v9, v8, 8
199; RV32-NEXT:    vsll.vi v8, v8, 8
200; RV32-NEXT:    vor.vv v8, v8, v9
201; RV32-NEXT:    vsrl.vi v9, v8, 4
202; RV32-NEXT:    lui a0, 1
203; RV32-NEXT:    addi a0, a0, -241
204; RV32-NEXT:    vand.vx v9, v9, a0
205; RV32-NEXT:    vand.vx v8, v8, a0
206; RV32-NEXT:    vsll.vi v8, v8, 4
207; RV32-NEXT:    vor.vv v8, v9, v8
208; RV32-NEXT:    vsrl.vi v9, v8, 2
209; RV32-NEXT:    lui a0, 3
210; RV32-NEXT:    addi a0, a0, 819
211; RV32-NEXT:    vand.vx v9, v9, a0
212; RV32-NEXT:    vand.vx v8, v8, a0
213; RV32-NEXT:    vsll.vi v8, v8, 2
214; RV32-NEXT:    vor.vv v8, v9, v8
215; RV32-NEXT:    vsrl.vi v9, v8, 1
216; RV32-NEXT:    lui a0, 5
217; RV32-NEXT:    addi a0, a0, 1365
218; RV32-NEXT:    vand.vx v9, v9, a0
219; RV32-NEXT:    vand.vx v8, v8, a0
220; RV32-NEXT:    vadd.vv v8, v8, v8
221; RV32-NEXT:    vor.vv v8, v9, v8
222; RV32-NEXT:    ret
223;
224; RV64-LABEL: bitreverse_nxv1i16:
225; RV64:       # %bb.0:
226; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
227; RV64-NEXT:    vsrl.vi v9, v8, 8
228; RV64-NEXT:    vsll.vi v8, v8, 8
229; RV64-NEXT:    vor.vv v8, v8, v9
230; RV64-NEXT:    vsrl.vi v9, v8, 4
231; RV64-NEXT:    lui a0, 1
232; RV64-NEXT:    addiw a0, a0, -241
233; RV64-NEXT:    vand.vx v9, v9, a0
234; RV64-NEXT:    vand.vx v8, v8, a0
235; RV64-NEXT:    vsll.vi v8, v8, 4
236; RV64-NEXT:    vor.vv v8, v9, v8
237; RV64-NEXT:    vsrl.vi v9, v8, 2
238; RV64-NEXT:    lui a0, 3
239; RV64-NEXT:    addiw a0, a0, 819
240; RV64-NEXT:    vand.vx v9, v9, a0
241; RV64-NEXT:    vand.vx v8, v8, a0
242; RV64-NEXT:    vsll.vi v8, v8, 2
243; RV64-NEXT:    vor.vv v8, v9, v8
244; RV64-NEXT:    vsrl.vi v9, v8, 1
245; RV64-NEXT:    lui a0, 5
246; RV64-NEXT:    addiw a0, a0, 1365
247; RV64-NEXT:    vand.vx v9, v9, a0
248; RV64-NEXT:    vand.vx v8, v8, a0
249; RV64-NEXT:    vadd.vv v8, v8, v8
250; RV64-NEXT:    vor.vv v8, v9, v8
251; RV64-NEXT:    ret
252  %a = call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> %va)
253  ret <vscale x 1 x i16> %a
254}
255declare <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16>)
256
257define <vscale x 2 x i16> @bitreverse_nxv2i16(<vscale x 2 x i16> %va) {
258; RV32-LABEL: bitreverse_nxv2i16:
259; RV32:       # %bb.0:
260; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
261; RV32-NEXT:    vsrl.vi v9, v8, 8
262; RV32-NEXT:    vsll.vi v8, v8, 8
263; RV32-NEXT:    vor.vv v8, v8, v9
264; RV32-NEXT:    vsrl.vi v9, v8, 4
265; RV32-NEXT:    lui a0, 1
266; RV32-NEXT:    addi a0, a0, -241
267; RV32-NEXT:    vand.vx v9, v9, a0
268; RV32-NEXT:    vand.vx v8, v8, a0
269; RV32-NEXT:    vsll.vi v8, v8, 4
270; RV32-NEXT:    vor.vv v8, v9, v8
271; RV32-NEXT:    vsrl.vi v9, v8, 2
272; RV32-NEXT:    lui a0, 3
273; RV32-NEXT:    addi a0, a0, 819
274; RV32-NEXT:    vand.vx v9, v9, a0
275; RV32-NEXT:    vand.vx v8, v8, a0
276; RV32-NEXT:    vsll.vi v8, v8, 2
277; RV32-NEXT:    vor.vv v8, v9, v8
278; RV32-NEXT:    vsrl.vi v9, v8, 1
279; RV32-NEXT:    lui a0, 5
280; RV32-NEXT:    addi a0, a0, 1365
281; RV32-NEXT:    vand.vx v9, v9, a0
282; RV32-NEXT:    vand.vx v8, v8, a0
283; RV32-NEXT:    vadd.vv v8, v8, v8
284; RV32-NEXT:    vor.vv v8, v9, v8
285; RV32-NEXT:    ret
286;
287; RV64-LABEL: bitreverse_nxv2i16:
288; RV64:       # %bb.0:
289; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
290; RV64-NEXT:    vsrl.vi v9, v8, 8
291; RV64-NEXT:    vsll.vi v8, v8, 8
292; RV64-NEXT:    vor.vv v8, v8, v9
293; RV64-NEXT:    vsrl.vi v9, v8, 4
294; RV64-NEXT:    lui a0, 1
295; RV64-NEXT:    addiw a0, a0, -241
296; RV64-NEXT:    vand.vx v9, v9, a0
297; RV64-NEXT:    vand.vx v8, v8, a0
298; RV64-NEXT:    vsll.vi v8, v8, 4
299; RV64-NEXT:    vor.vv v8, v9, v8
300; RV64-NEXT:    vsrl.vi v9, v8, 2
301; RV64-NEXT:    lui a0, 3
302; RV64-NEXT:    addiw a0, a0, 819
303; RV64-NEXT:    vand.vx v9, v9, a0
304; RV64-NEXT:    vand.vx v8, v8, a0
305; RV64-NEXT:    vsll.vi v8, v8, 2
306; RV64-NEXT:    vor.vv v8, v9, v8
307; RV64-NEXT:    vsrl.vi v9, v8, 1
308; RV64-NEXT:    lui a0, 5
309; RV64-NEXT:    addiw a0, a0, 1365
310; RV64-NEXT:    vand.vx v9, v9, a0
311; RV64-NEXT:    vand.vx v8, v8, a0
312; RV64-NEXT:    vadd.vv v8, v8, v8
313; RV64-NEXT:    vor.vv v8, v9, v8
314; RV64-NEXT:    ret
315  %a = call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> %va)
316  ret <vscale x 2 x i16> %a
317}
318declare <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16>)
319
320define <vscale x 4 x i16> @bitreverse_nxv4i16(<vscale x 4 x i16> %va) {
321; RV32-LABEL: bitreverse_nxv4i16:
322; RV32:       # %bb.0:
323; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
324; RV32-NEXT:    vsrl.vi v9, v8, 8
325; RV32-NEXT:    vsll.vi v8, v8, 8
326; RV32-NEXT:    vor.vv v8, v8, v9
327; RV32-NEXT:    vsrl.vi v9, v8, 4
328; RV32-NEXT:    lui a0, 1
329; RV32-NEXT:    addi a0, a0, -241
330; RV32-NEXT:    vand.vx v9, v9, a0
331; RV32-NEXT:    vand.vx v8, v8, a0
332; RV32-NEXT:    vsll.vi v8, v8, 4
333; RV32-NEXT:    vor.vv v8, v9, v8
334; RV32-NEXT:    vsrl.vi v9, v8, 2
335; RV32-NEXT:    lui a0, 3
336; RV32-NEXT:    addi a0, a0, 819
337; RV32-NEXT:    vand.vx v9, v9, a0
338; RV32-NEXT:    vand.vx v8, v8, a0
339; RV32-NEXT:    vsll.vi v8, v8, 2
340; RV32-NEXT:    vor.vv v8, v9, v8
341; RV32-NEXT:    vsrl.vi v9, v8, 1
342; RV32-NEXT:    lui a0, 5
343; RV32-NEXT:    addi a0, a0, 1365
344; RV32-NEXT:    vand.vx v9, v9, a0
345; RV32-NEXT:    vand.vx v8, v8, a0
346; RV32-NEXT:    vadd.vv v8, v8, v8
347; RV32-NEXT:    vor.vv v8, v9, v8
348; RV32-NEXT:    ret
349;
350; RV64-LABEL: bitreverse_nxv4i16:
351; RV64:       # %bb.0:
352; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
353; RV64-NEXT:    vsrl.vi v9, v8, 8
354; RV64-NEXT:    vsll.vi v8, v8, 8
355; RV64-NEXT:    vor.vv v8, v8, v9
356; RV64-NEXT:    vsrl.vi v9, v8, 4
357; RV64-NEXT:    lui a0, 1
358; RV64-NEXT:    addiw a0, a0, -241
359; RV64-NEXT:    vand.vx v9, v9, a0
360; RV64-NEXT:    vand.vx v8, v8, a0
361; RV64-NEXT:    vsll.vi v8, v8, 4
362; RV64-NEXT:    vor.vv v8, v9, v8
363; RV64-NEXT:    vsrl.vi v9, v8, 2
364; RV64-NEXT:    lui a0, 3
365; RV64-NEXT:    addiw a0, a0, 819
366; RV64-NEXT:    vand.vx v9, v9, a0
367; RV64-NEXT:    vand.vx v8, v8, a0
368; RV64-NEXT:    vsll.vi v8, v8, 2
369; RV64-NEXT:    vor.vv v8, v9, v8
370; RV64-NEXT:    vsrl.vi v9, v8, 1
371; RV64-NEXT:    lui a0, 5
372; RV64-NEXT:    addiw a0, a0, 1365
373; RV64-NEXT:    vand.vx v9, v9, a0
374; RV64-NEXT:    vand.vx v8, v8, a0
375; RV64-NEXT:    vadd.vv v8, v8, v8
376; RV64-NEXT:    vor.vv v8, v9, v8
377; RV64-NEXT:    ret
378  %a = call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> %va)
379  ret <vscale x 4 x i16> %a
380}
381declare <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16>)
382
383define <vscale x 8 x i16> @bitreverse_nxv8i16(<vscale x 8 x i16> %va) {
384; RV32-LABEL: bitreverse_nxv8i16:
385; RV32:       # %bb.0:
386; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
387; RV32-NEXT:    vsrl.vi v10, v8, 8
388; RV32-NEXT:    vsll.vi v8, v8, 8
389; RV32-NEXT:    vor.vv v8, v8, v10
390; RV32-NEXT:    vsrl.vi v10, v8, 4
391; RV32-NEXT:    lui a0, 1
392; RV32-NEXT:    addi a0, a0, -241
393; RV32-NEXT:    vand.vx v10, v10, a0
394; RV32-NEXT:    vand.vx v8, v8, a0
395; RV32-NEXT:    vsll.vi v8, v8, 4
396; RV32-NEXT:    vor.vv v8, v10, v8
397; RV32-NEXT:    vsrl.vi v10, v8, 2
398; RV32-NEXT:    lui a0, 3
399; RV32-NEXT:    addi a0, a0, 819
400; RV32-NEXT:    vand.vx v10, v10, a0
401; RV32-NEXT:    vand.vx v8, v8, a0
402; RV32-NEXT:    vsll.vi v8, v8, 2
403; RV32-NEXT:    vor.vv v8, v10, v8
404; RV32-NEXT:    vsrl.vi v10, v8, 1
405; RV32-NEXT:    lui a0, 5
406; RV32-NEXT:    addi a0, a0, 1365
407; RV32-NEXT:    vand.vx v10, v10, a0
408; RV32-NEXT:    vand.vx v8, v8, a0
409; RV32-NEXT:    vadd.vv v8, v8, v8
410; RV32-NEXT:    vor.vv v8, v10, v8
411; RV32-NEXT:    ret
412;
413; RV64-LABEL: bitreverse_nxv8i16:
414; RV64:       # %bb.0:
415; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
416; RV64-NEXT:    vsrl.vi v10, v8, 8
417; RV64-NEXT:    vsll.vi v8, v8, 8
418; RV64-NEXT:    vor.vv v8, v8, v10
419; RV64-NEXT:    vsrl.vi v10, v8, 4
420; RV64-NEXT:    lui a0, 1
421; RV64-NEXT:    addiw a0, a0, -241
422; RV64-NEXT:    vand.vx v10, v10, a0
423; RV64-NEXT:    vand.vx v8, v8, a0
424; RV64-NEXT:    vsll.vi v8, v8, 4
425; RV64-NEXT:    vor.vv v8, v10, v8
426; RV64-NEXT:    vsrl.vi v10, v8, 2
427; RV64-NEXT:    lui a0, 3
428; RV64-NEXT:    addiw a0, a0, 819
429; RV64-NEXT:    vand.vx v10, v10, a0
430; RV64-NEXT:    vand.vx v8, v8, a0
431; RV64-NEXT:    vsll.vi v8, v8, 2
432; RV64-NEXT:    vor.vv v8, v10, v8
433; RV64-NEXT:    vsrl.vi v10, v8, 1
434; RV64-NEXT:    lui a0, 5
435; RV64-NEXT:    addiw a0, a0, 1365
436; RV64-NEXT:    vand.vx v10, v10, a0
437; RV64-NEXT:    vand.vx v8, v8, a0
438; RV64-NEXT:    vadd.vv v8, v8, v8
439; RV64-NEXT:    vor.vv v8, v10, v8
440; RV64-NEXT:    ret
441  %a = call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> %va)
442  ret <vscale x 8 x i16> %a
443}
444declare <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16>)
445
446define <vscale x 16 x i16> @bitreverse_nxv16i16(<vscale x 16 x i16> %va) {
447; RV32-LABEL: bitreverse_nxv16i16:
448; RV32:       # %bb.0:
449; RV32-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
450; RV32-NEXT:    vsrl.vi v12, v8, 8
451; RV32-NEXT:    vsll.vi v8, v8, 8
452; RV32-NEXT:    vor.vv v8, v8, v12
453; RV32-NEXT:    vsrl.vi v12, v8, 4
454; RV32-NEXT:    lui a0, 1
455; RV32-NEXT:    addi a0, a0, -241
456; RV32-NEXT:    vand.vx v12, v12, a0
457; RV32-NEXT:    vand.vx v8, v8, a0
458; RV32-NEXT:    vsll.vi v8, v8, 4
459; RV32-NEXT:    vor.vv v8, v12, v8
460; RV32-NEXT:    vsrl.vi v12, v8, 2
461; RV32-NEXT:    lui a0, 3
462; RV32-NEXT:    addi a0, a0, 819
463; RV32-NEXT:    vand.vx v12, v12, a0
464; RV32-NEXT:    vand.vx v8, v8, a0
465; RV32-NEXT:    vsll.vi v8, v8, 2
466; RV32-NEXT:    vor.vv v8, v12, v8
467; RV32-NEXT:    vsrl.vi v12, v8, 1
468; RV32-NEXT:    lui a0, 5
469; RV32-NEXT:    addi a0, a0, 1365
470; RV32-NEXT:    vand.vx v12, v12, a0
471; RV32-NEXT:    vand.vx v8, v8, a0
472; RV32-NEXT:    vadd.vv v8, v8, v8
473; RV32-NEXT:    vor.vv v8, v12, v8
474; RV32-NEXT:    ret
475;
476; RV64-LABEL: bitreverse_nxv16i16:
477; RV64:       # %bb.0:
478; RV64-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
479; RV64-NEXT:    vsrl.vi v12, v8, 8
480; RV64-NEXT:    vsll.vi v8, v8, 8
481; RV64-NEXT:    vor.vv v8, v8, v12
482; RV64-NEXT:    vsrl.vi v12, v8, 4
483; RV64-NEXT:    lui a0, 1
484; RV64-NEXT:    addiw a0, a0, -241
485; RV64-NEXT:    vand.vx v12, v12, a0
486; RV64-NEXT:    vand.vx v8, v8, a0
487; RV64-NEXT:    vsll.vi v8, v8, 4
488; RV64-NEXT:    vor.vv v8, v12, v8
489; RV64-NEXT:    vsrl.vi v12, v8, 2
490; RV64-NEXT:    lui a0, 3
491; RV64-NEXT:    addiw a0, a0, 819
492; RV64-NEXT:    vand.vx v12, v12, a0
493; RV64-NEXT:    vand.vx v8, v8, a0
494; RV64-NEXT:    vsll.vi v8, v8, 2
495; RV64-NEXT:    vor.vv v8, v12, v8
496; RV64-NEXT:    vsrl.vi v12, v8, 1
497; RV64-NEXT:    lui a0, 5
498; RV64-NEXT:    addiw a0, a0, 1365
499; RV64-NEXT:    vand.vx v12, v12, a0
500; RV64-NEXT:    vand.vx v8, v8, a0
501; RV64-NEXT:    vadd.vv v8, v8, v8
502; RV64-NEXT:    vor.vv v8, v12, v8
503; RV64-NEXT:    ret
504  %a = call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> %va)
505  ret <vscale x 16 x i16> %a
506}
507declare <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16>)
508
509define <vscale x 32 x i16> @bitreverse_nxv32i16(<vscale x 32 x i16> %va) {
510; RV32-LABEL: bitreverse_nxv32i16:
511; RV32:       # %bb.0:
512; RV32-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
513; RV32-NEXT:    vsrl.vi v16, v8, 8
514; RV32-NEXT:    vsll.vi v8, v8, 8
515; RV32-NEXT:    vor.vv v8, v8, v16
516; RV32-NEXT:    vsrl.vi v16, v8, 4
517; RV32-NEXT:    lui a0, 1
518; RV32-NEXT:    addi a0, a0, -241
519; RV32-NEXT:    vand.vx v16, v16, a0
520; RV32-NEXT:    vand.vx v8, v8, a0
521; RV32-NEXT:    vsll.vi v8, v8, 4
522; RV32-NEXT:    vor.vv v8, v16, v8
523; RV32-NEXT:    vsrl.vi v16, v8, 2
524; RV32-NEXT:    lui a0, 3
525; RV32-NEXT:    addi a0, a0, 819
526; RV32-NEXT:    vand.vx v16, v16, a0
527; RV32-NEXT:    vand.vx v8, v8, a0
528; RV32-NEXT:    vsll.vi v8, v8, 2
529; RV32-NEXT:    vor.vv v8, v16, v8
530; RV32-NEXT:    vsrl.vi v16, v8, 1
531; RV32-NEXT:    lui a0, 5
532; RV32-NEXT:    addi a0, a0, 1365
533; RV32-NEXT:    vand.vx v16, v16, a0
534; RV32-NEXT:    vand.vx v8, v8, a0
535; RV32-NEXT:    vadd.vv v8, v8, v8
536; RV32-NEXT:    vor.vv v8, v16, v8
537; RV32-NEXT:    ret
538;
539; RV64-LABEL: bitreverse_nxv32i16:
540; RV64:       # %bb.0:
541; RV64-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
542; RV64-NEXT:    vsrl.vi v16, v8, 8
543; RV64-NEXT:    vsll.vi v8, v8, 8
544; RV64-NEXT:    vor.vv v8, v8, v16
545; RV64-NEXT:    vsrl.vi v16, v8, 4
546; RV64-NEXT:    lui a0, 1
547; RV64-NEXT:    addiw a0, a0, -241
548; RV64-NEXT:    vand.vx v16, v16, a0
549; RV64-NEXT:    vand.vx v8, v8, a0
550; RV64-NEXT:    vsll.vi v8, v8, 4
551; RV64-NEXT:    vor.vv v8, v16, v8
552; RV64-NEXT:    vsrl.vi v16, v8, 2
553; RV64-NEXT:    lui a0, 3
554; RV64-NEXT:    addiw a0, a0, 819
555; RV64-NEXT:    vand.vx v16, v16, a0
556; RV64-NEXT:    vand.vx v8, v8, a0
557; RV64-NEXT:    vsll.vi v8, v8, 2
558; RV64-NEXT:    vor.vv v8, v16, v8
559; RV64-NEXT:    vsrl.vi v16, v8, 1
560; RV64-NEXT:    lui a0, 5
561; RV64-NEXT:    addiw a0, a0, 1365
562; RV64-NEXT:    vand.vx v16, v16, a0
563; RV64-NEXT:    vand.vx v8, v8, a0
564; RV64-NEXT:    vadd.vv v8, v8, v8
565; RV64-NEXT:    vor.vv v8, v16, v8
566; RV64-NEXT:    ret
567  %a = call <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16> %va)
568  ret <vscale x 32 x i16> %a
569}
570declare <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16>)
571
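; For i32 the leading byte swap needs two extra masks for the bytes moved by
; 8: 0xff00 (lui 16, addi -256) and 0xff0000 (lui 4080); the bytes moved by
; 24 need no mask. The nibble/pair/bit masks widen to 0x0f0f0f0f, 0x33333333
; and 0x55555555. Again RV32 and RV64 differ only in addi vs addiw.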
572define <vscale x 1 x i32> @bitreverse_nxv1i32(<vscale x 1 x i32> %va) {
573; RV32-LABEL: bitreverse_nxv1i32:
574; RV32:       # %bb.0:
575; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
576; RV32-NEXT:    vsrl.vi v9, v8, 8
577; RV32-NEXT:    lui a0, 16
578; RV32-NEXT:    addi a0, a0, -256
579; RV32-NEXT:    vand.vx v9, v9, a0
580; RV32-NEXT:    vsrl.vi v10, v8, 24
581; RV32-NEXT:    vor.vv v9, v9, v10
582; RV32-NEXT:    vsll.vi v10, v8, 8
583; RV32-NEXT:    lui a0, 4080
584; RV32-NEXT:    vand.vx v10, v10, a0
585; RV32-NEXT:    vsll.vi v8, v8, 24
586; RV32-NEXT:    vor.vv v8, v8, v10
587; RV32-NEXT:    vor.vv v8, v8, v9
588; RV32-NEXT:    vsrl.vi v9, v8, 4
589; RV32-NEXT:    lui a0, 61681
590; RV32-NEXT:    addi a0, a0, -241
591; RV32-NEXT:    vand.vx v9, v9, a0
592; RV32-NEXT:    vand.vx v8, v8, a0
593; RV32-NEXT:    vsll.vi v8, v8, 4
594; RV32-NEXT:    vor.vv v8, v9, v8
595; RV32-NEXT:    vsrl.vi v9, v8, 2
596; RV32-NEXT:    lui a0, 209715
597; RV32-NEXT:    addi a0, a0, 819
598; RV32-NEXT:    vand.vx v9, v9, a0
599; RV32-NEXT:    vand.vx v8, v8, a0
600; RV32-NEXT:    vsll.vi v8, v8, 2
601; RV32-NEXT:    vor.vv v8, v9, v8
602; RV32-NEXT:    vsrl.vi v9, v8, 1
603; RV32-NEXT:    lui a0, 349525
604; RV32-NEXT:    addi a0, a0, 1365
605; RV32-NEXT:    vand.vx v9, v9, a0
606; RV32-NEXT:    vand.vx v8, v8, a0
607; RV32-NEXT:    vadd.vv v8, v8, v8
608; RV32-NEXT:    vor.vv v8, v9, v8
609; RV32-NEXT:    ret
610;
611; RV64-LABEL: bitreverse_nxv1i32:
612; RV64:       # %bb.0:
613; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
614; RV64-NEXT:    vsrl.vi v9, v8, 8
615; RV64-NEXT:    lui a0, 16
616; RV64-NEXT:    addiw a0, a0, -256
617; RV64-NEXT:    vand.vx v9, v9, a0
618; RV64-NEXT:    vsrl.vi v10, v8, 24
619; RV64-NEXT:    vor.vv v9, v9, v10
620; RV64-NEXT:    vsll.vi v10, v8, 8
621; RV64-NEXT:    lui a0, 4080
622; RV64-NEXT:    vand.vx v10, v10, a0
623; RV64-NEXT:    vsll.vi v8, v8, 24
624; RV64-NEXT:    vor.vv v8, v8, v10
625; RV64-NEXT:    vor.vv v8, v8, v9
626; RV64-NEXT:    vsrl.vi v9, v8, 4
627; RV64-NEXT:    lui a0, 61681
628; RV64-NEXT:    addiw a0, a0, -241
629; RV64-NEXT:    vand.vx v9, v9, a0
630; RV64-NEXT:    vand.vx v8, v8, a0
631; RV64-NEXT:    vsll.vi v8, v8, 4
632; RV64-NEXT:    vor.vv v8, v9, v8
633; RV64-NEXT:    vsrl.vi v9, v8, 2
634; RV64-NEXT:    lui a0, 209715
635; RV64-NEXT:    addiw a0, a0, 819
636; RV64-NEXT:    vand.vx v9, v9, a0
637; RV64-NEXT:    vand.vx v8, v8, a0
638; RV64-NEXT:    vsll.vi v8, v8, 2
639; RV64-NEXT:    vor.vv v8, v9, v8
640; RV64-NEXT:    vsrl.vi v9, v8, 1
641; RV64-NEXT:    lui a0, 349525
642; RV64-NEXT:    addiw a0, a0, 1365
643; RV64-NEXT:    vand.vx v9, v9, a0
644; RV64-NEXT:    vand.vx v8, v8, a0
645; RV64-NEXT:    vadd.vv v8, v8, v8
646; RV64-NEXT:    vor.vv v8, v9, v8
647; RV64-NEXT:    ret
648  %a = call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> %va)
649  ret <vscale x 1 x i32> %a
650}
651declare <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32>)
652
653define <vscale x 2 x i32> @bitreverse_nxv2i32(<vscale x 2 x i32> %va) {
654; RV32-LABEL: bitreverse_nxv2i32:
655; RV32:       # %bb.0:
656; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
657; RV32-NEXT:    vsrl.vi v9, v8, 8
658; RV32-NEXT:    lui a0, 16
659; RV32-NEXT:    addi a0, a0, -256
660; RV32-NEXT:    vand.vx v9, v9, a0
661; RV32-NEXT:    vsrl.vi v10, v8, 24
662; RV32-NEXT:    vor.vv v9, v9, v10
663; RV32-NEXT:    vsll.vi v10, v8, 8
664; RV32-NEXT:    lui a0, 4080
665; RV32-NEXT:    vand.vx v10, v10, a0
666; RV32-NEXT:    vsll.vi v8, v8, 24
667; RV32-NEXT:    vor.vv v8, v8, v10
668; RV32-NEXT:    vor.vv v8, v8, v9
669; RV32-NEXT:    vsrl.vi v9, v8, 4
670; RV32-NEXT:    lui a0, 61681
671; RV32-NEXT:    addi a0, a0, -241
672; RV32-NEXT:    vand.vx v9, v9, a0
673; RV32-NEXT:    vand.vx v8, v8, a0
674; RV32-NEXT:    vsll.vi v8, v8, 4
675; RV32-NEXT:    vor.vv v8, v9, v8
676; RV32-NEXT:    vsrl.vi v9, v8, 2
677; RV32-NEXT:    lui a0, 209715
678; RV32-NEXT:    addi a0, a0, 819
679; RV32-NEXT:    vand.vx v9, v9, a0
680; RV32-NEXT:    vand.vx v8, v8, a0
681; RV32-NEXT:    vsll.vi v8, v8, 2
682; RV32-NEXT:    vor.vv v8, v9, v8
683; RV32-NEXT:    vsrl.vi v9, v8, 1
684; RV32-NEXT:    lui a0, 349525
685; RV32-NEXT:    addi a0, a0, 1365
686; RV32-NEXT:    vand.vx v9, v9, a0
687; RV32-NEXT:    vand.vx v8, v8, a0
688; RV32-NEXT:    vadd.vv v8, v8, v8
689; RV32-NEXT:    vor.vv v8, v9, v8
690; RV32-NEXT:    ret
691;
692; RV64-LABEL: bitreverse_nxv2i32:
693; RV64:       # %bb.0:
694; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
695; RV64-NEXT:    vsrl.vi v9, v8, 8
696; RV64-NEXT:    lui a0, 16
697; RV64-NEXT:    addiw a0, a0, -256
698; RV64-NEXT:    vand.vx v9, v9, a0
699; RV64-NEXT:    vsrl.vi v10, v8, 24
700; RV64-NEXT:    vor.vv v9, v9, v10
701; RV64-NEXT:    vsll.vi v10, v8, 8
702; RV64-NEXT:    lui a0, 4080
703; RV64-NEXT:    vand.vx v10, v10, a0
704; RV64-NEXT:    vsll.vi v8, v8, 24
705; RV64-NEXT:    vor.vv v8, v8, v10
706; RV64-NEXT:    vor.vv v8, v8, v9
707; RV64-NEXT:    vsrl.vi v9, v8, 4
708; RV64-NEXT:    lui a0, 61681
709; RV64-NEXT:    addiw a0, a0, -241
710; RV64-NEXT:    vand.vx v9, v9, a0
711; RV64-NEXT:    vand.vx v8, v8, a0
712; RV64-NEXT:    vsll.vi v8, v8, 4
713; RV64-NEXT:    vor.vv v8, v9, v8
714; RV64-NEXT:    vsrl.vi v9, v8, 2
715; RV64-NEXT:    lui a0, 209715
716; RV64-NEXT:    addiw a0, a0, 819
717; RV64-NEXT:    vand.vx v9, v9, a0
718; RV64-NEXT:    vand.vx v8, v8, a0
719; RV64-NEXT:    vsll.vi v8, v8, 2
720; RV64-NEXT:    vor.vv v8, v9, v8
721; RV64-NEXT:    vsrl.vi v9, v8, 1
722; RV64-NEXT:    lui a0, 349525
723; RV64-NEXT:    addiw a0, a0, 1365
724; RV64-NEXT:    vand.vx v9, v9, a0
725; RV64-NEXT:    vand.vx v8, v8, a0
726; RV64-NEXT:    vadd.vv v8, v8, v8
727; RV64-NEXT:    vor.vv v8, v9, v8
728; RV64-NEXT:    ret
729  %a = call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> %va)
730  ret <vscale x 2 x i32> %a
731}
732declare <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32>)
733
734define <vscale x 4 x i32> @bitreverse_nxv4i32(<vscale x 4 x i32> %va) {
735; RV32-LABEL: bitreverse_nxv4i32:
736; RV32:       # %bb.0:
737; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
738; RV32-NEXT:    vsrl.vi v10, v8, 8
739; RV32-NEXT:    lui a0, 16
740; RV32-NEXT:    addi a0, a0, -256
741; RV32-NEXT:    vand.vx v10, v10, a0
742; RV32-NEXT:    vsrl.vi v12, v8, 24
743; RV32-NEXT:    vor.vv v10, v10, v12
744; RV32-NEXT:    vsll.vi v12, v8, 8
745; RV32-NEXT:    lui a0, 4080
746; RV32-NEXT:    vand.vx v12, v12, a0
747; RV32-NEXT:    vsll.vi v8, v8, 24
748; RV32-NEXT:    vor.vv v8, v8, v12
749; RV32-NEXT:    vor.vv v8, v8, v10
750; RV32-NEXT:    vsrl.vi v10, v8, 4
751; RV32-NEXT:    lui a0, 61681
752; RV32-NEXT:    addi a0, a0, -241
753; RV32-NEXT:    vand.vx v10, v10, a0
754; RV32-NEXT:    vand.vx v8, v8, a0
755; RV32-NEXT:    vsll.vi v8, v8, 4
756; RV32-NEXT:    vor.vv v8, v10, v8
757; RV32-NEXT:    vsrl.vi v10, v8, 2
758; RV32-NEXT:    lui a0, 209715
759; RV32-NEXT:    addi a0, a0, 819
760; RV32-NEXT:    vand.vx v10, v10, a0
761; RV32-NEXT:    vand.vx v8, v8, a0
762; RV32-NEXT:    vsll.vi v8, v8, 2
763; RV32-NEXT:    vor.vv v8, v10, v8
764; RV32-NEXT:    vsrl.vi v10, v8, 1
765; RV32-NEXT:    lui a0, 349525
766; RV32-NEXT:    addi a0, a0, 1365
767; RV32-NEXT:    vand.vx v10, v10, a0
768; RV32-NEXT:    vand.vx v8, v8, a0
769; RV32-NEXT:    vadd.vv v8, v8, v8
770; RV32-NEXT:    vor.vv v8, v10, v8
771; RV32-NEXT:    ret
772;
773; RV64-LABEL: bitreverse_nxv4i32:
774; RV64:       # %bb.0:
775; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
776; RV64-NEXT:    vsrl.vi v10, v8, 8
777; RV64-NEXT:    lui a0, 16
778; RV64-NEXT:    addiw a0, a0, -256
779; RV64-NEXT:    vand.vx v10, v10, a0
780; RV64-NEXT:    vsrl.vi v12, v8, 24
781; RV64-NEXT:    vor.vv v10, v10, v12
782; RV64-NEXT:    vsll.vi v12, v8, 8
783; RV64-NEXT:    lui a0, 4080
784; RV64-NEXT:    vand.vx v12, v12, a0
785; RV64-NEXT:    vsll.vi v8, v8, 24
786; RV64-NEXT:    vor.vv v8, v8, v12
787; RV64-NEXT:    vor.vv v8, v8, v10
788; RV64-NEXT:    vsrl.vi v10, v8, 4
789; RV64-NEXT:    lui a0, 61681
790; RV64-NEXT:    addiw a0, a0, -241
791; RV64-NEXT:    vand.vx v10, v10, a0
792; RV64-NEXT:    vand.vx v8, v8, a0
793; RV64-NEXT:    vsll.vi v8, v8, 4
794; RV64-NEXT:    vor.vv v8, v10, v8
795; RV64-NEXT:    vsrl.vi v10, v8, 2
796; RV64-NEXT:    lui a0, 209715
797; RV64-NEXT:    addiw a0, a0, 819
798; RV64-NEXT:    vand.vx v10, v10, a0
799; RV64-NEXT:    vand.vx v8, v8, a0
800; RV64-NEXT:    vsll.vi v8, v8, 2
801; RV64-NEXT:    vor.vv v8, v10, v8
802; RV64-NEXT:    vsrl.vi v10, v8, 1
803; RV64-NEXT:    lui a0, 349525
804; RV64-NEXT:    addiw a0, a0, 1365
805; RV64-NEXT:    vand.vx v10, v10, a0
806; RV64-NEXT:    vand.vx v8, v8, a0
807; RV64-NEXT:    vadd.vv v8, v8, v8
808; RV64-NEXT:    vor.vv v8, v10, v8
809; RV64-NEXT:    ret
810  %a = call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> %va)
811  ret <vscale x 4 x i32> %a
812}
813declare <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32>)
814
815define <vscale x 8 x i32> @bitreverse_nxv8i32(<vscale x 8 x i32> %va) {
816; RV32-LABEL: bitreverse_nxv8i32:
817; RV32:       # %bb.0:
818; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
819; RV32-NEXT:    vsrl.vi v12, v8, 8
820; RV32-NEXT:    lui a0, 16
821; RV32-NEXT:    addi a0, a0, -256
822; RV32-NEXT:    vand.vx v12, v12, a0
823; RV32-NEXT:    vsrl.vi v16, v8, 24
824; RV32-NEXT:    vor.vv v12, v12, v16
825; RV32-NEXT:    vsll.vi v16, v8, 8
826; RV32-NEXT:    lui a0, 4080
827; RV32-NEXT:    vand.vx v16, v16, a0
828; RV32-NEXT:    vsll.vi v8, v8, 24
829; RV32-NEXT:    vor.vv v8, v8, v16
830; RV32-NEXT:    vor.vv v8, v8, v12
831; RV32-NEXT:    vsrl.vi v12, v8, 4
832; RV32-NEXT:    lui a0, 61681
833; RV32-NEXT:    addi a0, a0, -241
834; RV32-NEXT:    vand.vx v12, v12, a0
835; RV32-NEXT:    vand.vx v8, v8, a0
836; RV32-NEXT:    vsll.vi v8, v8, 4
837; RV32-NEXT:    vor.vv v8, v12, v8
838; RV32-NEXT:    vsrl.vi v12, v8, 2
839; RV32-NEXT:    lui a0, 209715
840; RV32-NEXT:    addi a0, a0, 819
841; RV32-NEXT:    vand.vx v12, v12, a0
842; RV32-NEXT:    vand.vx v8, v8, a0
843; RV32-NEXT:    vsll.vi v8, v8, 2
844; RV32-NEXT:    vor.vv v8, v12, v8
845; RV32-NEXT:    vsrl.vi v12, v8, 1
846; RV32-NEXT:    lui a0, 349525
847; RV32-NEXT:    addi a0, a0, 1365
848; RV32-NEXT:    vand.vx v12, v12, a0
849; RV32-NEXT:    vand.vx v8, v8, a0
850; RV32-NEXT:    vadd.vv v8, v8, v8
851; RV32-NEXT:    vor.vv v8, v12, v8
852; RV32-NEXT:    ret
853;
854; RV64-LABEL: bitreverse_nxv8i32:
855; RV64:       # %bb.0:
856; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
857; RV64-NEXT:    vsrl.vi v12, v8, 8
858; RV64-NEXT:    lui a0, 16
859; RV64-NEXT:    addiw a0, a0, -256
860; RV64-NEXT:    vand.vx v12, v12, a0
861; RV64-NEXT:    vsrl.vi v16, v8, 24
862; RV64-NEXT:    vor.vv v12, v12, v16
863; RV64-NEXT:    vsll.vi v16, v8, 8
864; RV64-NEXT:    lui a0, 4080
865; RV64-NEXT:    vand.vx v16, v16, a0
866; RV64-NEXT:    vsll.vi v8, v8, 24
867; RV64-NEXT:    vor.vv v8, v8, v16
868; RV64-NEXT:    vor.vv v8, v8, v12
869; RV64-NEXT:    vsrl.vi v12, v8, 4
870; RV64-NEXT:    lui a0, 61681
871; RV64-NEXT:    addiw a0, a0, -241
872; RV64-NEXT:    vand.vx v12, v12, a0
873; RV64-NEXT:    vand.vx v8, v8, a0
874; RV64-NEXT:    vsll.vi v8, v8, 4
875; RV64-NEXT:    vor.vv v8, v12, v8
876; RV64-NEXT:    vsrl.vi v12, v8, 2
877; RV64-NEXT:    lui a0, 209715
878; RV64-NEXT:    addiw a0, a0, 819
879; RV64-NEXT:    vand.vx v12, v12, a0
880; RV64-NEXT:    vand.vx v8, v8, a0
881; RV64-NEXT:    vsll.vi v8, v8, 2
882; RV64-NEXT:    vor.vv v8, v12, v8
883; RV64-NEXT:    vsrl.vi v12, v8, 1
884; RV64-NEXT:    lui a0, 349525
885; RV64-NEXT:    addiw a0, a0, 1365
886; RV64-NEXT:    vand.vx v12, v12, a0
887; RV64-NEXT:    vand.vx v8, v8, a0
888; RV64-NEXT:    vadd.vv v8, v8, v8
889; RV64-NEXT:    vor.vv v8, v12, v8
890; RV64-NEXT:    ret
891  %a = call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> %va)
892  ret <vscale x 8 x i32> %a
893}
894declare <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32>)
895
896define <vscale x 16 x i32> @bitreverse_nxv16i32(<vscale x 16 x i32> %va) {
897; RV32-LABEL: bitreverse_nxv16i32:
898; RV32:       # %bb.0:
899; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
900; RV32-NEXT:    vsrl.vi v16, v8, 8
901; RV32-NEXT:    lui a0, 16
902; RV32-NEXT:    addi a0, a0, -256
903; RV32-NEXT:    vand.vx v16, v16, a0
904; RV32-NEXT:    vsrl.vi v24, v8, 24
905; RV32-NEXT:    vor.vv v16, v16, v24
906; RV32-NEXT:    vsll.vi v24, v8, 8
907; RV32-NEXT:    lui a0, 4080
908; RV32-NEXT:    vand.vx v24, v24, a0
909; RV32-NEXT:    vsll.vi v8, v8, 24
910; RV32-NEXT:    vor.vv v8, v8, v24
911; RV32-NEXT:    vor.vv v8, v8, v16
912; RV32-NEXT:    vsrl.vi v16, v8, 4
913; RV32-NEXT:    lui a0, 61681
914; RV32-NEXT:    addi a0, a0, -241
915; RV32-NEXT:    vand.vx v16, v16, a0
916; RV32-NEXT:    vand.vx v8, v8, a0
917; RV32-NEXT:    vsll.vi v8, v8, 4
918; RV32-NEXT:    vor.vv v8, v16, v8
919; RV32-NEXT:    vsrl.vi v16, v8, 2
920; RV32-NEXT:    lui a0, 209715
921; RV32-NEXT:    addi a0, a0, 819
922; RV32-NEXT:    vand.vx v16, v16, a0
923; RV32-NEXT:    vand.vx v8, v8, a0
924; RV32-NEXT:    vsll.vi v8, v8, 2
925; RV32-NEXT:    vor.vv v8, v16, v8
926; RV32-NEXT:    vsrl.vi v16, v8, 1
927; RV32-NEXT:    lui a0, 349525
928; RV32-NEXT:    addi a0, a0, 1365
929; RV32-NEXT:    vand.vx v16, v16, a0
930; RV32-NEXT:    vand.vx v8, v8, a0
931; RV32-NEXT:    vadd.vv v8, v8, v8
932; RV32-NEXT:    vor.vv v8, v16, v8
933; RV32-NEXT:    ret
934;
935; RV64-LABEL: bitreverse_nxv16i32:
936; RV64:       # %bb.0:
937; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
938; RV64-NEXT:    vsrl.vi v16, v8, 8
939; RV64-NEXT:    lui a0, 16
940; RV64-NEXT:    addiw a0, a0, -256
941; RV64-NEXT:    vand.vx v16, v16, a0
942; RV64-NEXT:    vsrl.vi v24, v8, 24
943; RV64-NEXT:    vor.vv v16, v16, v24
944; RV64-NEXT:    vsll.vi v24, v8, 8
945; RV64-NEXT:    lui a0, 4080
946; RV64-NEXT:    vand.vx v24, v24, a0
947; RV64-NEXT:    vsll.vi v8, v8, 24
948; RV64-NEXT:    vor.vv v8, v8, v24
949; RV64-NEXT:    vor.vv v8, v8, v16
950; RV64-NEXT:    vsrl.vi v16, v8, 4
951; RV64-NEXT:    lui a0, 61681
952; RV64-NEXT:    addiw a0, a0, -241
953; RV64-NEXT:    vand.vx v16, v16, a0
954; RV64-NEXT:    vand.vx v8, v8, a0
955; RV64-NEXT:    vsll.vi v8, v8, 4
956; RV64-NEXT:    vor.vv v8, v16, v8
957; RV64-NEXT:    vsrl.vi v16, v8, 2
958; RV64-NEXT:    lui a0, 209715
959; RV64-NEXT:    addiw a0, a0, 819
960; RV64-NEXT:    vand.vx v16, v16, a0
961; RV64-NEXT:    vand.vx v8, v8, a0
962; RV64-NEXT:    vsll.vi v8, v8, 2
963; RV64-NEXT:    vor.vv v8, v16, v8
964; RV64-NEXT:    vsrl.vi v16, v8, 1
965; RV64-NEXT:    lui a0, 349525
966; RV64-NEXT:    addiw a0, a0, 1365
967; RV64-NEXT:    vand.vx v16, v16, a0
968; RV64-NEXT:    vand.vx v8, v8, a0
969; RV64-NEXT:    vadd.vv v8, v8, v8
970; RV64-NEXT:    vor.vv v8, v16, v8
971; RV64-NEXT:    ret
972  %a = call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> %va)
973  ret <vscale x 16 x i32> %a
974}
975declare <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32>)
976
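; For i64 the masks no longer fit one lui+addi pair. RV64 materializes the
; byte-swap masks from 255 with slli and the 0x0f0f.../0x3333.../0x5555...
; masks with lui/addiw plus slli/addi chains, then uses vand.vx. RV32 has no
; 64-bit scalar registers, so it stores each mask's two 32-bit halves to the
; stack and splats it into a vector with vlse64.v, masking with vand.vv; the
; m8 case additionally spills vector temporaries to the stack (vs8r.v/vl8re8.v).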
977define <vscale x 1 x i64> @bitreverse_nxv1i64(<vscale x 1 x i64> %va) {
978; RV32-LABEL: bitreverse_nxv1i64:
979; RV32:       # %bb.0:
980; RV32-NEXT:    addi sp, sp, -16
981; RV32-NEXT:    .cfi_def_cfa_offset 16
982; RV32-NEXT:    sw zero, 12(sp)
983; RV32-NEXT:    lui a0, 1044480
984; RV32-NEXT:    sw a0, 8(sp)
985; RV32-NEXT:    lui a0, 4080
986; RV32-NEXT:    sw a0, 12(sp)
987; RV32-NEXT:    sw zero, 8(sp)
988; RV32-NEXT:    li a1, 255
989; RV32-NEXT:    sw a1, 12(sp)
990; RV32-NEXT:    lui a1, 16
991; RV32-NEXT:    addi a1, a1, -256
992; RV32-NEXT:    sw a1, 12(sp)
993; RV32-NEXT:    lui a2, 61681
994; RV32-NEXT:    addi a2, a2, -241
995; RV32-NEXT:    sw a2, 12(sp)
996; RV32-NEXT:    sw a2, 8(sp)
997; RV32-NEXT:    lui a2, 209715
998; RV32-NEXT:    addi a2, a2, 819
999; RV32-NEXT:    sw a2, 12(sp)
1000; RV32-NEXT:    sw a2, 8(sp)
1001; RV32-NEXT:    lui a2, 349525
1002; RV32-NEXT:    addi a2, a2, 1365
1003; RV32-NEXT:    sw a2, 12(sp)
1004; RV32-NEXT:    sw a2, 8(sp)
1005; RV32-NEXT:    li a2, 56
1006; RV32-NEXT:    vsetvli a3, zero, e64, m1, ta, mu
1007; RV32-NEXT:    vsrl.vx v9, v8, a2
1008; RV32-NEXT:    li a3, 40
1009; RV32-NEXT:    vsrl.vx v10, v8, a3
1010; RV32-NEXT:    vand.vx v10, v10, a1
1011; RV32-NEXT:    vor.vv v9, v10, v9
1012; RV32-NEXT:    addi a1, sp, 8
1013; RV32-NEXT:    vlse64.v v10, (a1), zero
1014; RV32-NEXT:    vsrl.vi v11, v8, 24
1015; RV32-NEXT:    vand.vx v11, v11, a0
1016; RV32-NEXT:    vsrl.vi v12, v8, 8
1017; RV32-NEXT:    vand.vv v10, v12, v10
1018; RV32-NEXT:    vor.vv v10, v10, v11
1019; RV32-NEXT:    addi a0, sp, 8
1020; RV32-NEXT:    vlse64.v v11, (a0), zero
1021; RV32-NEXT:    vor.vv v9, v10, v9
1022; RV32-NEXT:    vsll.vx v10, v8, a2
1023; RV32-NEXT:    vsll.vx v12, v8, a3
1024; RV32-NEXT:    vand.vv v11, v12, v11
1025; RV32-NEXT:    addi a0, sp, 8
1026; RV32-NEXT:    vlse64.v v12, (a0), zero
1027; RV32-NEXT:    vor.vv v10, v10, v11
1028; RV32-NEXT:    addi a0, sp, 8
1029; RV32-NEXT:    vlse64.v v11, (a0), zero
1030; RV32-NEXT:    vsll.vi v13, v8, 8
1031; RV32-NEXT:    vand.vv v12, v13, v12
1032; RV32-NEXT:    vsll.vi v8, v8, 24
1033; RV32-NEXT:    vand.vv v8, v8, v11
1034; RV32-NEXT:    vor.vv v8, v8, v12
1035; RV32-NEXT:    addi a0, sp, 8
1036; RV32-NEXT:    vlse64.v v11, (a0), zero
1037; RV32-NEXT:    vor.vv v8, v10, v8
1038; RV32-NEXT:    vor.vv v8, v8, v9
1039; RV32-NEXT:    vsrl.vi v9, v8, 4
1040; RV32-NEXT:    vand.vv v9, v9, v11
1041; RV32-NEXT:    vand.vv v8, v8, v11
1042; RV32-NEXT:    addi a0, sp, 8
1043; RV32-NEXT:    vlse64.v v10, (a0), zero
1044; RV32-NEXT:    vsll.vi v8, v8, 4
1045; RV32-NEXT:    vor.vv v8, v9, v8
1046; RV32-NEXT:    vsrl.vi v9, v8, 2
1047; RV32-NEXT:    vand.vv v9, v9, v10
1048; RV32-NEXT:    vand.vv v8, v8, v10
1049; RV32-NEXT:    addi a0, sp, 8
1050; RV32-NEXT:    vlse64.v v10, (a0), zero
1051; RV32-NEXT:    vsll.vi v8, v8, 2
1052; RV32-NEXT:    vor.vv v8, v9, v8
1053; RV32-NEXT:    vsrl.vi v9, v8, 1
1054; RV32-NEXT:    vand.vv v9, v9, v10
1055; RV32-NEXT:    vand.vv v8, v8, v10
1056; RV32-NEXT:    vadd.vv v8, v8, v8
1057; RV32-NEXT:    vor.vv v8, v9, v8
1058; RV32-NEXT:    addi sp, sp, 16
1059; RV32-NEXT:    ret
1060;
1061; RV64-LABEL: bitreverse_nxv1i64:
1062; RV64:       # %bb.0:
1063; RV64-NEXT:    li a0, 56
1064; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
1065; RV64-NEXT:    vsrl.vx v9, v8, a0
1066; RV64-NEXT:    li a1, 40
1067; RV64-NEXT:    vsrl.vx v10, v8, a1
1068; RV64-NEXT:    lui a2, 16
1069; RV64-NEXT:    addiw a2, a2, -256
1070; RV64-NEXT:    vand.vx v10, v10, a2
1071; RV64-NEXT:    vor.vv v9, v10, v9
1072; RV64-NEXT:    vsrl.vi v10, v8, 24
1073; RV64-NEXT:    lui a2, 4080
1074; RV64-NEXT:    vand.vx v10, v10, a2
1075; RV64-NEXT:    vsrl.vi v11, v8, 8
1076; RV64-NEXT:    li a2, 255
1077; RV64-NEXT:    slli a3, a2, 24
1078; RV64-NEXT:    vand.vx v11, v11, a3
1079; RV64-NEXT:    vor.vv v10, v11, v10
1080; RV64-NEXT:    vor.vv v9, v10, v9
1081; RV64-NEXT:    vsll.vi v10, v8, 8
1082; RV64-NEXT:    slli a3, a2, 32
1083; RV64-NEXT:    vand.vx v10, v10, a3
1084; RV64-NEXT:    vsll.vi v11, v8, 24
1085; RV64-NEXT:    slli a3, a2, 40
1086; RV64-NEXT:    vand.vx v11, v11, a3
1087; RV64-NEXT:    vor.vv v10, v11, v10
1088; RV64-NEXT:    vsll.vx v11, v8, a0
1089; RV64-NEXT:    vsll.vx v8, v8, a1
1090; RV64-NEXT:    slli a0, a2, 48
1091; RV64-NEXT:    vand.vx v8, v8, a0
1092; RV64-NEXT:    vor.vv v8, v11, v8
1093; RV64-NEXT:    vor.vv v8, v8, v10
1094; RV64-NEXT:    vor.vv v8, v8, v9
1095; RV64-NEXT:    vsrl.vi v9, v8, 4
1096; RV64-NEXT:    lui a0, 3855
1097; RV64-NEXT:    addiw a0, a0, 241
1098; RV64-NEXT:    slli a0, a0, 12
1099; RV64-NEXT:    addi a0, a0, -241
1100; RV64-NEXT:    slli a0, a0, 12
1101; RV64-NEXT:    addi a0, a0, 241
1102; RV64-NEXT:    slli a0, a0, 12
1103; RV64-NEXT:    addi a0, a0, -241
1104; RV64-NEXT:    vand.vx v9, v9, a0
1105; RV64-NEXT:    vand.vx v8, v8, a0
1106; RV64-NEXT:    vsll.vi v8, v8, 4
1107; RV64-NEXT:    vor.vv v8, v9, v8
1108; RV64-NEXT:    vsrl.vi v9, v8, 2
1109; RV64-NEXT:    lui a0, 13107
1110; RV64-NEXT:    addiw a0, a0, 819
1111; RV64-NEXT:    slli a0, a0, 12
1112; RV64-NEXT:    addi a0, a0, 819
1113; RV64-NEXT:    slli a0, a0, 12
1114; RV64-NEXT:    addi a0, a0, 819
1115; RV64-NEXT:    slli a0, a0, 12
1116; RV64-NEXT:    addi a0, a0, 819
1117; RV64-NEXT:    vand.vx v9, v9, a0
1118; RV64-NEXT:    vand.vx v8, v8, a0
1119; RV64-NEXT:    vsll.vi v8, v8, 2
1120; RV64-NEXT:    vor.vv v8, v9, v8
1121; RV64-NEXT:    vsrl.vi v9, v8, 1
1122; RV64-NEXT:    lui a0, 21845
1123; RV64-NEXT:    addiw a0, a0, 1365
1124; RV64-NEXT:    slli a0, a0, 12
1125; RV64-NEXT:    addi a0, a0, 1365
1126; RV64-NEXT:    slli a0, a0, 12
1127; RV64-NEXT:    addi a0, a0, 1365
1128; RV64-NEXT:    slli a0, a0, 12
1129; RV64-NEXT:    addi a0, a0, 1365
1130; RV64-NEXT:    vand.vx v9, v9, a0
1131; RV64-NEXT:    vand.vx v8, v8, a0
1132; RV64-NEXT:    vadd.vv v8, v8, v8
1133; RV64-NEXT:    vor.vv v8, v9, v8
1134; RV64-NEXT:    ret
1135  %a = call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> %va)
1136  ret <vscale x 1 x i64> %a
1137}
1138declare <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64>)
1139
1140define <vscale x 2 x i64> @bitreverse_nxv2i64(<vscale x 2 x i64> %va) {
1141; RV32-LABEL: bitreverse_nxv2i64:
1142; RV32:       # %bb.0:
1143; RV32-NEXT:    addi sp, sp, -16
1144; RV32-NEXT:    .cfi_def_cfa_offset 16
1145; RV32-NEXT:    sw zero, 12(sp)
1146; RV32-NEXT:    lui a0, 1044480
1147; RV32-NEXT:    sw a0, 8(sp)
1148; RV32-NEXT:    lui a0, 4080
1149; RV32-NEXT:    sw a0, 12(sp)
1150; RV32-NEXT:    sw zero, 8(sp)
1151; RV32-NEXT:    li a1, 255
1152; RV32-NEXT:    sw a1, 12(sp)
1153; RV32-NEXT:    lui a1, 16
1154; RV32-NEXT:    addi a1, a1, -256
1155; RV32-NEXT:    sw a1, 12(sp)
1156; RV32-NEXT:    lui a2, 61681
1157; RV32-NEXT:    addi a2, a2, -241
1158; RV32-NEXT:    sw a2, 12(sp)
1159; RV32-NEXT:    sw a2, 8(sp)
1160; RV32-NEXT:    lui a2, 209715
1161; RV32-NEXT:    addi a2, a2, 819
1162; RV32-NEXT:    sw a2, 12(sp)
1163; RV32-NEXT:    sw a2, 8(sp)
1164; RV32-NEXT:    lui a2, 349525
1165; RV32-NEXT:    addi a2, a2, 1365
1166; RV32-NEXT:    sw a2, 12(sp)
1167; RV32-NEXT:    sw a2, 8(sp)
1168; RV32-NEXT:    li a2, 56
1169; RV32-NEXT:    vsetvli a3, zero, e64, m2, ta, mu
1170; RV32-NEXT:    vsrl.vx v10, v8, a2
1171; RV32-NEXT:    li a3, 40
1172; RV32-NEXT:    vsrl.vx v12, v8, a3
1173; RV32-NEXT:    vand.vx v12, v12, a1
1174; RV32-NEXT:    vor.vv v10, v12, v10
1175; RV32-NEXT:    addi a1, sp, 8
1176; RV32-NEXT:    vlse64.v v12, (a1), zero
1177; RV32-NEXT:    vsrl.vi v14, v8, 24
1178; RV32-NEXT:    vand.vx v14, v14, a0
1179; RV32-NEXT:    vsrl.vi v16, v8, 8
1180; RV32-NEXT:    vand.vv v12, v16, v12
1181; RV32-NEXT:    vor.vv v12, v12, v14
1182; RV32-NEXT:    addi a0, sp, 8
1183; RV32-NEXT:    vlse64.v v14, (a0), zero
1184; RV32-NEXT:    vor.vv v10, v12, v10
1185; RV32-NEXT:    vsll.vx v12, v8, a2
1186; RV32-NEXT:    vsll.vx v16, v8, a3
1187; RV32-NEXT:    vand.vv v14, v16, v14
1188; RV32-NEXT:    addi a0, sp, 8
1189; RV32-NEXT:    vlse64.v v16, (a0), zero
1190; RV32-NEXT:    vor.vv v12, v12, v14
1191; RV32-NEXT:    addi a0, sp, 8
1192; RV32-NEXT:    vlse64.v v14, (a0), zero
1193; RV32-NEXT:    vsll.vi v18, v8, 8
1194; RV32-NEXT:    vand.vv v16, v18, v16
1195; RV32-NEXT:    vsll.vi v8, v8, 24
1196; RV32-NEXT:    vand.vv v8, v8, v14
1197; RV32-NEXT:    vor.vv v8, v8, v16
1198; RV32-NEXT:    addi a0, sp, 8
1199; RV32-NEXT:    vlse64.v v14, (a0), zero
1200; RV32-NEXT:    vor.vv v8, v12, v8
1201; RV32-NEXT:    vor.vv v8, v8, v10
1202; RV32-NEXT:    vsrl.vi v10, v8, 4
1203; RV32-NEXT:    vand.vv v10, v10, v14
1204; RV32-NEXT:    vand.vv v8, v8, v14
1205; RV32-NEXT:    addi a0, sp, 8
1206; RV32-NEXT:    vlse64.v v12, (a0), zero
1207; RV32-NEXT:    vsll.vi v8, v8, 4
1208; RV32-NEXT:    vor.vv v8, v10, v8
1209; RV32-NEXT:    vsrl.vi v10, v8, 2
1210; RV32-NEXT:    vand.vv v10, v10, v12
1211; RV32-NEXT:    vand.vv v8, v8, v12
1212; RV32-NEXT:    addi a0, sp, 8
1213; RV32-NEXT:    vlse64.v v12, (a0), zero
1214; RV32-NEXT:    vsll.vi v8, v8, 2
1215; RV32-NEXT:    vor.vv v8, v10, v8
1216; RV32-NEXT:    vsrl.vi v10, v8, 1
1217; RV32-NEXT:    vand.vv v10, v10, v12
1218; RV32-NEXT:    vand.vv v8, v8, v12
1219; RV32-NEXT:    vadd.vv v8, v8, v8
1220; RV32-NEXT:    vor.vv v8, v10, v8
1221; RV32-NEXT:    addi sp, sp, 16
1222; RV32-NEXT:    ret
1223;
1224; RV64-LABEL: bitreverse_nxv2i64:
1225; RV64:       # %bb.0:
1226; RV64-NEXT:    li a0, 56
1227; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
1228; RV64-NEXT:    vsrl.vx v10, v8, a0
1229; RV64-NEXT:    li a1, 40
1230; RV64-NEXT:    vsrl.vx v12, v8, a1
1231; RV64-NEXT:    lui a2, 16
1232; RV64-NEXT:    addiw a2, a2, -256
1233; RV64-NEXT:    vand.vx v12, v12, a2
1234; RV64-NEXT:    vor.vv v10, v12, v10
1235; RV64-NEXT:    vsrl.vi v12, v8, 24
1236; RV64-NEXT:    lui a2, 4080
1237; RV64-NEXT:    vand.vx v12, v12, a2
1238; RV64-NEXT:    vsrl.vi v14, v8, 8
1239; RV64-NEXT:    li a2, 255
1240; RV64-NEXT:    slli a3, a2, 24
1241; RV64-NEXT:    vand.vx v14, v14, a3
1242; RV64-NEXT:    vor.vv v12, v14, v12
1243; RV64-NEXT:    vor.vv v10, v12, v10
1244; RV64-NEXT:    vsll.vi v12, v8, 8
1245; RV64-NEXT:    slli a3, a2, 32
1246; RV64-NEXT:    vand.vx v12, v12, a3
1247; RV64-NEXT:    vsll.vi v14, v8, 24
1248; RV64-NEXT:    slli a3, a2, 40
1249; RV64-NEXT:    vand.vx v14, v14, a3
1250; RV64-NEXT:    vor.vv v12, v14, v12
1251; RV64-NEXT:    vsll.vx v14, v8, a0
1252; RV64-NEXT:    vsll.vx v8, v8, a1
1253; RV64-NEXT:    slli a0, a2, 48
1254; RV64-NEXT:    vand.vx v8, v8, a0
1255; RV64-NEXT:    vor.vv v8, v14, v8
1256; RV64-NEXT:    vor.vv v8, v8, v12
1257; RV64-NEXT:    vor.vv v8, v8, v10
1258; RV64-NEXT:    vsrl.vi v10, v8, 4
1259; RV64-NEXT:    lui a0, 3855
1260; RV64-NEXT:    addiw a0, a0, 241
1261; RV64-NEXT:    slli a0, a0, 12
1262; RV64-NEXT:    addi a0, a0, -241
1263; RV64-NEXT:    slli a0, a0, 12
1264; RV64-NEXT:    addi a0, a0, 241
1265; RV64-NEXT:    slli a0, a0, 12
1266; RV64-NEXT:    addi a0, a0, -241
1267; RV64-NEXT:    vand.vx v10, v10, a0
1268; RV64-NEXT:    vand.vx v8, v8, a0
1269; RV64-NEXT:    vsll.vi v8, v8, 4
1270; RV64-NEXT:    vor.vv v8, v10, v8
1271; RV64-NEXT:    vsrl.vi v10, v8, 2
1272; RV64-NEXT:    lui a0, 13107
1273; RV64-NEXT:    addiw a0, a0, 819
1274; RV64-NEXT:    slli a0, a0, 12
1275; RV64-NEXT:    addi a0, a0, 819
1276; RV64-NEXT:    slli a0, a0, 12
1277; RV64-NEXT:    addi a0, a0, 819
1278; RV64-NEXT:    slli a0, a0, 12
1279; RV64-NEXT:    addi a0, a0, 819
1280; RV64-NEXT:    vand.vx v10, v10, a0
1281; RV64-NEXT:    vand.vx v8, v8, a0
1282; RV64-NEXT:    vsll.vi v8, v8, 2
1283; RV64-NEXT:    vor.vv v8, v10, v8
1284; RV64-NEXT:    vsrl.vi v10, v8, 1
1285; RV64-NEXT:    lui a0, 21845
1286; RV64-NEXT:    addiw a0, a0, 1365
1287; RV64-NEXT:    slli a0, a0, 12
1288; RV64-NEXT:    addi a0, a0, 1365
1289; RV64-NEXT:    slli a0, a0, 12
1290; RV64-NEXT:    addi a0, a0, 1365
1291; RV64-NEXT:    slli a0, a0, 12
1292; RV64-NEXT:    addi a0, a0, 1365
1293; RV64-NEXT:    vand.vx v10, v10, a0
1294; RV64-NEXT:    vand.vx v8, v8, a0
1295; RV64-NEXT:    vadd.vv v8, v8, v8
1296; RV64-NEXT:    vor.vv v8, v10, v8
1297; RV64-NEXT:    ret
1298  %a = call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> %va)
1299  ret <vscale x 2 x i64> %a
1300}
1301declare <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64>)
1302
1303define <vscale x 4 x i64> @bitreverse_nxv4i64(<vscale x 4 x i64> %va) {
1304; RV32-LABEL: bitreverse_nxv4i64:
1305; RV32:       # %bb.0:
1306; RV32-NEXT:    addi sp, sp, -16
1307; RV32-NEXT:    .cfi_def_cfa_offset 16
1308; RV32-NEXT:    sw zero, 12(sp)
1309; RV32-NEXT:    lui a0, 1044480
1310; RV32-NEXT:    sw a0, 8(sp)
1311; RV32-NEXT:    lui a0, 4080
1312; RV32-NEXT:    sw a0, 12(sp)
1313; RV32-NEXT:    sw zero, 8(sp)
1314; RV32-NEXT:    li a1, 255
1315; RV32-NEXT:    sw a1, 12(sp)
1316; RV32-NEXT:    lui a1, 16
1317; RV32-NEXT:    addi a1, a1, -256
1318; RV32-NEXT:    sw a1, 12(sp)
1319; RV32-NEXT:    lui a2, 61681
1320; RV32-NEXT:    addi a2, a2, -241
1321; RV32-NEXT:    sw a2, 12(sp)
1322; RV32-NEXT:    sw a2, 8(sp)
1323; RV32-NEXT:    lui a2, 209715
1324; RV32-NEXT:    addi a2, a2, 819
1325; RV32-NEXT:    sw a2, 12(sp)
1326; RV32-NEXT:    sw a2, 8(sp)
1327; RV32-NEXT:    lui a2, 349525
1328; RV32-NEXT:    addi a2, a2, 1365
1329; RV32-NEXT:    sw a2, 12(sp)
1330; RV32-NEXT:    sw a2, 8(sp)
1331; RV32-NEXT:    li a2, 56
1332; RV32-NEXT:    vsetvli a3, zero, e64, m4, ta, mu
1333; RV32-NEXT:    vsrl.vx v12, v8, a2
1334; RV32-NEXT:    li a3, 40
1335; RV32-NEXT:    vsrl.vx v16, v8, a3
1336; RV32-NEXT:    vand.vx v16, v16, a1
1337; RV32-NEXT:    vor.vv v12, v16, v12
1338; RV32-NEXT:    addi a1, sp, 8
1339; RV32-NEXT:    vlse64.v v16, (a1), zero
1340; RV32-NEXT:    vsrl.vi v20, v8, 24
1341; RV32-NEXT:    vand.vx v20, v20, a0
1342; RV32-NEXT:    vsrl.vi v24, v8, 8
1343; RV32-NEXT:    vand.vv v16, v24, v16
1344; RV32-NEXT:    vor.vv v16, v16, v20
1345; RV32-NEXT:    addi a0, sp, 8
1346; RV32-NEXT:    vlse64.v v20, (a0), zero
1347; RV32-NEXT:    vor.vv v12, v16, v12
1348; RV32-NEXT:    vsll.vx v16, v8, a2
1349; RV32-NEXT:    vsll.vx v24, v8, a3
1350; RV32-NEXT:    vand.vv v20, v24, v20
1351; RV32-NEXT:    addi a0, sp, 8
1352; RV32-NEXT:    vlse64.v v24, (a0), zero
1353; RV32-NEXT:    vor.vv v16, v16, v20
1354; RV32-NEXT:    addi a0, sp, 8
1355; RV32-NEXT:    vlse64.v v20, (a0), zero
1356; RV32-NEXT:    vsll.vi v28, v8, 8
1357; RV32-NEXT:    vand.vv v24, v28, v24
1358; RV32-NEXT:    vsll.vi v8, v8, 24
1359; RV32-NEXT:    vand.vv v8, v8, v20
1360; RV32-NEXT:    vor.vv v8, v8, v24
1361; RV32-NEXT:    addi a0, sp, 8
1362; RV32-NEXT:    vlse64.v v20, (a0), zero
1363; RV32-NEXT:    vor.vv v8, v16, v8
1364; RV32-NEXT:    vor.vv v8, v8, v12
1365; RV32-NEXT:    vsrl.vi v12, v8, 4
1366; RV32-NEXT:    vand.vv v12, v12, v20
1367; RV32-NEXT:    vand.vv v8, v8, v20
1368; RV32-NEXT:    addi a0, sp, 8
1369; RV32-NEXT:    vlse64.v v16, (a0), zero
1370; RV32-NEXT:    vsll.vi v8, v8, 4
1371; RV32-NEXT:    vor.vv v8, v12, v8
1372; RV32-NEXT:    vsrl.vi v12, v8, 2
1373; RV32-NEXT:    vand.vv v12, v12, v16
1374; RV32-NEXT:    vand.vv v8, v8, v16
1375; RV32-NEXT:    addi a0, sp, 8
1376; RV32-NEXT:    vlse64.v v16, (a0), zero
1377; RV32-NEXT:    vsll.vi v8, v8, 2
1378; RV32-NEXT:    vor.vv v8, v12, v8
1379; RV32-NEXT:    vsrl.vi v12, v8, 1
1380; RV32-NEXT:    vand.vv v12, v12, v16
1381; RV32-NEXT:    vand.vv v8, v8, v16
1382; RV32-NEXT:    vadd.vv v8, v8, v8
1383; RV32-NEXT:    vor.vv v8, v12, v8
1384; RV32-NEXT:    addi sp, sp, 16
1385; RV32-NEXT:    ret
1386;
1387; RV64-LABEL: bitreverse_nxv4i64:
1388; RV64:       # %bb.0:
1389; RV64-NEXT:    li a0, 56
1390; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
1391; RV64-NEXT:    vsrl.vx v12, v8, a0
1392; RV64-NEXT:    li a1, 40
1393; RV64-NEXT:    vsrl.vx v16, v8, a1
1394; RV64-NEXT:    lui a2, 16
1395; RV64-NEXT:    addiw a2, a2, -256
1396; RV64-NEXT:    vand.vx v16, v16, a2
1397; RV64-NEXT:    vor.vv v12, v16, v12
1398; RV64-NEXT:    vsrl.vi v16, v8, 24
1399; RV64-NEXT:    lui a2, 4080
1400; RV64-NEXT:    vand.vx v16, v16, a2
1401; RV64-NEXT:    vsrl.vi v20, v8, 8
1402; RV64-NEXT:    li a2, 255
1403; RV64-NEXT:    slli a3, a2, 24
1404; RV64-NEXT:    vand.vx v20, v20, a3
1405; RV64-NEXT:    vor.vv v16, v20, v16
1406; RV64-NEXT:    vor.vv v12, v16, v12
1407; RV64-NEXT:    vsll.vi v16, v8, 8
1408; RV64-NEXT:    slli a3, a2, 32
1409; RV64-NEXT:    vand.vx v16, v16, a3
1410; RV64-NEXT:    vsll.vi v20, v8, 24
1411; RV64-NEXT:    slli a3, a2, 40
1412; RV64-NEXT:    vand.vx v20, v20, a3
1413; RV64-NEXT:    vor.vv v16, v20, v16
1414; RV64-NEXT:    vsll.vx v20, v8, a0
1415; RV64-NEXT:    vsll.vx v8, v8, a1
1416; RV64-NEXT:    slli a0, a2, 48
1417; RV64-NEXT:    vand.vx v8, v8, a0
1418; RV64-NEXT:    vor.vv v8, v20, v8
1419; RV64-NEXT:    vor.vv v8, v8, v16
1420; RV64-NEXT:    vor.vv v8, v8, v12
1421; RV64-NEXT:    vsrl.vi v12, v8, 4
1422; RV64-NEXT:    lui a0, 3855
1423; RV64-NEXT:    addiw a0, a0, 241
1424; RV64-NEXT:    slli a0, a0, 12
1425; RV64-NEXT:    addi a0, a0, -241
1426; RV64-NEXT:    slli a0, a0, 12
1427; RV64-NEXT:    addi a0, a0, 241
1428; RV64-NEXT:    slli a0, a0, 12
1429; RV64-NEXT:    addi a0, a0, -241
1430; RV64-NEXT:    vand.vx v12, v12, a0
1431; RV64-NEXT:    vand.vx v8, v8, a0
1432; RV64-NEXT:    vsll.vi v8, v8, 4
1433; RV64-NEXT:    vor.vv v8, v12, v8
1434; RV64-NEXT:    vsrl.vi v12, v8, 2
1435; RV64-NEXT:    lui a0, 13107
1436; RV64-NEXT:    addiw a0, a0, 819
1437; RV64-NEXT:    slli a0, a0, 12
1438; RV64-NEXT:    addi a0, a0, 819
1439; RV64-NEXT:    slli a0, a0, 12
1440; RV64-NEXT:    addi a0, a0, 819
1441; RV64-NEXT:    slli a0, a0, 12
1442; RV64-NEXT:    addi a0, a0, 819
1443; RV64-NEXT:    vand.vx v12, v12, a0
1444; RV64-NEXT:    vand.vx v8, v8, a0
1445; RV64-NEXT:    vsll.vi v8, v8, 2
1446; RV64-NEXT:    vor.vv v8, v12, v8
1447; RV64-NEXT:    vsrl.vi v12, v8, 1
1448; RV64-NEXT:    lui a0, 21845
1449; RV64-NEXT:    addiw a0, a0, 1365
1450; RV64-NEXT:    slli a0, a0, 12
1451; RV64-NEXT:    addi a0, a0, 1365
1452; RV64-NEXT:    slli a0, a0, 12
1453; RV64-NEXT:    addi a0, a0, 1365
1454; RV64-NEXT:    slli a0, a0, 12
1455; RV64-NEXT:    addi a0, a0, 1365
1456; RV64-NEXT:    vand.vx v12, v12, a0
1457; RV64-NEXT:    vand.vx v8, v8, a0
1458; RV64-NEXT:    vadd.vv v8, v8, v8
1459; RV64-NEXT:    vor.vv v8, v12, v8
1460; RV64-NEXT:    ret
1461  %a = call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> %va)
1462  ret <vscale x 4 x i64> %a
1463}
1464declare <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64>)
1465
1466define <vscale x 8 x i64> @bitreverse_nxv8i64(<vscale x 8 x i64> %va) {
1467; RV32-LABEL: bitreverse_nxv8i64:
1468; RV32:       # %bb.0:
1469; RV32-NEXT:    addi sp, sp, -16
1470; RV32-NEXT:    .cfi_def_cfa_offset 16
1471; RV32-NEXT:    csrr a0, vlenb
1472; RV32-NEXT:    slli a0, a0, 4
1473; RV32-NEXT:    sub sp, sp, a0
1474; RV32-NEXT:    sw zero, 12(sp)
1475; RV32-NEXT:    lui a0, 1044480
1476; RV32-NEXT:    sw a0, 8(sp)
1477; RV32-NEXT:    lui a0, 4080
1478; RV32-NEXT:    sw a0, 12(sp)
1479; RV32-NEXT:    sw zero, 8(sp)
1480; RV32-NEXT:    li a1, 255
1481; RV32-NEXT:    sw a1, 12(sp)
1482; RV32-NEXT:    lui a1, 16
1483; RV32-NEXT:    addi a1, a1, -256
1484; RV32-NEXT:    sw a1, 12(sp)
1485; RV32-NEXT:    lui a2, 61681
1486; RV32-NEXT:    addi a2, a2, -241
1487; RV32-NEXT:    sw a2, 12(sp)
1488; RV32-NEXT:    sw a2, 8(sp)
1489; RV32-NEXT:    lui a2, 209715
1490; RV32-NEXT:    addi a2, a2, 819
1491; RV32-NEXT:    sw a2, 12(sp)
1492; RV32-NEXT:    sw a2, 8(sp)
1493; RV32-NEXT:    lui a2, 349525
1494; RV32-NEXT:    addi a2, a2, 1365
1495; RV32-NEXT:    sw a2, 12(sp)
1496; RV32-NEXT:    sw a2, 8(sp)
1497; RV32-NEXT:    li a2, 56
1498; RV32-NEXT:    vsetvli a3, zero, e64, m8, ta, mu
1499; RV32-NEXT:    vsrl.vx v16, v8, a2
1500; RV32-NEXT:    li a3, 40
1501; RV32-NEXT:    vsrl.vx v24, v8, a3
1502; RV32-NEXT:    addi a4, sp, 8
1503; RV32-NEXT:    vlse64.v v0, (a4), zero
1504; RV32-NEXT:    vand.vx v24, v24, a1
1505; RV32-NEXT:    vor.vv v16, v24, v16
1506; RV32-NEXT:    csrr a1, vlenb
1507; RV32-NEXT:    slli a1, a1, 3
1508; RV32-NEXT:    add a1, sp, a1
1509; RV32-NEXT:    addi a1, a1, 16
1510; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
1511; RV32-NEXT:    vsrl.vi v24, v8, 8
1512; RV32-NEXT:    vand.vv v24, v24, v0
1513; RV32-NEXT:    vsrl.vi v0, v8, 24
1514; RV32-NEXT:    vand.vx v0, v0, a0
1515; RV32-NEXT:    addi a0, sp, 8
1516; RV32-NEXT:    vlse64.v v16, (a0), zero
1517; RV32-NEXT:    vor.vv v24, v24, v0
1518; RV32-NEXT:    csrr a0, vlenb
1519; RV32-NEXT:    slli a0, a0, 3
1520; RV32-NEXT:    add a0, sp, a0
1521; RV32-NEXT:    addi a0, a0, 16
1522; RV32-NEXT:    vl8re8.v v0, (a0) # Unknown-size Folded Reload
1523; RV32-NEXT:    vor.vv v24, v24, v0
1524; RV32-NEXT:    csrr a0, vlenb
1525; RV32-NEXT:    slli a0, a0, 3
1526; RV32-NEXT:    add a0, sp, a0
1527; RV32-NEXT:    addi a0, a0, 16
1528; RV32-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
1529; RV32-NEXT:    vsll.vx v24, v8, a3
1530; RV32-NEXT:    vand.vv v16, v24, v16
1531; RV32-NEXT:    vsll.vx v24, v8, a2
1532; RV32-NEXT:    addi a0, sp, 8
1533; RV32-NEXT:    vlse64.v v0, (a0), zero
1534; RV32-NEXT:    vor.vv v16, v24, v16
1535; RV32-NEXT:    addi a0, sp, 16
1536; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
1537; RV32-NEXT:    addi a0, sp, 8
1538; RV32-NEXT:    vlse64.v v16, (a0), zero
1539; RV32-NEXT:    vsll.vi v24, v8, 8
1540; RV32-NEXT:    vand.vv v24, v24, v0
1541; RV32-NEXT:    vsll.vi v8, v8, 24
1542; RV32-NEXT:    vand.vv v8, v8, v16
1543; RV32-NEXT:    vor.vv v8, v8, v24
1544; RV32-NEXT:    addi a0, sp, 8
1545; RV32-NEXT:    vlse64.v v16, (a0), zero
1546; RV32-NEXT:    addi a0, sp, 16
1547; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
1548; RV32-NEXT:    vor.vv v8, v24, v8
1549; RV32-NEXT:    csrr a0, vlenb
1550; RV32-NEXT:    slli a0, a0, 3
1551; RV32-NEXT:    add a0, sp, a0
1552; RV32-NEXT:    addi a0, a0, 16
1553; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
1554; RV32-NEXT:    vor.vv v8, v8, v24
1555; RV32-NEXT:    vsrl.vi v24, v8, 4
1556; RV32-NEXT:    vand.vv v24, v24, v16
1557; RV32-NEXT:    vand.vv v8, v8, v16
1558; RV32-NEXT:    addi a0, sp, 8
1559; RV32-NEXT:    vlse64.v v16, (a0), zero
1560; RV32-NEXT:    vsll.vi v8, v8, 4
1561; RV32-NEXT:    vor.vv v8, v24, v8
1562; RV32-NEXT:    vsrl.vi v24, v8, 2
1563; RV32-NEXT:    vand.vv v24, v24, v16
1564; RV32-NEXT:    vand.vv v8, v8, v16
1565; RV32-NEXT:    addi a0, sp, 8
1566; RV32-NEXT:    vlse64.v v16, (a0), zero
1567; RV32-NEXT:    vsll.vi v8, v8, 2
1568; RV32-NEXT:    vor.vv v8, v24, v8
1569; RV32-NEXT:    vsrl.vi v24, v8, 1
1570; RV32-NEXT:    vand.vv v24, v24, v16
1571; RV32-NEXT:    vand.vv v8, v8, v16
1572; RV32-NEXT:    vadd.vv v8, v8, v8
1573; RV32-NEXT:    vor.vv v8, v24, v8
1574; RV32-NEXT:    csrr a0, vlenb
1575; RV32-NEXT:    slli a0, a0, 4
1576; RV32-NEXT:    add sp, sp, a0
1577; RV32-NEXT:    addi sp, sp, 16
1578; RV32-NEXT:    ret
1579;
1580; RV64-LABEL: bitreverse_nxv8i64:
1581; RV64:       # %bb.0:
1582; RV64-NEXT:    li a0, 56
1583; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1584; RV64-NEXT:    vsrl.vx v16, v8, a0
1585; RV64-NEXT:    li a1, 40
1586; RV64-NEXT:    vsrl.vx v24, v8, a1
1587; RV64-NEXT:    lui a2, 16
1588; RV64-NEXT:    addiw a2, a2, -256
1589; RV64-NEXT:    vand.vx v24, v24, a2
1590; RV64-NEXT:    vor.vv v16, v24, v16
1591; RV64-NEXT:    vsrl.vi v24, v8, 24
1592; RV64-NEXT:    lui a2, 4080
1593; RV64-NEXT:    vand.vx v24, v24, a2
1594; RV64-NEXT:    vsrl.vi v0, v8, 8
1595; RV64-NEXT:    li a2, 255
1596; RV64-NEXT:    slli a3, a2, 24
1597; RV64-NEXT:    vand.vx v0, v0, a3
1598; RV64-NEXT:    vor.vv v24, v0, v24
1599; RV64-NEXT:    vor.vv v16, v24, v16
1600; RV64-NEXT:    vsll.vi v24, v8, 8
1601; RV64-NEXT:    slli a3, a2, 32
1602; RV64-NEXT:    vand.vx v24, v24, a3
1603; RV64-NEXT:    vsll.vi v0, v8, 24
1604; RV64-NEXT:    slli a3, a2, 40
1605; RV64-NEXT:    vand.vx v0, v0, a3
1606; RV64-NEXT:    vor.vv v24, v0, v24
1607; RV64-NEXT:    vsll.vx v0, v8, a0
1608; RV64-NEXT:    vsll.vx v8, v8, a1
1609; RV64-NEXT:    slli a0, a2, 48
1610; RV64-NEXT:    vand.vx v8, v8, a0
1611; RV64-NEXT:    vor.vv v8, v0, v8
1612; RV64-NEXT:    vor.vv v8, v8, v24
1613; RV64-NEXT:    vor.vv v8, v8, v16
1614; RV64-NEXT:    vsrl.vi v16, v8, 4
1615; RV64-NEXT:    lui a0, 3855
1616; RV64-NEXT:    addiw a0, a0, 241
1617; RV64-NEXT:    slli a0, a0, 12
1618; RV64-NEXT:    addi a0, a0, -241
1619; RV64-NEXT:    slli a0, a0, 12
1620; RV64-NEXT:    addi a0, a0, 241
1621; RV64-NEXT:    slli a0, a0, 12
1622; RV64-NEXT:    addi a0, a0, -241
1623; RV64-NEXT:    vand.vx v16, v16, a0
1624; RV64-NEXT:    vand.vx v8, v8, a0
1625; RV64-NEXT:    vsll.vi v8, v8, 4
1626; RV64-NEXT:    vor.vv v8, v16, v8
1627; RV64-NEXT:    vsrl.vi v16, v8, 2
1628; RV64-NEXT:    lui a0, 13107
1629; RV64-NEXT:    addiw a0, a0, 819
1630; RV64-NEXT:    slli a0, a0, 12
1631; RV64-NEXT:    addi a0, a0, 819
1632; RV64-NEXT:    slli a0, a0, 12
1633; RV64-NEXT:    addi a0, a0, 819
1634; RV64-NEXT:    slli a0, a0, 12
1635; RV64-NEXT:    addi a0, a0, 819
1636; RV64-NEXT:    vand.vx v16, v16, a0
1637; RV64-NEXT:    vand.vx v8, v8, a0
1638; RV64-NEXT:    vsll.vi v8, v8, 2
1639; RV64-NEXT:    vor.vv v8, v16, v8
1640; RV64-NEXT:    vsrl.vi v16, v8, 1
1641; RV64-NEXT:    lui a0, 21845
1642; RV64-NEXT:    addiw a0, a0, 1365
1643; RV64-NEXT:    slli a0, a0, 12
1644; RV64-NEXT:    addi a0, a0, 1365
1645; RV64-NEXT:    slli a0, a0, 12
1646; RV64-NEXT:    addi a0, a0, 1365
1647; RV64-NEXT:    slli a0, a0, 12
1648; RV64-NEXT:    addi a0, a0, 1365
1649; RV64-NEXT:    vand.vx v16, v16, a0
1650; RV64-NEXT:    vand.vx v8, v8, a0
1651; RV64-NEXT:    vadd.vv v8, v8, v8
1652; RV64-NEXT:    vor.vv v8, v16, v8
1653; RV64-NEXT:    ret
1654  %a = call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> %va)
1655  ret <vscale x 8 x i64> %a
1656}
1657declare <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64>)
1658