; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define <vscale x 1 x i8> @bitreverse_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8> %va)
  ret <vscale x 1 x i8> %a
}
declare <vscale x 1 x i8> @llvm.bitreverse.nxv1i8(<vscale x 1 x i8>)

define <vscale x 2 x i8> @bitreverse_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8> %va)
  ret <vscale x 2 x i8> %a
}
declare <vscale x 2 x i8> @llvm.bitreverse.nxv2i8(<vscale x 2 x i8>)

define <vscale x 4 x i8> @bitreverse_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8> %va)
  ret <vscale x 4 x i8> %a
}
declare <vscale x 4 x i8> @llvm.bitreverse.nxv4i8(<vscale x 4 x i8>)

define <vscale x 8 x i8> @bitreverse_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-NEXT:    vand.vi v9, v8, 15
; CHECK-NEXT:    vsll.vi v9, v9, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8> %va)
  ret <vscale x 8 x i8> %a
}
declare <vscale x 8 x i8> @llvm.bitreverse.nxv8i8(<vscale x 8 x i8>)

define <vscale x 16 x i8> @bitreverse_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
; CHECK-NEXT:    vand.vi v10, v8, 15
; CHECK-NEXT:    vsll.vi v10, v10, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v10, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8> %va)
  ret <vscale x 16 x i8> %a
}
declare <vscale x 16 x i8> @llvm.bitreverse.nxv16i8(<vscale x 16 x i8>)

define <vscale x 32 x i8> @bitreverse_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, mu
; CHECK-NEXT:    vand.vi v12, v8, 15
; CHECK-NEXT:    vsll.vi v12, v12, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v12, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8> %va)
  ret <vscale x 32 x i8> %a
}
declare <vscale x 32 x i8> @llvm.bitreverse.nxv32i8(<vscale x 32 x i8>)

define <vscale x 64 x i8> @bitreverse_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: bitreverse_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, mu
; CHECK-NEXT:    vand.vi v16, v8, 15
; CHECK-NEXT:    vsll.vi v16, v16, 4
; CHECK-NEXT:    vsrl.vi v8, v8, 4
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vsll.vi v8, v8, 2
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vor.vv v8, v16, v8
; CHECK-NEXT:    ret
  %a = call <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8> %va)
  ret <vscale x 64 x i8> %a
}
declare <vscale x 64 x i8> @llvm.bitreverse.nxv64i8(<vscale x 64 x i8>)

define <vscale x 1 x i16> @bitreverse_nxv1i16(<vscale x 1 x i16> %va) {
; RV32-LABEL: bitreverse_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16> %va)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.bitreverse.nxv1i16(<vscale x 1 x i16>)

define <vscale x 2 x i16> @bitreverse_nxv2i16(<vscale x 2 x i16> %va) {
; RV32-LABEL: bitreverse_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16> %va)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.bitreverse.nxv2i16(<vscale x 2 x i16>)

define <vscale x 4 x i16> @bitreverse_nxv4i16(<vscale x 4 x i16> %va) {
; RV32-LABEL: bitreverse_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16> %va)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.bitreverse.nxv4i16(<vscale x 4 x i16>)

define <vscale x 8 x i16> @bitreverse_nxv8i16(<vscale x 8 x i16> %va) {
; RV32-LABEL: bitreverse_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vsrl.vi v10, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vsrl.vi v10, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16> %va)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.bitreverse.nxv8i16(<vscale x 8 x i16>)

define <vscale x 16 x i16> @bitreverse_nxv16i16(<vscale x 16 x i16> %va) {
; RV32-LABEL: bitreverse_nxv16i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv16i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16> %va)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.bitreverse.nxv16i16(<vscale x 16 x i16>)

define <vscale x 32 x i16> @bitreverse_nxv32i16(<vscale x 32 x i16> %va) {
; RV32-LABEL: bitreverse_nxv32i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vsll.vi v8, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv32i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vsll.vi v8, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16> %va)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.bitreverse.nxv32i16(<vscale x 32 x i16>)

define <vscale x 1 x i32> @bitreverse_nxv1i32(<vscale x 1 x i32> %va) {
; RV32-LABEL: bitreverse_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vsrl.vi v10, v8, 24
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vsll.vi v10, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vsll.vi v10, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32> %va)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.bitreverse.nxv1i32(<vscale x 1 x i32>)

define <vscale x 2 x i32> @bitreverse_nxv2i32(<vscale x 2 x i32> %va) {
; RV32-LABEL: bitreverse_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vsrl.vi v10, v8, 24
; RV32-NEXT:    vor.vv v9, v9, v10
; RV32-NEXT:    vsll.vi v10, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v9, v9, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    vor.vv v9, v9, v10
; RV64-NEXT:    vsll.vi v10, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32> %va)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.bitreverse.nxv2i32(<vscale x 2 x i32>)

define <vscale x 4 x i32> @bitreverse_nxv4i32(<vscale x 4 x i32> %va) {
; RV32-LABEL: bitreverse_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vsrl.vi v10, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vsrl.vi v12, v8, 24
; RV32-NEXT:    vor.vv v10, v10, v12
; RV32-NEXT:    vsll.vi v12, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v10, v10, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vsrl.vi v10, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsrl.vi v12, v8, 24
; RV64-NEXT:    vor.vv v10, v10, v12
; RV64-NEXT:    vsll.vi v12, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32> %va)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.bitreverse.nxv4i32(<vscale x 4 x i32>)

define <vscale x 8 x i32> @bitreverse_nxv8i32(<vscale x 8 x i32> %va) {
; RV32-LABEL: bitreverse_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vsrl.vi v16, v8, 24
; RV32-NEXT:    vor.vv v12, v12, v16
; RV32-NEXT:    vsll.vi v16, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v12, v12, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsrl.vi v16, v8, 24
; RV64-NEXT:    vor.vv v12, v12, v16
; RV64-NEXT:    vsll.vi v16, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32> %va)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.bitreverse.nxv8i32(<vscale x 8 x i32>)

define <vscale x 16 x i32> @bitreverse_nxv16i32(<vscale x 16 x i32> %va) {
; RV32-LABEL: bitreverse_nxv16i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    lui a0, 16
; RV32-NEXT:    addi a0, a0, -256
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsrl.vi v24, v8, 24
; RV32-NEXT:    vor.vv v16, v16, v24
; RV32-NEXT:    vsll.vi v24, v8, 8
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    vand.vx v24, v24, a0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv16i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    lui a0, 16
; RV64-NEXT:    addiw a0, a0, -256
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    vor.vv v16, v16, v24
; RV64-NEXT:    vsll.vi v24, v8, 8
; RV64-NEXT:    lui a0, 4080
; RV64-NEXT:    vand.vx v24, v24, a0
; RV64-NEXT:    vsll.vi v8, v8, 24
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32> %va)
  ret <vscale x 16 x i32> %a
}
declare <vscale x 16 x i32> @llvm.bitreverse.nxv16i32(<vscale x 16 x i32>)

define <vscale x 1 x i64> @bitreverse_nxv1i64(<vscale x 1 x i64> %va) {
; RV32-LABEL: bitreverse_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a2, 61681
; RV32-NEXT:    addi a2, a2, -241
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m1, ta, mu
; RV32-NEXT:    vsrl.vx v9, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v10, v8, a3
; RV32-NEXT:    vand.vx v10, v10, a1
; RV32-NEXT:    vor.vv v9, v10, v9
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v10, (a1), zero
; RV32-NEXT:    vsrl.vi v11, v8, 24
; RV32-NEXT:    vand.vx v11, v11, a0
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vand.vv v10, v12, v10
; RV32-NEXT:    vor.vv v10, v10, v11
; RV32-NEXT:    vlse64.v v11, (a1), zero
; RV32-NEXT:    vor.vv v9, v10, v9
; RV32-NEXT:    vsll.vx v10, v8, a2
; RV32-NEXT:    vsll.vx v12, v8, a3
; RV32-NEXT:    vand.vv v11, v12, v11
; RV32-NEXT:    vlse64.v v12, (a1), zero
; RV32-NEXT:    vor.vv v10, v10, v11
; RV32-NEXT:    vlse64.v v11, (a1), zero
; RV32-NEXT:    vsll.vi v13, v8, 8
; RV32-NEXT:    vand.vv v12, v13, v12
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v11
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vlse64.v v11, (a1), zero
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vand.vv v9, v9, v11
; RV32-NEXT:    vand.vv v8, v8, v11
; RV32-NEXT:    vlse64.v v10, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vlse64.v v10, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v9, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
; RV64-NEXT:    vsrl.vx v9, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v10, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v10, v10, a2
; RV64-NEXT:    vor.vv v9, v10, v9
; RV64-NEXT:    vsrl.vi v10, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v10, v10, a2
; RV64-NEXT:    vsrl.vi v11, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v11, v11, a3
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vor.vv v9, v10, v9
; RV64-NEXT:    vsll.vi v10, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v10, v10, a3
; RV64-NEXT:    vsll.vi v11, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v11, v11, a3
; RV64-NEXT:    vor.vv v10, v11, v10
; RV64-NEXT:    vsll.vx v11, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v11, v8
; RV64-NEXT:    lui a0, %hi(.LCPI18_0)
; RV64-NEXT:    ld a0, %lo(.LCPI18_0)(a0)
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI18_1)
; RV64-NEXT:    ld a0, %lo(.LCPI18_1)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI18_2)
; RV64-NEXT:    ld a0, %lo(.LCPI18_2)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v9, v8
; RV64-NEXT:    ret
  %a = call <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64> %va)
  ret <vscale x 1 x i64> %a
}
declare <vscale x 1 x i64> @llvm.bitreverse.nxv1i64(<vscale x 1 x i64>)

define <vscale x 2 x i64> @bitreverse_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: bitreverse_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a2, 61681
; RV32-NEXT:    addi a2, a2, -241
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m2, ta, mu
; RV32-NEXT:    vsrl.vx v10, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v12, v8, a3
; RV32-NEXT:    vand.vx v12, v12, a1
; RV32-NEXT:    vor.vv v10, v12, v10
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v12, (a1), zero
; RV32-NEXT:    vsrl.vi v14, v8, 24
; RV32-NEXT:    vand.vx v14, v14, a0
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vand.vv v12, v16, v12
; RV32-NEXT:    vor.vv v12, v12, v14
; RV32-NEXT:    vlse64.v v14, (a1), zero
; RV32-NEXT:    vor.vv v10, v12, v10
; RV32-NEXT:    vsll.vx v12, v8, a2
; RV32-NEXT:    vsll.vx v16, v8, a3
; RV32-NEXT:    vand.vv v14, v16, v14
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vor.vv v12, v12, v14
; RV32-NEXT:    vlse64.v v14, (a1), zero
; RV32-NEXT:    vsll.vi v18, v8, 8
; RV32-NEXT:    vand.vv v16, v18, v16
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v14
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vlse64.v v14, (a1), zero
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vand.vv v10, v10, v14
; RV32-NEXT:    vand.vv v8, v8, v14
; RV32-NEXT:    vlse64.v v12, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vlse64.v v12, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v10, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
; RV64-NEXT:    vsrl.vx v10, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v12, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v12, v12, a2
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vsrl.vi v12, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v12, v12, a2
; RV64-NEXT:    vsrl.vi v14, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v14, v14, a3
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vor.vv v10, v12, v10
; RV64-NEXT:    vsll.vi v12, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v12, v12, a3
; RV64-NEXT:    vsll.vi v14, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v14, v14, a3
; RV64-NEXT:    vor.vv v12, v14, v12
; RV64-NEXT:    vsll.vx v14, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v14, v8
; RV64-NEXT:    lui a0, %hi(.LCPI19_0)
; RV64-NEXT:    ld a0, %lo(.LCPI19_0)(a0)
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI19_1)
; RV64-NEXT:    ld a0, %lo(.LCPI19_1)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI19_2)
; RV64-NEXT:    ld a0, %lo(.LCPI19_2)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v10, v8
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64> %va)
  ret <vscale x 2 x i64> %a
}
declare <vscale x 2 x i64> @llvm.bitreverse.nxv2i64(<vscale x 2 x i64>)

define <vscale x 4 x i64> @bitreverse_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: bitreverse_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a2, 61681
; RV32-NEXT:    addi a2, a2, -241
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m4, ta, mu
; RV32-NEXT:    vsrl.vx v12, v8, a2
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v16, v8, a3
; RV32-NEXT:    vand.vx v16, v16, a1
; RV32-NEXT:    vor.vv v12, v16, v12
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsrl.vi v20, v8, 24
; RV32-NEXT:    vand.vx v20, v20, a0
; RV32-NEXT:    vsrl.vi v24, v8, 8
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vor.vv v16, v16, v20
; RV32-NEXT:    vlse64.v v20, (a1), zero
; RV32-NEXT:    vor.vv v12, v16, v12
; RV32-NEXT:    vsll.vx v16, v8, a2
; RV32-NEXT:    vsll.vx v24, v8, a3
; RV32-NEXT:    vand.vv v20, v24, v20
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vor.vv v16, v16, v20
; RV32-NEXT:    vlse64.v v20, (a1), zero
; RV32-NEXT:    vsll.vi v28, v8, 8
; RV32-NEXT:    vand.vv v24, v28, v24
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v20
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vlse64.v v20, (a1), zero
; RV32-NEXT:    vor.vv v8, v16, v8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vand.vv v12, v12, v20
; RV32-NEXT:    vand.vv v8, v8, v20
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v12, v8
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
; RV64-NEXT:    vsrl.vx v12, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v16, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v16, v16, a2
; RV64-NEXT:    vor.vv v12, v16, v12
; RV64-NEXT:    vsrl.vi v16, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v16, v16, a2
; RV64-NEXT:    vsrl.vi v20, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v20, v20, a3
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vor.vv v12, v16, v12
; RV64-NEXT:    vsll.vi v16, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v16, v16, a3
; RV64-NEXT:    vsll.vi v20, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v20, v20, a3
; RV64-NEXT:    vor.vv v16, v20, v16
; RV64-NEXT:    vsll.vx v20, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v20, v8
; RV64-NEXT:    lui a0, %hi(.LCPI20_0)
; RV64-NEXT:    ld a0, %lo(.LCPI20_0)(a0)
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI20_1)
; RV64-NEXT:    ld a0, %lo(.LCPI20_1)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI20_2)
; RV64-NEXT:    ld a0, %lo(.LCPI20_2)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v12, v8
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64> %va)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.bitreverse.nxv4i64(<vscale x 4 x i64>)

define <vscale x 8 x i64> @bitreverse_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: bitreverse_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    sw zero, 12(sp)
; RV32-NEXT:    lui a0, 1044480
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4080
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw zero, 8(sp)
; RV32-NEXT:    li a1, 255
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    addi a1, a1, -256
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a2, 61681
; RV32-NEXT:    addi a2, a2, -241
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    lui a2, 349525
; RV32-NEXT:    addi a2, a2, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    sw a2, 8(sp)
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    vsetvli a3, zero, e64, m8, ta, mu
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    vsrl.vx v16, v8, a3
; RV32-NEXT:    vand.vx v16, v16, a1
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsrl.vx v0, v8, a2
; RV32-NEXT:    vor.vv v16, v16, v0
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    slli a4, a4, 3
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 16
; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsrl.vi v0, v8, 8
; RV32-NEXT:    vand.vv v24, v0, v24
; RV32-NEXT:    vsrl.vi v0, v8, 24
; RV32-NEXT:    vand.vx v0, v0, a0
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vor.vv v24, v24, v0
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8re8.v v0, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v24, v24, v0
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vsll.vx v24, v8, a3
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vsll.vx v24, v8, a2
; RV32-NEXT:    vlse64.v v0, (a1), zero
; RV32-NEXT:    vor.vv v16, v24, v16
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsll.vi v24, v8, 8
; RV32-NEXT:    vand.vv v24, v24, v0
; RV32-NEXT:    vsll.vi v8, v8, 24
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    addi a0, sp, 16
; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v24, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add a0, sp, a0
; RV32-NEXT:    addi a0, a0, 16
; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT:    vor.vv v8, v8, v24
; RV32-NEXT:    vsrl.vi v24, v8, 4
; RV32-NEXT:    vand.vv v24, v24, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 4
; RV32-NEXT:    vor.vv v8, v24, v8
; RV32-NEXT:    vsrl.vi v24, v8, 2
; RV32-NEXT:    vand.vv v24, v24, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vor.vv v8, v24, v8
; RV32-NEXT:    vsrl.vi v24, v8, 1
; RV32-NEXT:    vand.vv v24, v24, v16
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vor.vv v8, v24, v8
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: bitreverse_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsrl.vx v16, v8, a0
; RV64-NEXT:    li a1, 40
; RV64-NEXT:    vsrl.vx v24, v8, a1
; RV64-NEXT:    lui a2, 16
; RV64-NEXT:    addiw a2, a2, -256
; RV64-NEXT:    vand.vx v24, v24, a2
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vsrl.vi v24, v8, 24
; RV64-NEXT:    lui a2, 4080
; RV64-NEXT:    vand.vx v24, v24, a2
; RV64-NEXT:    vsrl.vi v0, v8, 8
; RV64-NEXT:    li a2, 255
; RV64-NEXT:    slli a3, a2, 24
; RV64-NEXT:    vand.vx v0, v0, a3
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vor.vv v16, v24, v16
; RV64-NEXT:    vsll.vi v24, v8, 8
; RV64-NEXT:    slli a3, a2, 32
; RV64-NEXT:    vand.vx v24, v24, a3
; RV64-NEXT:    vsll.vi v0, v8, 24
; RV64-NEXT:    slli a3, a2, 40
; RV64-NEXT:    vand.vx v0, v0, a3
; RV64-NEXT:    vor.vv v24, v0, v24
; RV64-NEXT:    vsll.vx v0, v8, a0
; RV64-NEXT:    vsll.vx v8, v8, a1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vor.vv v8, v0, v8
; RV64-NEXT:    lui a0, %hi(.LCPI21_0)
; RV64-NEXT:    ld a0, %lo(.LCPI21_0)(a0)
; RV64-NEXT:    vor.vv v8, v8, v24
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI21_1)
; RV64-NEXT:    ld a0, %lo(.LCPI21_1)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 4
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, %hi(.LCPI21_2)
; RV64-NEXT:    ld a0, %lo(.LCPI21_2)(a0)
; RV64-NEXT:    vsll.vi v8, v8, 2
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vor.vv v8, v16, v8
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64> %va)
  ret <vscale x 8 x i64> %a
}
declare <vscale x 8 x i64> @llvm.bitreverse.nxv8i64(<vscale x 8 x i64>)
