; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64
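; Test lowering of llvm.ctlz for scalable vectors.
;
; With only Zve64x (CHECK-ZVE64X), ctlz is expanded to the bit-smearing
; shift/or ladder followed by a bit-manipulation population count (masks
; 0x55/0x33/0x0f for i8). With +d (CHECK-D), i8 elements are instead
; zero-extended to i32, converted to f32, and the exponent is extracted with
; vnsrl.wi ..., 23; ctlz is then 134 - exponent (134 = f32 bias 127 plus 7),
; with zero inputs merged to 8. nxv32i8 and nxv64i8 use the expansion under
; both configurations, presumably because the 4x-widened type would exceed
; LMUL=8.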

define <vscale x 1 x i8> @ctlz_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_nxv1i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv1i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v9, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v9, v9
; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vrsub.vx v8, v9, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 false)
  ret <vscale x 1 x i8> %a
}
declare <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8>, i1)

define <vscale x 2 x i8> @ctlz_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_nxv2i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv2i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v9, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v9, v9
; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vrsub.vx v8, v9, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 false)
  ret <vscale x 2 x i8> %a
}
declare <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8>, i1)

define <vscale x 4 x i8> @ctlz_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_nxv4i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv4i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v10, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v10, v10
; CHECK-D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vrsub.vx v8, v9, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 false)
  ret <vscale x 4 x i8> %a
}
declare <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8>, i1)

define <vscale x 8 x i8> @ctlz_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_nxv8i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv8i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v12, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v12, v12
; CHECK-D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v10, v12, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v10
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vrsub.vx v8, v9, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 false)
  ret <vscale x 8 x i8> %a
}
declare <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8>, i1)

define <vscale x 16 x i8> @ctlz_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_nxv16i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv16i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v16, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v16, v16
; CHECK-D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v12, v16, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v10, v12
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    vrsub.vx v8, v10, a0
; CHECK-D-NEXT:    vmerge.vim v8, v8, 8, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 false)
  ret <vscale x 16 x i8> %a
}
declare <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8>, i1)

define <vscale x 32 x i8> @ctlz_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: ctlz_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, mu
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8> %va, i1 false)
  ret <vscale x 32 x i8> %a
}
declare <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8>, i1)

define <vscale x 64 x i8> @ctlz_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: ctlz_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, mu
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8> %va, i1 false)
  ret <vscale x 64 x i8> %a
}
declare <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8>, i1)

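; For i16, the +d lowering widens once to f32 with vfwcvt.f.xu.v and computes
; ctlz as 142 - exponent (142 = f32 bias 127 plus 15); zero inputs are merged
; to 16. nxv32i16 cannot widen within LMUL=8, so both configurations use the
; bit-manipulation expansion for it.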
define <vscale x 1 x i16> @ctlz_nxv1i16(<vscale x 1 x i16> %va) {
; RV32I-LABEL: ctlz_nxv1i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv1i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv1i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> %va, i1 false)
  ret <vscale x 1 x i16> %a
}
declare <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16>, i1)

define <vscale x 2 x i16> @ctlz_nxv2i16(<vscale x 2 x i16> %va) {
; RV32I-LABEL: ctlz_nxv2i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv2i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv2i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT:    vnsrl.wi v9, v9, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> %va, i1 false)
  ret <vscale x 2 x i16> %a
}
declare <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16>, i1)

define <vscale x 4 x i16> @ctlz_nxv4i16(<vscale x 4 x i16> %va) {
; RV32I-LABEL: ctlz_nxv4i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv4i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv4i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
; CHECK-D-NEXT:    vnsrl.wi v9, v10, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 false)
  ret <vscale x 4 x i16> %a
}
declare <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16>, i1)

define <vscale x 8 x i16> @ctlz_nxv8i16(<vscale x 8 x i16> %va) {
; RV32I-LABEL: ctlz_nxv8i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v10, v10, a0
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v10, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv8i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v10, v10, a0
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v10, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv8i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
; CHECK-D-NEXT:    vnsrl.wi v10, v12, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v10, v10, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v10, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 false)
  ret <vscale x 8 x i16> %a
}
declare <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16>, i1)

define <vscale x 16 x i16> @ctlz_nxv16i16(<vscale x 16 x i16> %va) {
; RV32I-LABEL: ctlz_nxv16i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v12, v12, a0
; RV32I-NEXT:    vsub.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v12, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v12, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv16i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV64I-NEXT:    vsrl.vi v12, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v12, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v12, v12, a0
; RV64I-NEXT:    vsub.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v12, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v12, v8
; RV64I-NEXT:    vsrl.vi v12, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv16i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v8
; CHECK-D-NEXT:    vnsrl.wi v12, v16, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v12, v12, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 16
; CHECK-D-NEXT:    vmerge.vxm v8, v12, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> %va, i1 false)
  ret <vscale x 16 x i16> %a
}
declare <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16>, i1)

define <vscale x 32 x i16> @ctlz_nxv32i16(<vscale x 32 x i16> %va) {
; RV32-LABEL: ctlz_nxv32i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    li a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_nxv32i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    li a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16> %va, i1 false)
  ret <vscale x 32 x i16> %a
}
declare <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16>, i1)

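; For i32, the +d lowering widens to f64, shifts the exponent down by 52 with
; vsrl.vx, and computes ctlz as 1054 - exponent (1054 = f64 bias 1023 plus
; 31); zero inputs are merged to 32. nxv16i32 again falls back to the
; expansion in both configurations.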
define <vscale x 1 x i32> @ctlz_nxv1i32(<vscale x 1 x i32> %va) {
; RV32I-LABEL: ctlz_nxv1i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv1i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv1i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-D-NEXT:    vsrl.vx v9, v9, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v9
; CHECK-D-NEXT:    li a0, 1054
; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 32
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> %va, i1 false)
  ret <vscale x 1 x i32> %a
}
declare <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32>, i1)

define <vscale x 2 x i32> @ctlz_nxv2i32(<vscale x 2 x i32> %va) {
; RV32I-LABEL: ctlz_nxv2i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv2i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv2i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-D-NEXT:    vsrl.vx v10, v10, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v9, v10
; CHECK-D-NEXT:    li a0, 1054
; CHECK-D-NEXT:    vrsub.vx v9, v9, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 32
; CHECK-D-NEXT:    vmerge.vxm v8, v9, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 false)
  ret <vscale x 2 x i32> %a
}
declare <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32>, i1)

define <vscale x 4 x i32> @ctlz_nxv4i32(<vscale x 4 x i32> %va) {
; RV32I-LABEL: ctlz_nxv4i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v10, v10, a0
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v10, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv4i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v10, v10, a0
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v10, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv4i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-D-NEXT:    vsrl.vx v12, v12, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v10, v12
; CHECK-D-NEXT:    li a0, 1054
; CHECK-D-NEXT:    vrsub.vx v10, v10, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 32
; CHECK-D-NEXT:    vmerge.vxm v8, v10, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 false)
  ret <vscale x 4 x i32> %a
}
declare <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32>, i1)

define <vscale x 8 x i32> @ctlz_nxv8i32(<vscale x 8 x i32> %va) {
; RV32I-LABEL: ctlz_nxv8i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v12, v12, a0
; RV32I-NEXT:    vsub.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v12, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v12, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_nxv8i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64I-NEXT:    vsrl.vi v12, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v12, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v12, v12, a0
; RV64I-NEXT:    vsub.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v12, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v12, v8
; RV64I-NEXT:    vsrl.vi v12, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_nxv8i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-D-NEXT:    vsrl.vx v16, v16, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v12, v16
; CHECK-D-NEXT:    li a0, 1054
; CHECK-D-NEXT:    vrsub.vx v12, v12, a0
; CHECK-D-NEXT:    vmseq.vi v0, v8, 0
; CHECK-D-NEXT:    li a0, 32
; CHECK-D-NEXT:    vmerge.vxm v8, v12, a0, v0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> %va, i1 false)
  ret <vscale x 8 x i32> %a
}
declare <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32>, i1)

define <vscale x 16 x i32> @ctlz_nxv16i32(<vscale x 16 x i32> %va) {
; RV32-LABEL: ctlz_nxv16i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 24
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_nxv16i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 24
; RV64-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32> %va, i1 false)
  ret <vscale x 16 x i32> %a
}
declare <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32>, i1)

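; For i64 there is no wider FP type to convert to, so both configurations use
; the bit-manipulation expansion. RV32 materializes the 64-bit splat constants
; by storing the 32-bit pattern twice to the stack and reloading with a
; zero-stride vlse64.v; RV64 loads them from the constant pool (.LCPIxx_n).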
1220define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) {
1221; RV32-LABEL: ctlz_nxv1i64:
1222; RV32:       # %bb.0:
1223; RV32-NEXT:    addi sp, sp, -16
1224; RV32-NEXT:    .cfi_def_cfa_offset 16
1225; RV32-NEXT:    lui a0, 349525
1226; RV32-NEXT:    addi a0, a0, 1365
1227; RV32-NEXT:    sw a0, 12(sp)
1228; RV32-NEXT:    sw a0, 8(sp)
1229; RV32-NEXT:    lui a0, 209715
1230; RV32-NEXT:    addi a0, a0, 819
1231; RV32-NEXT:    sw a0, 12(sp)
1232; RV32-NEXT:    sw a0, 8(sp)
1233; RV32-NEXT:    lui a0, 61681
1234; RV32-NEXT:    addi a0, a0, -241
1235; RV32-NEXT:    sw a0, 12(sp)
1236; RV32-NEXT:    sw a0, 8(sp)
1237; RV32-NEXT:    lui a0, 4112
1238; RV32-NEXT:    addi a0, a0, 257
1239; RV32-NEXT:    sw a0, 12(sp)
1240; RV32-NEXT:    sw a0, 8(sp)
1241; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
1242; RV32-NEXT:    vsrl.vi v9, v8, 1
1243; RV32-NEXT:    vor.vv v8, v8, v9
1244; RV32-NEXT:    vsrl.vi v9, v8, 2
1245; RV32-NEXT:    vor.vv v8, v8, v9
1246; RV32-NEXT:    vsrl.vi v9, v8, 4
1247; RV32-NEXT:    vor.vv v8, v8, v9
1248; RV32-NEXT:    vsrl.vi v9, v8, 8
1249; RV32-NEXT:    vor.vv v8, v8, v9
1250; RV32-NEXT:    vsrl.vi v9, v8, 16
1251; RV32-NEXT:    vor.vv v8, v8, v9
1252; RV32-NEXT:    li a0, 32
1253; RV32-NEXT:    vsrl.vx v9, v8, a0
1254; RV32-NEXT:    vor.vv v8, v8, v9
1255; RV32-NEXT:    addi a0, sp, 8
1256; RV32-NEXT:    vlse64.v v9, (a0), zero
1257; RV32-NEXT:    vnot.v v8, v8
1258; RV32-NEXT:    vlse64.v v10, (a0), zero
1259; RV32-NEXT:    vsrl.vi v11, v8, 1
1260; RV32-NEXT:    vand.vv v9, v11, v9
1261; RV32-NEXT:    vsub.vv v8, v8, v9
1262; RV32-NEXT:    vand.vv v9, v8, v10
1263; RV32-NEXT:    vsrl.vi v8, v8, 2
1264; RV32-NEXT:    vand.vv v8, v8, v10
1265; RV32-NEXT:    vadd.vv v8, v9, v8
1266; RV32-NEXT:    vlse64.v v9, (a0), zero
1267; RV32-NEXT:    vlse64.v v10, (a0), zero
1268; RV32-NEXT:    vsrl.vi v11, v8, 4
1269; RV32-NEXT:    vadd.vv v8, v8, v11
1270; RV32-NEXT:    vand.vv v8, v8, v9
1271; RV32-NEXT:    vmul.vv v8, v8, v10
1272; RV32-NEXT:    li a0, 56
1273; RV32-NEXT:    vsrl.vx v8, v8, a0
1274; RV32-NEXT:    addi sp, sp, 16
1275; RV32-NEXT:    ret
1276;
1277; RV64-LABEL: ctlz_nxv1i64:
1278; RV64:       # %bb.0:
1279; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
1280; RV64-NEXT:    vsrl.vi v9, v8, 1
1281; RV64-NEXT:    vor.vv v8, v8, v9
1282; RV64-NEXT:    vsrl.vi v9, v8, 2
1283; RV64-NEXT:    vor.vv v8, v8, v9
1284; RV64-NEXT:    vsrl.vi v9, v8, 4
1285; RV64-NEXT:    vor.vv v8, v8, v9
1286; RV64-NEXT:    vsrl.vi v9, v8, 8
1287; RV64-NEXT:    vor.vv v8, v8, v9
1288; RV64-NEXT:    vsrl.vi v9, v8, 16
1289; RV64-NEXT:    vor.vv v8, v8, v9
1290; RV64-NEXT:    li a0, 32
1291; RV64-NEXT:    vsrl.vx v9, v8, a0
1292; RV64-NEXT:    vor.vv v8, v8, v9
1293; RV64-NEXT:    vnot.v v8, v8
1294; RV64-NEXT:    lui a0, %hi(.LCPI18_0)
1295; RV64-NEXT:    ld a0, %lo(.LCPI18_0)(a0)
1296; RV64-NEXT:    lui a1, %hi(.LCPI18_1)
1297; RV64-NEXT:    ld a1, %lo(.LCPI18_1)(a1)
1298; RV64-NEXT:    vsrl.vi v9, v8, 1
1299; RV64-NEXT:    vand.vx v9, v9, a0
1300; RV64-NEXT:    vsub.vv v8, v8, v9
1301; RV64-NEXT:    vand.vx v9, v8, a1
1302; RV64-NEXT:    vsrl.vi v8, v8, 2
1303; RV64-NEXT:    vand.vx v8, v8, a1
1304; RV64-NEXT:    vadd.vv v8, v9, v8
1305; RV64-NEXT:    lui a0, %hi(.LCPI18_2)
1306; RV64-NEXT:    ld a0, %lo(.LCPI18_2)(a0)
1307; RV64-NEXT:    lui a1, %hi(.LCPI18_3)
1308; RV64-NEXT:    ld a1, %lo(.LCPI18_3)(a1)
1309; RV64-NEXT:    vsrl.vi v9, v8, 4
1310; RV64-NEXT:    vadd.vv v8, v8, v9
1311; RV64-NEXT:    vand.vx v8, v8, a0
1312; RV64-NEXT:    vmul.vx v8, v8, a1
1313; RV64-NEXT:    li a0, 56
1314; RV64-NEXT:    vsrl.vx v8, v8, a0
1315; RV64-NEXT:    ret
1316  %a = call <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64> %va, i1 false)
1317  ret <vscale x 1 x i64> %a
1318}
1319declare <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64>, i1)
1320
1321define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) {
1322; RV32-LABEL: ctlz_nxv2i64:
1323; RV32:       # %bb.0:
1324; RV32-NEXT:    addi sp, sp, -16
1325; RV32-NEXT:    .cfi_def_cfa_offset 16
1326; RV32-NEXT:    lui a0, 349525
1327; RV32-NEXT:    addi a0, a0, 1365
1328; RV32-NEXT:    sw a0, 12(sp)
1329; RV32-NEXT:    sw a0, 8(sp)
1330; RV32-NEXT:    lui a0, 209715
1331; RV32-NEXT:    addi a0, a0, 819
1332; RV32-NEXT:    sw a0, 12(sp)
1333; RV32-NEXT:    sw a0, 8(sp)
1334; RV32-NEXT:    lui a0, 61681
1335; RV32-NEXT:    addi a0, a0, -241
1336; RV32-NEXT:    sw a0, 12(sp)
1337; RV32-NEXT:    sw a0, 8(sp)
1338; RV32-NEXT:    lui a0, 4112
1339; RV32-NEXT:    addi a0, a0, 257
1340; RV32-NEXT:    sw a0, 12(sp)
1341; RV32-NEXT:    sw a0, 8(sp)
1342; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
1343; RV32-NEXT:    vsrl.vi v10, v8, 1
1344; RV32-NEXT:    vor.vv v8, v8, v10
1345; RV32-NEXT:    vsrl.vi v10, v8, 2
1346; RV32-NEXT:    vor.vv v8, v8, v10
1347; RV32-NEXT:    vsrl.vi v10, v8, 4
1348; RV32-NEXT:    vor.vv v8, v8, v10
1349; RV32-NEXT:    vsrl.vi v10, v8, 8
1350; RV32-NEXT:    vor.vv v8, v8, v10
1351; RV32-NEXT:    vsrl.vi v10, v8, 16
1352; RV32-NEXT:    vor.vv v8, v8, v10
1353; RV32-NEXT:    li a0, 32
1354; RV32-NEXT:    vsrl.vx v10, v8, a0
1355; RV32-NEXT:    vor.vv v8, v8, v10
1356; RV32-NEXT:    addi a0, sp, 8
1357; RV32-NEXT:    vlse64.v v10, (a0), zero
1358; RV32-NEXT:    vnot.v v8, v8
1359; RV32-NEXT:    vlse64.v v12, (a0), zero
1360; RV32-NEXT:    vsrl.vi v14, v8, 1
1361; RV32-NEXT:    vand.vv v10, v14, v10
1362; RV32-NEXT:    vsub.vv v8, v8, v10
1363; RV32-NEXT:    vand.vv v10, v8, v12
1364; RV32-NEXT:    vsrl.vi v8, v8, 2
1365; RV32-NEXT:    vand.vv v8, v8, v12
1366; RV32-NEXT:    vadd.vv v8, v10, v8
1367; RV32-NEXT:    vlse64.v v10, (a0), zero
1368; RV32-NEXT:    vlse64.v v12, (a0), zero
1369; RV32-NEXT:    vsrl.vi v14, v8, 4
1370; RV32-NEXT:    vadd.vv v8, v8, v14
1371; RV32-NEXT:    vand.vv v8, v8, v10
1372; RV32-NEXT:    vmul.vv v8, v8, v12
1373; RV32-NEXT:    li a0, 56
1374; RV32-NEXT:    vsrl.vx v8, v8, a0
1375; RV32-NEXT:    addi sp, sp, 16
1376; RV32-NEXT:    ret
1377;
1378; RV64-LABEL: ctlz_nxv2i64:
1379; RV64:       # %bb.0:
1380; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
1381; RV64-NEXT:    vsrl.vi v10, v8, 1
1382; RV64-NEXT:    vor.vv v8, v8, v10
1383; RV64-NEXT:    vsrl.vi v10, v8, 2
1384; RV64-NEXT:    vor.vv v8, v8, v10
1385; RV64-NEXT:    vsrl.vi v10, v8, 4
1386; RV64-NEXT:    vor.vv v8, v8, v10
1387; RV64-NEXT:    vsrl.vi v10, v8, 8
1388; RV64-NEXT:    vor.vv v8, v8, v10
1389; RV64-NEXT:    vsrl.vi v10, v8, 16
1390; RV64-NEXT:    vor.vv v8, v8, v10
1391; RV64-NEXT:    li a0, 32
1392; RV64-NEXT:    vsrl.vx v10, v8, a0
1393; RV64-NEXT:    vor.vv v8, v8, v10
1394; RV64-NEXT:    vnot.v v8, v8
1395; RV64-NEXT:    lui a0, %hi(.LCPI19_0)
1396; RV64-NEXT:    ld a0, %lo(.LCPI19_0)(a0)
1397; RV64-NEXT:    lui a1, %hi(.LCPI19_1)
1398; RV64-NEXT:    ld a1, %lo(.LCPI19_1)(a1)
1399; RV64-NEXT:    vsrl.vi v10, v8, 1
1400; RV64-NEXT:    vand.vx v10, v10, a0
1401; RV64-NEXT:    vsub.vv v8, v8, v10
1402; RV64-NEXT:    vand.vx v10, v8, a1
1403; RV64-NEXT:    vsrl.vi v8, v8, 2
1404; RV64-NEXT:    vand.vx v8, v8, a1
1405; RV64-NEXT:    vadd.vv v8, v10, v8
1406; RV64-NEXT:    lui a0, %hi(.LCPI19_2)
1407; RV64-NEXT:    ld a0, %lo(.LCPI19_2)(a0)
1408; RV64-NEXT:    lui a1, %hi(.LCPI19_3)
1409; RV64-NEXT:    ld a1, %lo(.LCPI19_3)(a1)
1410; RV64-NEXT:    vsrl.vi v10, v8, 4
1411; RV64-NEXT:    vadd.vv v8, v8, v10
1412; RV64-NEXT:    vand.vx v8, v8, a0
1413; RV64-NEXT:    vmul.vx v8, v8, a1
1414; RV64-NEXT:    li a0, 56
1415; RV64-NEXT:    vsrl.vx v8, v8, a0
1416; RV64-NEXT:    ret
1417  %a = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> %va, i1 false)
1418  ret <vscale x 2 x i64> %a
1419}
1420declare <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64>, i1)
1421
define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: ctlz_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v12, v8, a0
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsrl.vi v20, v8, 1
; RV32-NEXT:    vand.vv v12, v20, v12
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsrl.vi v20, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v20
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v16
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsrl.vx v12, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    lui a0, %hi(.LCPI20_0)
; RV64-NEXT:    ld a0, %lo(.LCPI20_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI20_1)
; RV64-NEXT:    ld a1, %lo(.LCPI20_1)(a1)
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    lui a0, %hi(.LCPI20_2)
; RV64-NEXT:    ld a0, %lo(.LCPI20_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI20_3)
; RV64-NEXT:    ld a1, %lo(.LCPI20_3)(a1)
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64> %va, i1 false)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64>, i1)

define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: ctlz_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v16, v8, a0
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    vand.vv v16, v0, v16
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    vand.vv v16, v8, v24
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v0
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsrl.vx v16, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    lui a0, %hi(.LCPI21_0)
; RV64-NEXT:    ld a0, %lo(.LCPI21_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI21_1)
; RV64-NEXT:    ld a1, %lo(.LCPI21_1)(a1)
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    lui a0, %hi(.LCPI21_2)
; RV64-NEXT:    ld a0, %lo(.LCPI21_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI21_3)
; RV64-NEXT:    ld a1, %lo(.LCPI21_3)(a1)
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 false)
  ret <vscale x 8 x i64> %a
}
declare <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64>, i1)

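; The ctlz_zero_undef_* variants pass `i1 true`, making a zero input poison,
; so the CHECK-D lowerings below can drop the vmseq.vi/vmerge.vim fix-up used
; by the plain ctlz tests. The count comes from a float exponent; a scalar
; sketch of the e8 idiom (illustrative only, not autogenerated output):
;   f = (float)(uint32_t)x          // exact, since x < 2^24
;   exp = float_bits(f) >> 23       // biased exponent
;   ctlz8(x) = 134 - exp            // 134 = 127 (bias) + 7 (i8 MSB index)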
define <vscale x 1 x i8> @ctlz_zero_undef_nxv1i8(<vscale x 1 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv1i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv1i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v9, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v9
; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v8, v8
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 1 x i8> @llvm.ctlz.nxv1i8(<vscale x 1 x i8> %va, i1 true)
  ret <vscale x 1 x i8> %a
}

define <vscale x 2 x i8> @ctlz_zero_undef_nxv2i8(<vscale x 2 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv2i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv2i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v9, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v9
; CHECK-D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v8, v8, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v8, v8
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 2 x i8> @llvm.ctlz.nxv2i8(<vscale x 2 x i8> %va, i1 true)
  ret <vscale x 2 x i8> %a
}

define <vscale x 4 x i8> @ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv4i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv4i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v10, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v10
; CHECK-D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v10, v8, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v8, v10
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 4 x i8> @llvm.ctlz.nxv4i8(<vscale x 4 x i8> %va, i1 true)
  ret <vscale x 4 x i8> %a
}

define <vscale x 8 x i8> @ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv8i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v9, v9, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v9, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v9, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v9, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v9
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv8i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v12, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v12
; CHECK-D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v12, v8, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v8, v12
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 8 x i8> @llvm.ctlz.nxv8i8(<vscale x 8 x i8> %va, i1 true)
  ret <vscale x 8 x i8> %a
}

define <vscale x 16 x i8> @ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
; CHECK-ZVE64X-LABEL: ctlz_zero_undef_nxv16i8:
; CHECK-ZVE64X:       # %bb.0:
; CHECK-ZVE64X-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 2
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
; CHECK-ZVE64X-NEXT:    vor.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vnot.v v8, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 1
; CHECK-ZVE64X-NEXT:    li a0, 85
; CHECK-ZVE64X-NEXT:    vand.vx v10, v10, a0
; CHECK-ZVE64X-NEXT:    vsub.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    li a0, 51
; CHECK-ZVE64X-NEXT:    vand.vx v10, v8, a0
; CHECK-ZVE64X-NEXT:    vsrl.vi v8, v8, 2
; CHECK-ZVE64X-NEXT:    vand.vx v8, v8, a0
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v10, v8
; CHECK-ZVE64X-NEXT:    vsrl.vi v10, v8, 4
; CHECK-ZVE64X-NEXT:    vadd.vv v8, v8, v10
; CHECK-ZVE64X-NEXT:    vand.vi v8, v8, 15
; CHECK-ZVE64X-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv16i8:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; CHECK-D-NEXT:    vzext.vf4 v16, v8
; CHECK-D-NEXT:    vfcvt.f.xu.v v8, v16
; CHECK-D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
; CHECK-D-NEXT:    vnsrl.wi v16, v8, 23
; CHECK-D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v8, v16
; CHECK-D-NEXT:    li a0, 134
; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 16 x i8> @llvm.ctlz.nxv16i8(<vscale x 16 x i8> %va, i1 true)
  ret <vscale x 16 x i8> %a
}

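; From e8 at LMUL=4 upward, vzext.vf4 would need an e32 register group wider
; than m8, so all run lines fall back to the integer expansion and one CHECK
; prefix covers them.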
define <vscale x 32 x i8> @ctlz_zero_undef_nxv32i8(<vscale x 32 x i8> %va) {
; CHECK-LABEL: ctlz_zero_undef_nxv32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, mu
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 32 x i8> @llvm.ctlz.nxv32i8(<vscale x 32 x i8> %va, i1 true)
  ret <vscale x 32 x i8> %a
}

define <vscale x 64 x i8> @ctlz_zero_undef_nxv64i8(<vscale x 64 x i8> %va) {
; CHECK-LABEL: ctlz_zero_undef_nxv64i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, mu
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v16
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vand.vx v16, v16, a0
; CHECK-NEXT:    vsub.vv v8, v8, v16
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vand.vx v16, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v16, v8
; CHECK-NEXT:    vsrl.vi v16, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v16
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %a = call <vscale x 64 x i8> @llvm.ctlz.nxv64i8(<vscale x 64 x i8> %va, i1 true)
  ret <vscale x 64 x i8> %a
}

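; For e16 the CHECK-D path widens directly to f32 with vfwcvt.f.xu.v and the
; bias constant becomes 142 = 127 + 15.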
define <vscale x 1 x i16> @ctlz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
; RV32I-LABEL: ctlz_zero_undef_nxv1i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_zero_undef_nxv1i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv1i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT:    vnsrl.wi v8, v9, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 1 x i16> @llvm.ctlz.nxv1i16(<vscale x 1 x i16> %va, i1 true)
  ret <vscale x 1 x i16> %a
}

define <vscale x 2 x i16> @ctlz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
; RV32I-LABEL: ctlz_zero_undef_nxv2i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_zero_undef_nxv2i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv2i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT:    vnsrl.wi v8, v9, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 2 x i16> @llvm.ctlz.nxv2i16(<vscale x 2 x i16> %va, i1 true)
  ret <vscale x 2 x i16> %a
}

define <vscale x 4 x i16> @ctlz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
; RV32I-LABEL: ctlz_zero_undef_nxv4i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_zero_undef_nxv4i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv4i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
; CHECK-D-NEXT:    vnsrl.wi v8, v10, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 4 x i16> @llvm.ctlz.nxv4i16(<vscale x 4 x i16> %va, i1 true)
  ret <vscale x 4 x i16> %a
}

define <vscale x 8 x i16> @ctlz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
; RV32I-LABEL: ctlz_zero_undef_nxv8i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v10, v10, a0
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v10, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_zero_undef_nxv8i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v10, v10, a0
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v10, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv8i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
; CHECK-D-NEXT:    vnsrl.wi v8, v12, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 8 x i16> @llvm.ctlz.nxv8i16(<vscale x 8 x i16> %va, i1 true)
  ret <vscale x 8 x i16> %a
}

define <vscale x 16 x i16> @ctlz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
; RV32I-LABEL: ctlz_zero_undef_nxv16i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    lui a0, 5
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v12, v12, a0
; RV32I-NEXT:    vsub.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 3
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v12, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v12, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    li a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 8
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_zero_undef_nxv16i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; RV64I-NEXT:    vsrl.vi v12, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v12, v8, 1
; RV64I-NEXT:    lui a0, 5
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v12, v12, a0
; RV64I-NEXT:    vsub.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 3
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v12, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v12, v8
; RV64I-NEXT:    vsrl.vi v12, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    li a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 8
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv16i16:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v8
; CHECK-D-NEXT:    vnsrl.wi v8, v16, 23
; CHECK-D-NEXT:    li a0, 142
; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 16 x i16> @llvm.ctlz.nxv16i16(<vscale x 16 x i16> %va, i1 true)
  ret <vscale x 16 x i16> %a
}

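; e16 at LMUL=8 would need an f32 group wider than m8, so both register
; configurations share the integer expansion here.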
define <vscale x 32 x i16> @ctlz_zero_undef_nxv32i16(<vscale x 32 x i16> %va) {
; RV32-LABEL: ctlz_zero_undef_nxv32i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 5
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    lui a0, 3
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    lui a0, 1
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    li a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_zero_undef_nxv32i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 5
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    lui a0, 3
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    lui a0, 1
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    li a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 8
; RV64-NEXT:    ret
  %a = call <vscale x 32 x i16> @llvm.ctlz.nxv32i16(<vscale x 32 x i16> %va, i1 true)
  ret <vscale x 32 x i16> %a
}

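; For e32 the CHECK-D path widens to f64 (available in the +d run lines),
; shifts the biased exponent down past the 52 mantissa bits, and computes
; the result as 1054 - exponent, where 1054 = 1023 + 31.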
define <vscale x 1 x i32> @ctlz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
; RV32I-LABEL: ctlz_zero_undef_nxv1i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_zero_undef_nxv1i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv1i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-D-NEXT:    vsrl.vx v8, v9, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v8, v8
; CHECK-D-NEXT:    li a0, 1054
; CHECK-D-NEXT:    vrsub.vx v8, v8, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 1 x i32> @llvm.ctlz.nxv1i32(<vscale x 1 x i32> %va, i1 true)
  ret <vscale x 1 x i32> %a
}

define <vscale x 2 x i32> @ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
; RV32I-LABEL: ctlz_zero_undef_nxv2i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vsrl.vi v9, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v9
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v9, v9, a0
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v9, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_zero_undef_nxv2i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v9, v9, a0
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v9, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv2i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v10, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
; CHECK-D-NEXT:    vsrl.vx v8, v10, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v10, v8
; CHECK-D-NEXT:    li a0, 1054
; CHECK-D-NEXT:    vrsub.vx v8, v10, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 2 x i32> @llvm.ctlz.nxv2i32(<vscale x 2 x i32> %va, i1 true)
  ret <vscale x 2 x i32> %a
}

define <vscale x 4 x i32> @ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
; RV32I-LABEL: ctlz_zero_undef_nxv4i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v10, v10, a0
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v10, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_zero_undef_nxv4i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v10, v10, a0
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v10, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv4i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v12, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
; CHECK-D-NEXT:    vsrl.vx v8, v12, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v12, v8
; CHECK-D-NEXT:    li a0, 1054
; CHECK-D-NEXT:    vrsub.vx v8, v12, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> %va, i1 true)
  ret <vscale x 4 x i32> %a
}

define <vscale x 8 x i32> @ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
; RV32I-LABEL: ctlz_zero_undef_nxv8i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    lui a0, 349525
; RV32I-NEXT:    addi a0, a0, 1365
; RV32I-NEXT:    vand.vx v12, v12, a0
; RV32I-NEXT:    vsub.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 209715
; RV32I-NEXT:    addi a0, a0, 819
; RV32I-NEXT:    vand.vx v12, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    vadd.vv v8, v12, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v12
; RV32I-NEXT:    lui a0, 61681
; RV32I-NEXT:    addi a0, a0, -241
; RV32I-NEXT:    vand.vx v8, v8, a0
; RV32I-NEXT:    lui a0, 4112
; RV32I-NEXT:    addi a0, a0, 257
; RV32I-NEXT:    vmul.vx v8, v8, a0
; RV32I-NEXT:    vsrl.vi v8, v8, 24
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_zero_undef_nxv8i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64I-NEXT:    vsrl.vi v12, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vsrl.vi v12, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v12
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v12, v8, 1
; RV64I-NEXT:    lui a0, 349525
; RV64I-NEXT:    addiw a0, a0, 1365
; RV64I-NEXT:    vand.vx v12, v12, a0
; RV64I-NEXT:    vsub.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 209715
; RV64I-NEXT:    addiw a0, a0, 819
; RV64I-NEXT:    vand.vx v12, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    vadd.vv v8, v12, v8
; RV64I-NEXT:    vsrl.vi v12, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v12
; RV64I-NEXT:    lui a0, 61681
; RV64I-NEXT:    addiw a0, a0, -241
; RV64I-NEXT:    vand.vx v8, v8, a0
; RV64I-NEXT:    lui a0, 4112
; RV64I-NEXT:    addiw a0, a0, 257
; RV64I-NEXT:    vmul.vx v8, v8, a0
; RV64I-NEXT:    vsrl.vi v8, v8, 24
; RV64I-NEXT:    ret
;
; CHECK-D-LABEL: ctlz_zero_undef_nxv8i32:
; CHECK-D:       # %bb.0:
; CHECK-D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; CHECK-D-NEXT:    vfwcvt.f.xu.v v16, v8
; CHECK-D-NEXT:    li a0, 52
; CHECK-D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-D-NEXT:    vsrl.vx v8, v16, a0
; CHECK-D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-D-NEXT:    vncvt.x.x.w v16, v8
; CHECK-D-NEXT:    li a0, 1054
; CHECK-D-NEXT:    vrsub.vx v8, v16, a0
; CHECK-D-NEXT:    ret
  %a = call <vscale x 8 x i32> @llvm.ctlz.nxv8i32(<vscale x 8 x i32> %va, i1 true)
  ret <vscale x 8 x i32> %a
}

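; e32 at LMUL=8 would need an f64 group wider than m8, so both register
; configurations share the integer expansion here.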
define <vscale x 16 x i32> @ctlz_zero_undef_nxv16i32(<vscale x 16 x i32> %va) {
; RV32-LABEL: ctlz_zero_undef_nxv16i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vand.vx v16, v16, a0
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vand.vx v16, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v16
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vand.vx v8, v8, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vmul.vx v8, v8, a0
; RV32-NEXT:    vsrl.vi v8, v8, 24
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_zero_undef_nxv16i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    lui a0, 209715
; RV64-NEXT:    addiw a0, a0, 819
; RV64-NEXT:    vand.vx v16, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    lui a0, 61681
; RV64-NEXT:    addiw a0, a0, -241
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    lui a0, 4112
; RV64-NEXT:    addiw a0, a0, 257
; RV64-NEXT:    vmul.vx v8, v8, a0
; RV64-NEXT:    vsrl.vi v8, v8, 24
; RV64-NEXT:    ret
  %a = call <vscale x 16 x i32> @llvm.ctlz.nxv16i32(<vscale x 16 x i32> %va, i1 true)
  ret <vscale x 16 x i32> %a
}

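; No FP format here can represent an i64 exactly, so e64 always uses the
; bit-smear plus popcount expansion; a scalar sketch (illustrative only):
;   x |= x>>1; x |= x>>2; x |= x>>4; x |= x>>8; x |= x>>16; x |= x>>32;
;   ctlz64(x) = popcount(~x)  // popcount via the 0x55/0x33/0x0f masks, a
;                             // 0x0101... multiply, then a shift by 56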
2782define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
2783; RV32-LABEL: ctlz_zero_undef_nxv1i64:
2784; RV32:       # %bb.0:
2785; RV32-NEXT:    addi sp, sp, -16
2786; RV32-NEXT:    .cfi_def_cfa_offset 16
2787; RV32-NEXT:    lui a0, 349525
2788; RV32-NEXT:    addi a0, a0, 1365
2789; RV32-NEXT:    sw a0, 12(sp)
2790; RV32-NEXT:    sw a0, 8(sp)
2791; RV32-NEXT:    lui a0, 209715
2792; RV32-NEXT:    addi a0, a0, 819
2793; RV32-NEXT:    sw a0, 12(sp)
2794; RV32-NEXT:    sw a0, 8(sp)
2795; RV32-NEXT:    lui a0, 61681
2796; RV32-NEXT:    addi a0, a0, -241
2797; RV32-NEXT:    sw a0, 12(sp)
2798; RV32-NEXT:    sw a0, 8(sp)
2799; RV32-NEXT:    lui a0, 4112
2800; RV32-NEXT:    addi a0, a0, 257
2801; RV32-NEXT:    sw a0, 12(sp)
2802; RV32-NEXT:    sw a0, 8(sp)
2803; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
2804; RV32-NEXT:    vsrl.vi v9, v8, 1
2805; RV32-NEXT:    vor.vv v8, v8, v9
2806; RV32-NEXT:    vsrl.vi v9, v8, 2
2807; RV32-NEXT:    vor.vv v8, v8, v9
2808; RV32-NEXT:    vsrl.vi v9, v8, 4
2809; RV32-NEXT:    vor.vv v8, v8, v9
2810; RV32-NEXT:    vsrl.vi v9, v8, 8
2811; RV32-NEXT:    vor.vv v8, v8, v9
2812; RV32-NEXT:    vsrl.vi v9, v8, 16
2813; RV32-NEXT:    vor.vv v8, v8, v9
2814; RV32-NEXT:    li a0, 32
2815; RV32-NEXT:    vsrl.vx v9, v8, a0
2816; RV32-NEXT:    vor.vv v8, v8, v9
2817; RV32-NEXT:    addi a0, sp, 8
2818; RV32-NEXT:    vlse64.v v9, (a0), zero
2819; RV32-NEXT:    vnot.v v8, v8
2820; RV32-NEXT:    vlse64.v v10, (a0), zero
2821; RV32-NEXT:    vsrl.vi v11, v8, 1
2822; RV32-NEXT:    vand.vv v9, v11, v9
2823; RV32-NEXT:    vsub.vv v8, v8, v9
2824; RV32-NEXT:    vand.vv v9, v8, v10
2825; RV32-NEXT:    vsrl.vi v8, v8, 2
2826; RV32-NEXT:    vand.vv v8, v8, v10
2827; RV32-NEXT:    vadd.vv v8, v9, v8
2828; RV32-NEXT:    vlse64.v v9, (a0), zero
2829; RV32-NEXT:    vlse64.v v10, (a0), zero
2830; RV32-NEXT:    vsrl.vi v11, v8, 4
2831; RV32-NEXT:    vadd.vv v8, v8, v11
2832; RV32-NEXT:    vand.vv v8, v8, v9
2833; RV32-NEXT:    vmul.vv v8, v8, v10
2834; RV32-NEXT:    li a0, 56
2835; RV32-NEXT:    vsrl.vx v8, v8, a0
2836; RV32-NEXT:    addi sp, sp, 16
2837; RV32-NEXT:    ret
2838;
2839; RV64-LABEL: ctlz_zero_undef_nxv1i64:
2840; RV64:       # %bb.0:
2841; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
2842; RV64-NEXT:    vsrl.vi v9, v8, 1
2843; RV64-NEXT:    vor.vv v8, v8, v9
2844; RV64-NEXT:    vsrl.vi v9, v8, 2
2845; RV64-NEXT:    vor.vv v8, v8, v9
2846; RV64-NEXT:    vsrl.vi v9, v8, 4
2847; RV64-NEXT:    vor.vv v8, v8, v9
2848; RV64-NEXT:    vsrl.vi v9, v8, 8
2849; RV64-NEXT:    vor.vv v8, v8, v9
2850; RV64-NEXT:    vsrl.vi v9, v8, 16
2851; RV64-NEXT:    vor.vv v8, v8, v9
2852; RV64-NEXT:    li a0, 32
2853; RV64-NEXT:    vsrl.vx v9, v8, a0
2854; RV64-NEXT:    vor.vv v8, v8, v9
2855; RV64-NEXT:    vnot.v v8, v8
2856; RV64-NEXT:    lui a0, %hi(.LCPI40_0)
2857; RV64-NEXT:    ld a0, %lo(.LCPI40_0)(a0)
2858; RV64-NEXT:    lui a1, %hi(.LCPI40_1)
2859; RV64-NEXT:    ld a1, %lo(.LCPI40_1)(a1)
2860; RV64-NEXT:    vsrl.vi v9, v8, 1
2861; RV64-NEXT:    vand.vx v9, v9, a0
2862; RV64-NEXT:    vsub.vv v8, v8, v9
2863; RV64-NEXT:    vand.vx v9, v8, a1
2864; RV64-NEXT:    vsrl.vi v8, v8, 2
2865; RV64-NEXT:    vand.vx v8, v8, a1
2866; RV64-NEXT:    vadd.vv v8, v9, v8
2867; RV64-NEXT:    lui a0, %hi(.LCPI40_2)
2868; RV64-NEXT:    ld a0, %lo(.LCPI40_2)(a0)
2869; RV64-NEXT:    lui a1, %hi(.LCPI40_3)
2870; RV64-NEXT:    ld a1, %lo(.LCPI40_3)(a1)
2871; RV64-NEXT:    vsrl.vi v9, v8, 4
2872; RV64-NEXT:    vadd.vv v8, v8, v9
2873; RV64-NEXT:    vand.vx v8, v8, a0
2874; RV64-NEXT:    vmul.vx v8, v8, a1
2875; RV64-NEXT:    li a0, 56
2876; RV64-NEXT:    vsrl.vx v8, v8, a0
2877; RV64-NEXT:    ret
2878  %a = call <vscale x 1 x i64> @llvm.ctlz.nxv1i64(<vscale x 1 x i64> %va, i1 true)
2879  ret <vscale x 1 x i64> %a
2880}

define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
; RV32-LABEL: ctlz_zero_undef_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsrl.vi v14, v8, 1
; RV32-NEXT:    vand.vv v10, v14, v10
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vlse64.v v10, (a0), zero
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vsrl.vi v14, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v14
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_zero_undef_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsrl.vx v10, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    lui a0, %hi(.LCPI41_0)
; RV64-NEXT:    ld a0, %lo(.LCPI41_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI41_1)
; RV64-NEXT:    ld a1, %lo(.LCPI41_1)(a1)
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    vand.vx v10, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    lui a0, %hi(.LCPI41_2)
; RV64-NEXT:    ld a0, %lo(.LCPI41_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI41_3)
; RV64-NEXT:    ld a1, %lo(.LCPI41_3)(a1)
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 2 x i64> @llvm.ctlz.nxv2i64(<vscale x 2 x i64> %va, i1 true)
  ret <vscale x 2 x i64> %a
}
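
; RV64 materializes the four 64-bit masks from constant-pool entries
; (.LCPI*_0 through .LCPI*_3, loaded with lui %hi / ld %lo pairs), since
; building them inline would take several instructions each. RV32 has no
; 64-bit scalar registers, so it instead writes the same 32-bit pattern
; into two adjacent stack words and splats the 64-bit value with a
; stride-0 vlse64.v. The lui/addi pairs build those 32-bit halves (lui
; loads imm << 12; the addi immediate is sign-extended, hence the -241).
; A sanity check in C, assuming the masks are the usual popcount
; constants:
;
;   #include <assert.h>
;   #include <stdint.h>
;
;   int main(void) {
;     assert((uint32_t)(349525 << 12) + 1365 == 0x55555555u);
;     assert((uint32_t)(209715 << 12) + 819  == 0x33333333u);
;     assert((uint32_t)(61681  << 12) - 241  == 0x0f0f0f0fu);
;     assert((uint32_t)(4112   << 12) + 257  == 0x01010101u);
;     return 0;
;   }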

define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
; RV32-LABEL: ctlz_zero_undef_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v12, v8, a0
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsrl.vi v20, v8, 1
; RV32-NEXT:    vand.vv v12, v20, v12
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vlse64.v v12, (a0), zero
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vsrl.vi v20, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v20
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v16
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_zero_undef_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsrl.vx v12, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    lui a0, %hi(.LCPI42_0)
; RV64-NEXT:    ld a0, %lo(.LCPI42_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI42_1)
; RV64-NEXT:    ld a1, %lo(.LCPI42_1)(a1)
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    lui a0, %hi(.LCPI42_2)
; RV64-NEXT:    ld a0, %lo(.LCPI42_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI42_3)
; RV64-NEXT:    ld a1, %lo(.LCPI42_3)(a1)
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64> %va, i1 true)
  ret <vscale x 4 x i64> %a
}
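
; Scaling from nxv1i64 up to nxv8i64 only changes the vsetvli LMUL
; (m1, m2, m4, m8) and the register groups used for temporaries (v9 at
; m1, v10 at m2, v12/v16/v20 at m4, v16/v24/v0 at m8); the instruction
; sequence is otherwise identical. At m8 the RV32 version even presses
; v0 into service as scratch, which is fine for unmasked code since v0
; is only special when named as a mask operand. A small worked example
; in C -- again outside the test -- of why the closing vmul plus vsrl
; by 56 sums the per-byte counts:
;
;   #include <assert.h>
;   #include <stdint.h>
;
;   int main(void) {
;     /* Per-byte counts are <= 8, so the multiply never carries between
;        bytes; byte 7 of the product is the sum of all eight bytes. */
;     uint64_t bytes = 0x0102030401020304;  /* hypothetical partial counts */
;     assert((bytes * 0x0101010101010101u) >> 56 == 1+2+3+4+1+2+3+4);
;     return 0;
;   }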

define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: ctlz_zero_undef_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    sw a0, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    vsrl.vi v16, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v16, v8, a0
; RV32-NEXT:    vor.vv v8, v8, v16
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    vand.vv v16, v0, v16
; RV32-NEXT:    vsub.vv v8, v8, v16
; RV32-NEXT:    vand.vv v16, v8, v24
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v24
; RV32-NEXT:    vadd.vv v8, v16, v8
; RV32-NEXT:    vlse64.v v16, (a0), zero
; RV32-NEXT:    vlse64.v v24, (a0), zero
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v0
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctlz_zero_undef_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsrl.vx v16, v8, a0
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    lui a0, %hi(.LCPI43_0)
; RV64-NEXT:    ld a0, %lo(.LCPI43_0)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI43_1)
; RV64-NEXT:    ld a1, %lo(.LCPI43_1)(a1)
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    lui a0, %hi(.LCPI43_2)
; RV64-NEXT:    ld a0, %lo(.LCPI43_2)(a0)
; RV64-NEXT:    lui a1, %hi(.LCPI43_3)
; RV64-NEXT:    ld a1, %lo(.LCPI43_3)(a1)
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a0
; RV64-NEXT:    vmul.vx v8, v8, a1
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctlz.nxv8i64(<vscale x 8 x i64> %va, i1 true)
  ret <vscale x 8 x i64> %a
}